From 3f0df4d7b0269623cfcd0ef6272e6a3a2ad9066d Mon Sep 17 00:00:00 2001
From: Fraser Cormack
Date: Wed, 31 Mar 2021 12:51:03 +0100
Subject: [PATCH] [RISCV] Expand scalable-vector truncstores and extloads

Caught in internal testing, these operations are assumed legal by
default, even for scalable vector types. Expand them back into separate
truncations and stores, or loads and extensions.

Also add explicit fixed-length vector tests for these operations, even
though they should have been correct already.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D99654
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp        |   45 +-
 llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll  | 1427 ++++++++++++
 .../RISCV/rvv/fixed-vectors-extload-truncstore.ll  | 2411 ++++++++++++++++++++
 3 files changed, 3878 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 75ad5b9..65002c3 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -440,6 +440,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+
+      // Expand all extending loads to types larger than this, and truncating
+      // stores from types larger than this.
+      for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
+        setTruncStoreAction(OtherVT, VT, Expand);
+        setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
+        setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
+        setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
+      }
     }
 
     for (MVT VT : IntVecVTs) {
@@ -498,6 +507,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::STEP_VECTOR, VT, Custom);
       setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
+
+      for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
+        setTruncStoreAction(VT, OtherVT, Expand);
+        setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
+        setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
+        setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
+      }
     }
 
     // Expand various CCs to best match the RVV ISA, which natively supports UNE
@@ -545,17 +561,32 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
     };
 
+    // Sets common extload/truncstore actions on RVV floating-point vector
+    // types.
+    const auto SetCommonVFPExtLoadTruncStoreActions =
+        [&](MVT VT, ArrayRef<MVT> SmallerVTs) {
+          for (auto SmallVT : SmallerVTs) {
+            setTruncStoreAction(VT, SmallVT, Expand);
+            setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
+          }
+        };
+
     if (Subtarget.hasStdExtZfh())
       for (MVT VT : F16VecVTs)
         SetCommonVFPActions(VT);
 
-    if (Subtarget.hasStdExtF())
-      for (MVT VT : F32VecVTs)
+    for (MVT VT : F32VecVTs) {
+      if (Subtarget.hasStdExtF())
         SetCommonVFPActions(VT);
+      SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
+    }
 
-    if (Subtarget.hasStdExtD())
-      for (MVT VT : F64VecVTs)
+    for (MVT VT : F64VecVTs) {
+      if (Subtarget.hasStdExtD())
         SetCommonVFPActions(VT);
+      SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
+      SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
+    }
 
     if (Subtarget.useRVVForFixedLengthVectors()) {
       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
@@ -565,8 +596,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         // By default everything must be expanded.
         for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
           setOperationAction(Op, VT, Expand);
-        for (MVT OtherVT : MVT::fixedlen_vector_valuetypes())
+        for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
           setTruncStoreAction(VT, OtherVT, Expand);
+          setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
+          setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
+          setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
+        }
 
         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
         setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
diff --git a/llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll b/llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll
new file mode 100644
index 0000000..bf8ed71
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll
@@ -0,0 +1,1427 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 1 x i8> @sextload_nxv1i1_nxv1i8(<vscale x 1 x i1>* %x) {
+; CHECK-LABEL: sextload_nxv1i1_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vle1.v v0, (a0)
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vmerge.vim v8, v25, -1, v0
+; CHECK-NEXT:    ret
+  %y = load <vscale x 1 x i1>, <vscale x 1 x i1>* %x
+  %z = sext <vscale x 1 x i1> %y to <vscale x 1 x i8>
+  ret <vscale x 1 x i8> %z
+}
+
+define <vscale x 1 x i16> @sextload_nxv1i8_nxv1i16(<vscale x 1 x i8>* %x) {
+; CHECK-LABEL: sextload_nxv1i8_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vsext.vf2 v8, v25
+; CHECK-NEXT:    ret
+  %y = load <vscale x 1 x i8>, <vscale x 1 x i8>* %x
+  %z = sext <vscale x 1 x i8> %y to <vscale x 1 x i16>
+  ret <vscale x 1 x i16> %z
+}
+
+define <vscale x 1 x i16> @zextload_nxv1i8_nxv1i16(<vscale x 1 x i8>* %x) {
+; CHECK-LABEL: zextload_nxv1i8_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vzext.vf2 v8, v25
+; CHECK-NEXT:    ret
+  %y = load <vscale x 1 x i8>, <vscale x 1 x i8>* %x
+  %z = zext <vscale x 1 x i8> %y to <vscale x 1 x i16>
+  ret <vscale x 1 x i16> %z
+}
+
+define <vscale x 1 x i32> @sextload_nxv1i8_nxv1i32(<vscale x 1 x i8>* %x) {
+; CHECK-LABEL: sextload_nxv1i8_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vsext.vf4 v8, v25
+; CHECK-NEXT:    ret
+  %y = load <vscale x 1 x i8>, <vscale x 1 x i8>* %x
+  %z = sext <vscale x 1 x i8> %y to <vscale x 1 x i32>
+  ret <vscale x 1 x i32> %z
+}
+
+define <vscale x 1 x i32> @zextload_nxv1i8_nxv1i32(<vscale x 1 x i8>* %x) {
+; CHECK-LABEL: zextload_nxv1i8_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero,
e8,mf8,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vzext.vf4 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = zext %y to + ret %z +} + +define @sextload_nxv1i8_nxv1i64(* %x) { +; CHECK-LABEL: sextload_nxv1i8_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vsext.vf8 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = sext %y to + ret %z +} + +define @zextload_nxv1i8_nxv1i64(* %x) { +; CHECK-LABEL: zextload_nxv1i8_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vzext.vf8 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = zext %y to + ret %z +} + +define @sextload_nxv2i8_nxv2i16(* %x) { +; CHECK-LABEL: sextload_nxv2i8_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vsext.vf2 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = sext %y to + ret %z +} + +define @zextload_nxv2i8_nxv2i16(* %x) { +; CHECK-LABEL: zextload_nxv2i8_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vzext.vf2 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = zext %y to + ret %z +} + +define @sextload_nxv2i8_nxv2i32(* %x) { +; CHECK-LABEL: sextload_nxv2i8_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vsext.vf4 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = sext %y to + ret %z +} + +define @zextload_nxv2i8_nxv2i32(* %x) { +; CHECK-LABEL: zextload_nxv2i8_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vzext.vf4 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = zext %y to + ret %z +} + +define @sextload_nxv2i8_nxv2i64(* %x) { +; CHECK-LABEL: sextload_nxv2i8_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vsext.vf8 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = sext %y to + ret %z +} + +define @zextload_nxv2i8_nxv2i64(* %x) { +; CHECK-LABEL: zextload_nxv2i8_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vzext.vf8 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = zext %y to + ret %z +} + +define @sextload_nxv4i8_nxv4i16(* %x) { +; CHECK-LABEL: sextload_nxv4i8_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vsext.vf2 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = sext %y to + ret %z +} + +define @zextload_nxv4i8_nxv4i16(* %x) { +; CHECK-LABEL: zextload_nxv4i8_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vzext.vf2 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = zext %y to + ret %z +} + +define @sextload_nxv4i8_nxv4i32(* %x) { +; CHECK-LABEL: 
sextload_nxv4i8_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vsext.vf4 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = sext %y to + ret %z +} + +define @zextload_nxv4i8_nxv4i32(* %x) { +; CHECK-LABEL: zextload_nxv4i8_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vzext.vf4 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = zext %y to + ret %z +} + +define @sextload_nxv4i8_nxv4i64(* %x) { +; CHECK-LABEL: sextload_nxv4i8_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; CHECK-NEXT: vsext.vf8 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = sext %y to + ret %z +} + +define @zextload_nxv4i8_nxv4i64(* %x) { +; CHECK-LABEL: zextload_nxv4i8_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; CHECK-NEXT: vzext.vf8 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = zext %y to + ret %z +} + +define @sextload_nxv8i8_nxv8i16(* %x) { +; CHECK-LABEL: sextload_nxv8i8_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vl1r.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vsext.vf2 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = sext %y to + ret %z +} + +define @zextload_nxv8i8_nxv8i16(* %x) { +; CHECK-LABEL: zextload_nxv8i8_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vl1r.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vzext.vf2 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = zext %y to + ret %z +} + +define @sextload_nxv8i8_nxv8i32(* %x) { +; CHECK-LABEL: sextload_nxv8i8_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vl1r.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vsext.vf4 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = sext %y to + ret %z +} + +define @zextload_nxv8i8_nxv8i32(* %x) { +; CHECK-LABEL: zextload_nxv8i8_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vl1r.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vzext.vf4 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = zext %y to + ret %z +} + +define @sextload_nxv8i8_nxv8i64(* %x) { +; CHECK-LABEL: sextload_nxv8i8_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl1r.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: vsext.vf8 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = sext %y to + ret %z +} + +define @zextload_nxv8i8_nxv8i64(* %x) { +; CHECK-LABEL: zextload_nxv8i8_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl1r.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: vzext.vf8 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = zext %y to + ret %z +} + +define @sextload_nxv16i8_nxv16i16(* %x) { +; CHECK-LABEL: sextload_nxv16i8_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vl2r.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu +; CHECK-NEXT: vsext.vf2 v8, v26 +; CHECK-NEXT: ret + %y = load , * %x + %z = sext %y to + ret %z +} + +define @zextload_nxv16i8_nxv16i16(* %x) { +; CHECK-LABEL: zextload_nxv16i8_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vl2r.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu +; CHECK-NEXT: vzext.vf2 v8, v26 +; CHECK-NEXT: ret + %y = load , * %x + %z = zext %y to + ret %z +} + +define 
@sextload_nxv16i8_nxv16i32(* %x) { +; CHECK-LABEL: sextload_nxv16i8_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vl2r.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu +; CHECK-NEXT: vsext.vf4 v8, v26 +; CHECK-NEXT: ret + %y = load , * %x + %z = sext %y to + ret %z +} + +define @zextload_nxv16i8_nxv16i32(* %x) { +; CHECK-LABEL: zextload_nxv16i8_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vl2r.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu +; CHECK-NEXT: vzext.vf4 v8, v26 +; CHECK-NEXT: ret + %y = load , * %x + %z = zext %y to + ret %z +} + +define @sextload_nxv32i8_nxv32i16(* %x) { +; CHECK-LABEL: sextload_nxv32i8_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vl4r.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu +; CHECK-NEXT: vsext.vf2 v8, v28 +; CHECK-NEXT: ret + %y = load , * %x + %z = sext %y to + ret %z +} + +define @zextload_nxv32i8_nxv32i16(* %x) { +; CHECK-LABEL: zextload_nxv32i8_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vl4r.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu +; CHECK-NEXT: vzext.vf2 v8, v28 +; CHECK-NEXT: ret + %y = load , * %x + %z = zext %y to + ret %z +} + +define void @truncstore_nxv1i8_nxv1i1( %x, *%z) { +; CHECK-LABEL: truncstore_nxv1i8_nxv1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu +; CHECK-NEXT: vand.vi v25, v8, 1 +; CHECK-NEXT: vmsne.vi v26, v25, 0 +; CHECK-NEXT: vse1.v v26, (a0) +; CHECK-NEXT: ret + %y = trunc %x to + store %y, * %z + ret void +} + +define void @truncstore_nxv1i16_nxv1i8( %x, * %z) { +; CHECK-LABEL: truncstore_nxv1i16_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v8, 0 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %y = trunc %x to + store %y, * %z + ret void +} + +define @sextload_nxv1i16_nxv1i32(* %x) { +; CHECK-LABEL: sextload_nxv1i16_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vsext.vf2 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = sext %y to + ret %z +} + +define @zextload_nxv1i16_nxv1i32(* %x) { +; CHECK-LABEL: zextload_nxv1i16_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vzext.vf2 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = zext %y to + ret %z +} + +define @sextload_nxv1i16_nxv1i64(* %x) { +; CHECK-LABEL: sextload_nxv1i16_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vsext.vf4 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = sext %y to + ret %z +} + +define @zextload_nxv1i16_nxv1i64(* %x) { +; CHECK-LABEL: zextload_nxv1i16_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vzext.vf4 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = zext %y to + ret %z +} + +define void @truncstore_nxv2i16_nxv2i8( %x, * %z) { +; CHECK-LABEL: truncstore_nxv2i16_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v8, 0 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %y = trunc %x to + store %y, * %z + ret void +} + +define @sextload_nxv2i16_nxv2i32(* %x) { +; CHECK-LABEL: sextload_nxv2i16_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu +; 
CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vsext.vf2 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = sext %y to + ret %z +} + +define @zextload_nxv2i16_nxv2i32(* %x) { +; CHECK-LABEL: zextload_nxv2i16_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vzext.vf2 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = zext %y to + ret %z +} + +define @sextload_nxv2i16_nxv2i64(* %x) { +; CHECK-LABEL: sextload_nxv2i16_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vsext.vf4 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = sext %y to + ret %z +} + +define @zextload_nxv2i16_nxv2i64(* %x) { +; CHECK-LABEL: zextload_nxv2i16_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vzext.vf4 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = zext %y to + ret %z +} + +define void @truncstore_nxv4i16_nxv4i8( %x, * %z) { +; CHECK-LABEL: truncstore_nxv4i16_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v8, 0 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %y = trunc %x to + store %y, * %z + ret void +} + +define @sextload_nxv4i16_nxv4i32(* %x) { +; CHECK-LABEL: sextload_nxv4i16_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vl1re16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vsext.vf2 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = sext %y to + ret %z +} + +define @zextload_nxv4i16_nxv4i32(* %x) { +; CHECK-LABEL: zextload_nxv4i16_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vl1re16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vzext.vf2 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = zext %y to + ret %z +} + +define @sextload_nxv4i16_nxv4i64(* %x) { +; CHECK-LABEL: sextload_nxv4i16_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl1re16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; CHECK-NEXT: vsext.vf4 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = sext %y to + ret %z +} + +define @zextload_nxv4i16_nxv4i64(* %x) { +; CHECK-LABEL: zextload_nxv4i16_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl1re16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; CHECK-NEXT: vzext.vf4 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = zext %y to + ret %z +} + +define void @truncstore_nxv8i16_nxv8i8( %x, * %z) { +; CHECK-LABEL: truncstore_nxv8i16_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v8, 0 +; CHECK-NEXT: vs1r.v v25, (a0) +; CHECK-NEXT: ret + %y = trunc %x to + store %y, * %z + ret void +} + +define @sextload_nxv8i16_nxv8i32(* %x) { +; CHECK-LABEL: sextload_nxv8i16_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vl2re16.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vsext.vf2 v8, v26 +; CHECK-NEXT: ret + %y = load , * %x + %z = sext %y to + ret %z +} + +define @zextload_nxv8i16_nxv8i32(* %x) { +; CHECK-LABEL: zextload_nxv8i16_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vl2re16.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vzext.vf2 v8, v26 +; CHECK-NEXT: ret + %y = load , * %x + %z = zext %y to + ret %z +} + +define @sextload_nxv8i16_nxv8i64(* %x) { +; CHECK-LABEL: 
sextload_nxv8i16_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl2re16.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: vsext.vf4 v8, v26 +; CHECK-NEXT: ret + %y = load , * %x + %z = sext %y to + ret %z +} + +define @zextload_nxv8i16_nxv8i64(* %x) { +; CHECK-LABEL: zextload_nxv8i16_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl2re16.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: vzext.vf4 v8, v26 +; CHECK-NEXT: ret + %y = load , * %x + %z = zext %y to + ret %z +} + +define void @truncstore_nxv16i16_nxv16i8( %x, * %z) { +; CHECK-LABEL: truncstore_nxv16i16_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8,m2,ta,mu +; CHECK-NEXT: vnsrl.wi v26, v8, 0 +; CHECK-NEXT: vs2r.v v26, (a0) +; CHECK-NEXT: ret + %y = trunc %x to + store %y, * %z + ret void +} + +define @sextload_nxv16i16_nxv16i32(* %x) { +; CHECK-LABEL: sextload_nxv16i16_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vl4re16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu +; CHECK-NEXT: vsext.vf2 v8, v28 +; CHECK-NEXT: ret + %y = load , * %x + %z = sext %y to + ret %z +} + +define @zextload_nxv16i16_nxv16i32(* %x) { +; CHECK-LABEL: zextload_nxv16i16_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vl4re16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu +; CHECK-NEXT: vzext.vf2 v8, v28 +; CHECK-NEXT: ret + %y = load , * %x + %z = zext %y to + ret %z +} + +define void @truncstore_nxv32i16_nxv32i8( %x, * %z) { +; CHECK-LABEL: truncstore_nxv32i16_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8,m4,ta,mu +; CHECK-NEXT: vnsrl.wi v28, v8, 0 +; CHECK-NEXT: vs4r.v v28, (a0) +; CHECK-NEXT: ret + %y = trunc %x to + store %y, * %z + ret void +} + +define void @truncstore_nxv1i32_nxv1i8( %x, * %z) { +; CHECK-LABEL: truncstore_nxv1i32_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v8, 0 +; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu +; CHECK-NEXT: vnsrl.wi v26, v25, 0 +; CHECK-NEXT: vse8.v v26, (a0) +; CHECK-NEXT: ret + %y = trunc %x to + store %y, * %z + ret void +} + +define void @truncstore_nxv1i32_nxv1i16( %x, * %z) { +; CHECK-LABEL: truncstore_nxv1i32_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v8, 0 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %y = trunc %x to + store %y, * %z + ret void +} + +define @sextload_nxv1i32_nxv1i64(* %x) { +; CHECK-LABEL: sextload_nxv1i32_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vsext.vf2 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = sext %y to + ret %z +} + +define @zextload_nxv1i32_nxv1i64(* %x) { +; CHECK-LABEL: zextload_nxv1i32_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vzext.vf2 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = zext %y to + ret %z +} + +define void @truncstore_nxv2i32_nxv2i8( %x, * %z) { +; CHECK-LABEL: truncstore_nxv2i32_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v8, 0 +; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu +; CHECK-NEXT: vnsrl.wi v26, v25, 0 +; CHECK-NEXT: vse8.v v26, (a0) +; CHECK-NEXT: ret + %y = trunc %x to + store %y, * %z + ret void +} + +define void @truncstore_nxv2i32_nxv2i16( %x, * %z) { +; CHECK-LABEL: truncstore_nxv2i32_nxv2i16: +; CHECK: # %bb.0: +; 
CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v8, 0 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %y = trunc %x to + store %y, * %z + ret void +} + +define @sextload_nxv2i32_nxv2i64(* %x) { +; CHECK-LABEL: sextload_nxv2i32_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl1re32.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vsext.vf2 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = sext %y to + ret %z +} + +define @zextload_nxv2i32_nxv2i64(* %x) { +; CHECK-LABEL: zextload_nxv2i32_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl1re32.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vzext.vf2 v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = zext %y to + ret %z +} + +define void @truncstore_nxv4i32_nxv4i8( %x, * %z) { +; CHECK-LABEL: truncstore_nxv4i32_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v8, 0 +; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu +; CHECK-NEXT: vnsrl.wi v26, v25, 0 +; CHECK-NEXT: vse8.v v26, (a0) +; CHECK-NEXT: ret + %y = trunc %x to + store %y, * %z + ret void +} + +define void @truncstore_nxv4i32_nxv4i16( %x, * %z) { +; CHECK-LABEL: truncstore_nxv4i32_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v8, 0 +; CHECK-NEXT: vs1r.v v25, (a0) +; CHECK-NEXT: ret + %y = trunc %x to + store %y, * %z + ret void +} + +define @sextload_nxv4i32_nxv4i64(* %x) { +; CHECK-LABEL: sextload_nxv4i32_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl2re32.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; CHECK-NEXT: vsext.vf2 v8, v26 +; CHECK-NEXT: ret + %y = load , * %x + %z = sext %y to + ret %z +} + +define @zextload_nxv4i32_nxv4i64(* %x) { +; CHECK-LABEL: zextload_nxv4i32_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl2re32.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; CHECK-NEXT: vzext.vf2 v8, v26 +; CHECK-NEXT: ret + %y = load , * %x + %z = zext %y to + ret %z +} + +define void @truncstore_nxv8i32_nxv8i8( %x, * %z) { +; CHECK-LABEL: truncstore_nxv8i32_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; CHECK-NEXT: vnsrl.wi v26, v8, 0 +; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v26, 0 +; CHECK-NEXT: vs1r.v v25, (a0) +; CHECK-NEXT: ret + %y = trunc %x to + store %y, * %z + ret void +} + +define void @truncstore_nxv8i32_nxv8i16( %x, * %z) { +; CHECK-LABEL: truncstore_nxv8i32_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; CHECK-NEXT: vnsrl.wi v26, v8, 0 +; CHECK-NEXT: vs2r.v v26, (a0) +; CHECK-NEXT: ret + %y = trunc %x to + store %y, * %z + ret void +} + +define @sextload_nxv8i32_nxv8i64(* %x) { +; CHECK-LABEL: sextload_nxv8i32_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl4re32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: vsext.vf2 v8, v28 +; CHECK-NEXT: ret + %y = load , * %x + %z = sext %y to + ret %z +} + +define @zextload_nxv8i32_nxv8i64(* %x) { +; CHECK-LABEL: zextload_nxv8i32_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl4re32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: vzext.vf2 v8, v28 +; CHECK-NEXT: ret + %y = load , * %x + %z = zext %y to + ret %z +} + +define void @truncstore_nxv16i32_nxv16i8( %x, * %z) { +; CHECK-LABEL: truncstore_nxv16i32_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu +; CHECK-NEXT: vnsrl.wi v28, v8, 0 +; CHECK-NEXT: vsetvli a1, zero, e8,m2,ta,mu +; CHECK-NEXT: vnsrl.wi v26, v28, 0 +; 
CHECK-NEXT: vs2r.v v26, (a0) +; CHECK-NEXT: ret + %y = trunc %x to + store %y, * %z + ret void +} + +define void @truncstore_nxv16i32_nxv16i16( %x, * %z) { +; CHECK-LABEL: truncstore_nxv16i32_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu +; CHECK-NEXT: vnsrl.wi v28, v8, 0 +; CHECK-NEXT: vs4r.v v28, (a0) +; CHECK-NEXT: ret + %y = trunc %x to + store %y, * %z + ret void +} + +define void @truncstore_nxv1i64_nxv1i8( %x, * %z) { +; CHECK-LABEL: truncstore_nxv1i64_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v8, 0 +; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu +; CHECK-NEXT: vnsrl.wi v26, v25, 0 +; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v26, 0 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %y = trunc %x to + store %y, * %z + ret void +} + +define void @truncstore_nxv1i64_nxv1i16( %x, * %z) { +; CHECK-LABEL: truncstore_nxv1i64_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v8, 0 +; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu +; CHECK-NEXT: vnsrl.wi v26, v25, 0 +; CHECK-NEXT: vse16.v v26, (a0) +; CHECK-NEXT: ret + %y = trunc %x to + store %y, * %z + ret void +} + +define void @truncstore_nxv1i64_nxv1i32( %x, * %z) { +; CHECK-LABEL: truncstore_nxv1i64_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v8, 0 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %y = trunc %x to + store %y, * %z + ret void +} + +define void @truncstore_nxv2i64_nxv2i8( %x, * %z) { +; CHECK-LABEL: truncstore_nxv2i64_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v8, 0 +; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu +; CHECK-NEXT: vnsrl.wi v26, v25, 0 +; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v26, 0 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %y = trunc %x to + store %y, * %z + ret void +} + +define void @truncstore_nxv2i64_nxv2i16( %x, * %z) { +; CHECK-LABEL: truncstore_nxv2i64_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v8, 0 +; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu +; CHECK-NEXT: vnsrl.wi v26, v25, 0 +; CHECK-NEXT: vse16.v v26, (a0) +; CHECK-NEXT: ret + %y = trunc %x to + store %y, * %z + ret void +} + +define void @truncstore_nxv2i64_nxv2i32( %x, * %z) { +; CHECK-LABEL: truncstore_nxv2i64_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v8, 0 +; CHECK-NEXT: vs1r.v v25, (a0) +; CHECK-NEXT: ret + %y = trunc %x to + store %y, * %z + ret void +} + +define void @truncstore_nxv4i64_nxv4i8( %x, * %z) { +; CHECK-LABEL: truncstore_nxv4i64_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu +; CHECK-NEXT: vnsrl.wi v26, v8, 0 +; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v26, 0 +; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu +; CHECK-NEXT: vnsrl.wi v26, v25, 0 +; CHECK-NEXT: vse8.v v26, (a0) +; CHECK-NEXT: ret + %y = trunc %x to + store %y, * %z + ret void +} + +define void @truncstore_nxv4i64_nxv4i16( %x, * %z) { +; CHECK-LABEL: truncstore_nxv4i64_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu +; CHECK-NEXT: vnsrl.wi v26, v8, 0 +; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v26, 0 +; CHECK-NEXT: vs1r.v v25, (a0) +; CHECK-NEXT: ret + %y = trunc %x to + store %y, * %z + ret void +} + 
+define void @truncstore_nxv4i64_nxv4i32( %x, * %z) { +; CHECK-LABEL: truncstore_nxv4i64_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu +; CHECK-NEXT: vnsrl.wi v26, v8, 0 +; CHECK-NEXT: vs2r.v v26, (a0) +; CHECK-NEXT: ret + %y = trunc %x to + store %y, * %z + ret void +} + +define void @truncstore_nxv8i64_nxv8i8( %x, * %z) { +; CHECK-LABEL: truncstore_nxv8i64_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; CHECK-NEXT: vnsrl.wi v28, v8, 0 +; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; CHECK-NEXT: vnsrl.wi v26, v28, 0 +; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v26, 0 +; CHECK-NEXT: vs1r.v v25, (a0) +; CHECK-NEXT: ret + %y = trunc %x to + store %y, * %z + ret void +} + +define void @truncstore_nxv8i64_nxv8i16( %x, * %z) { +; CHECK-LABEL: truncstore_nxv8i64_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; CHECK-NEXT: vnsrl.wi v28, v8, 0 +; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; CHECK-NEXT: vnsrl.wi v26, v28, 0 +; CHECK-NEXT: vs2r.v v26, (a0) +; CHECK-NEXT: ret + %y = trunc %x to + store %y, * %z + ret void +} + +define void @truncstore_nxv8i64_nxv8i32( %x, * %z) { +; CHECK-LABEL: truncstore_nxv8i64_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; CHECK-NEXT: vnsrl.wi v28, v8, 0 +; CHECK-NEXT: vs4r.v v28, (a0) +; CHECK-NEXT: ret + %y = trunc %x to + store %y, * %z + ret void +} + +define @extload_nxv1f16_nxv1f32(* %x) { +; CHECK-LABEL: extload_nxv1f16_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vfwcvt.f.f.v v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = fpext %y to + ret %z +} + +define @extload_nxv1f16_nxv1f64(* %x) { +; CHECK-LABEL: extload_nxv1f16_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vfwcvt.f.f.v v26, v25 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v8, v26 +; CHECK-NEXT: ret + %y = load , * %x + %z = fpext %y to + ret %z +} + +define @extload_nxv2f16_nxv2f32(* %x) { +; CHECK-LABEL: extload_nxv2f16_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vfwcvt.f.f.v v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = fpext %y to + ret %z +} + +define @extload_nxv2f16_nxv2f64(* %x) { +; CHECK-LABEL: extload_nxv2f16_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vfwcvt.f.f.v v26, v25 +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v8, v26 +; CHECK-NEXT: ret + %y = load , * %x + %z = fpext %y to + ret %z +} + +define @extload_nxv4f16_nxv4f32(* %x) { +; CHECK-LABEL: extload_nxv4f16_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vl1re16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = fpext %y to + ret %z +} + +define @extload_nxv4f16_nxv4f64(* %x) { +; CHECK-LABEL: extload_nxv4f16_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl1re16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v26, v25 +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v8, v26 +; CHECK-NEXT: ret + %y = load , * %x + %z = fpext %y to + ret %z +} + +define @extload_nxv8f16_nxv8f32(* %x) { +; CHECK-LABEL: extload_nxv8f16_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vl2re16.v 
v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v8, v26 +; CHECK-NEXT: ret + %y = load , * %x + %z = fpext %y to + ret %z +} + +define @extload_nxv8f16_nxv8f64(* %x) { +; CHECK-LABEL: extload_nxv8f16_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl2re16.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v28, v26 +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v8, v28 +; CHECK-NEXT: ret + %y = load , * %x + %z = fpext %y to + ret %z +} + +define @extload_nxv16f16_nxv16f32(* %x) { +; CHECK-LABEL: extload_nxv16f16_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vl4re16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v8, v28 +; CHECK-NEXT: ret + %y = load , * %x + %z = fpext %y to + ret %z +} + +define void @truncstore_nxv1f32_nxv1f16( %x, * %z) { +; CHECK-LABEL: truncstore_nxv1f32_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v25, v8 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %y = fptrunc %x to + store %y, * %z + ret void +} + +define @extload_nxv1f32_nxv1f64(* %x) { +; CHECK-LABEL: extload_nxv1f32_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vfwcvt.f.f.v v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = fpext %y to + ret %z +} + +define void @truncstore_nxv2f32_nxv2f16( %x, * %z) { +; CHECK-LABEL: truncstore_nxv2f32_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v25, v8 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %y = fptrunc %x to + store %y, * %z + ret void +} + +define @extload_nxv2f32_nxv2f64(* %x) { +; CHECK-LABEL: extload_nxv2f32_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl1re32.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = fpext %y to + ret %z +} + +define void @truncstore_nxv4f32_nxv4f16( %x, * %z) { +; CHECK-LABEL: truncstore_nxv4f32_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v25, v8 +; CHECK-NEXT: vs1r.v v25, (a0) +; CHECK-NEXT: ret + %y = fptrunc %x to + store %y, * %z + ret void +} + +define @extload_nxv4f32_nxv4f64(* %x) { +; CHECK-LABEL: extload_nxv4f32_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl2re32.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v8, v26 +; CHECK-NEXT: ret + %y = load , * %x + %z = fpext %y to + ret %z +} + +define void @truncstore_nxv8f32_nxv8f16( %x, * %z) { +; CHECK-LABEL: truncstore_nxv8f32_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v26, v8 +; CHECK-NEXT: vs2r.v v26, (a0) +; CHECK-NEXT: ret + %y = fptrunc %x to + store %y, * %z + ret void +} + +define @extload_nxv8f32_nxv8f64(* %x) { +; CHECK-LABEL: extload_nxv8f32_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl4re32.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v8, v28 +; CHECK-NEXT: ret + %y = load , * %x + %z = fpext %y to + ret %z +} + +define void @truncstore_nxv16f32_nxv16f16( %x, * %z) { +; CHECK-LABEL: truncstore_nxv16f32_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v28, v8 +; CHECK-NEXT: vs4r.v v28, (a0) +; CHECK-NEXT: ret + %y = fptrunc %x to + store %y, * %z + ret void +} + +define void @truncstore_nxv1f64_nxv1f16( 
%x, * %z) { +; CHECK-LABEL: truncstore_nxv1f64_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfncvt.rod.f.f.w v25, v8 +; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v26, v25 +; CHECK-NEXT: vse16.v v26, (a0) +; CHECK-NEXT: ret + %y = fptrunc %x to + store %y, * %z + ret void +} + +define void @truncstore_nxv1f64_nxv1f32( %x, * %z) { +; CHECK-LABEL: truncstore_nxv1f64_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v25, v8 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %y = fptrunc %x to + store %y, * %z + ret void +} + +define void @truncstore_nxv2f64_nxv2f16( %x, * %z) { +; CHECK-LABEL: truncstore_nxv2f64_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; CHECK-NEXT: vfncvt.rod.f.f.w v25, v8 +; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v26, v25 +; CHECK-NEXT: vse16.v v26, (a0) +; CHECK-NEXT: ret + %y = fptrunc %x to + store %y, * %z + ret void +} + +define void @truncstore_nxv2f64_nxv2f32( %x, * %z) { +; CHECK-LABEL: truncstore_nxv2f64_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v25, v8 +; CHECK-NEXT: vs1r.v v25, (a0) +; CHECK-NEXT: ret + %y = fptrunc %x to + store %y, * %z + ret void +} + +define void @truncstore_nxv4f64_nxv4f16( %x, * %z) { +; CHECK-LABEL: truncstore_nxv4f64_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu +; CHECK-NEXT: vfncvt.rod.f.f.w v26, v8 +; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v25, v26 +; CHECK-NEXT: vs1r.v v25, (a0) +; CHECK-NEXT: ret + %y = fptrunc %x to + store %y, * %z + ret void +} + +define void @truncstore_nxv4f64_nxv4f32( %x, * %z) { +; CHECK-LABEL: truncstore_nxv4f64_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v26, v8 +; CHECK-NEXT: vs2r.v v26, (a0) +; CHECK-NEXT: ret + %y = fptrunc %x to + store %y, * %z + ret void +} + +define void @truncstore_nxv8f64_nxv8f16( %x, * %z) { +; CHECK-LABEL: truncstore_nxv8f64_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; CHECK-NEXT: vfncvt.rod.f.f.w v28, v8 +; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v26, v28 +; CHECK-NEXT: vs2r.v v26, (a0) +; CHECK-NEXT: ret + %y = fptrunc %x to + store %y, * %z + ret void +} + +define void @truncstore_nxv8f64_nxv8f32( %x, * %z) { +; CHECK-LABEL: truncstore_nxv8f64_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v28, v8 +; CHECK-NEXT: vs4r.v v28, (a0) +; CHECK-NEXT: ret + %y = fptrunc %x to + store %y, * %z + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll new file mode 100644 index 0000000..481c88e --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll @@ -0,0 +1,2411 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4 +; RUN: 
llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4 + +define <2 x i16> @sextload_v2i1_v2i16(<2 x i1>* %x) { +; CHECK-LABEL: sextload_v2i1_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu +; CHECK-NEXT: vle1.v v0, (a0) +; CHECK-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v8, v25, -1, v0 +; CHECK-NEXT: ret + %y = load <2 x i1>, <2 x i1>* %x + %z = sext <2 x i1> %y to <2 x i16> + ret <2 x i16> %z +} + +define <2 x i16> @sextload_v2i8_v2i16(<2 x i8>* %x) { +; CHECK-LABEL: sextload_v2i8_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; CHECK-NEXT: vsext.vf2 v8, v25 +; CHECK-NEXT: ret + %y = load <2 x i8>, <2 x i8>* %x + %z = sext <2 x i8> %y to <2 x i16> + ret <2 x i16> %z +} + +define <2 x i16> @zextload_v2i8_v2i16(<2 x i8>* %x) { +; CHECK-LABEL: zextload_v2i8_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; CHECK-NEXT: vzext.vf2 v8, v25 +; CHECK-NEXT: ret + %y = load <2 x i8>, <2 x i8>* %x + %z = zext <2 x i8> %y to <2 x i16> + ret <2 x i16> %z +} + +define <2 x i32> @sextload_v2i8_v2i32(<2 x i8>* %x) { +; CHECK-LABEL: sextload_v2i8_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; CHECK-NEXT: vsext.vf4 v8, v25 +; CHECK-NEXT: ret + %y = load <2 x i8>, <2 x i8>* %x + %z = sext <2 x i8> %y to <2 x i32> + ret <2 x i32> %z +} + +define <2 x i32> @zextload_v2i8_v2i32(<2 x i8>* %x) { +; CHECK-LABEL: zextload_v2i8_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; CHECK-NEXT: vzext.vf4 v8, v25 +; CHECK-NEXT: ret + %y = load <2 x i8>, <2 x i8>* %x + %z = zext <2 x i8> %y to <2 x i32> + ret <2 x i32> %z +} + +define <2 x i64> @sextload_v2i8_v2i64(<2 x i8>* %x) { +; CHECK-LABEL: sextload_v2i8_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; CHECK-NEXT: vsext.vf8 v8, v25 +; CHECK-NEXT: ret + %y = load <2 x i8>, <2 x i8>* %x + %z = sext <2 x i8> %y to <2 x i64> + ret <2 x i64> %z +} + +define <2 x i64> @zextload_v2i8_v2i64(<2 x i8>* %x) { +; CHECK-LABEL: zextload_v2i8_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; CHECK-NEXT: vzext.vf8 v8, v25 +; CHECK-NEXT: ret + %y = load <2 x i8>, <2 x i8>* %x + %z = zext <2 x i8> %y to <2 x i64> + ret <2 x i64> %z +} + +define <4 x i16> @sextload_v4i8_v4i16(<4 x i8>* %x) { +; CHECK-LABEL: sextload_v4i8_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; CHECK-NEXT: vsext.vf2 v8, v25 +; CHECK-NEXT: ret + %y = load <4 x i8>, <4 x i8>* %x + %z = sext <4 x i8> %y to <4 x i16> + ret <4 x i16> %z +} + +define <4 x i16> @zextload_v4i8_v4i16(<4 x i8>* %x) { +; 
CHECK-LABEL: zextload_v4i8_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; CHECK-NEXT: vzext.vf2 v8, v25 +; CHECK-NEXT: ret + %y = load <4 x i8>, <4 x i8>* %x + %z = zext <4 x i8> %y to <4 x i16> + ret <4 x i16> %z +} + +define <4 x i32> @sextload_v4i8_v4i32(<4 x i8>* %x) { +; CHECK-LABEL: sextload_v4i8_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; CHECK-NEXT: vsext.vf4 v8, v25 +; CHECK-NEXT: ret + %y = load <4 x i8>, <4 x i8>* %x + %z = sext <4 x i8> %y to <4 x i32> + ret <4 x i32> %z +} + +define <4 x i32> @zextload_v4i8_v4i32(<4 x i8>* %x) { +; CHECK-LABEL: zextload_v4i8_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; CHECK-NEXT: vzext.vf4 v8, v25 +; CHECK-NEXT: ret + %y = load <4 x i8>, <4 x i8>* %x + %z = zext <4 x i8> %y to <4 x i32> + ret <4 x i32> %z +} + +define <4 x i64> @sextload_v4i8_v4i64(<4 x i8>* %x) { +; LMULMAX1-LABEL: sextload_v4i8_v4i64: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,ta,mu +; LMULMAX1-NEXT: vle8.v v25, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf8 v8, v25 +; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf8 v9, v25 +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: sextload_v4i8_v4i64: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 4, e8,m1,ta,mu +; LMULMAX4-NEXT: vle8.v v25, (a0) +; LMULMAX4-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; LMULMAX4-NEXT: vsext.vf8 v8, v25 +; LMULMAX4-NEXT: ret + %y = load <4 x i8>, <4 x i8>* %x + %z = sext <4 x i8> %y to <4 x i64> + ret <4 x i64> %z +} + +define <4 x i64> @zextload_v4i8_v4i64(<4 x i8>* %x) { +; LMULMAX1-LABEL: zextload_v4i8_v4i64: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,ta,mu +; LMULMAX1-NEXT: vle8.v v25, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf8 v8, v25 +; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf8 v9, v25 +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: zextload_v4i8_v4i64: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 4, e8,m1,ta,mu +; LMULMAX4-NEXT: vle8.v v25, (a0) +; LMULMAX4-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; LMULMAX4-NEXT: vzext.vf8 v8, v25 +; LMULMAX4-NEXT: ret + %y = load <4 x i8>, <4 x i8>* %x + %z = zext <4 x i8> %y to <4 x i64> + ret <4 x i64> %z +} + +define <8 x i16> @sextload_v8i8_v8i16(<8 x i8>* %x) { +; CHECK-LABEL: sextload_v8i8_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetivli a0, 8, e16,m1,ta,mu +; CHECK-NEXT: vsext.vf2 v8, v25 +; CHECK-NEXT: ret + %y = load <8 x i8>, <8 x i8>* %x + %z = sext <8 x i8> %y to <8 x i16> + ret <8 x i16> %z +} + +define <8 x i16> @zextload_v8i8_v8i16(<8 x i8>* %x) { +; CHECK-LABEL: zextload_v8i8_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsetivli a0, 8, e16,m1,ta,mu +; CHECK-NEXT: vzext.vf2 v8, v25 +; CHECK-NEXT: ret + %y = load <8 x i8>, <8 x i8>* %x + %z = zext <8 x i8> %y to <8 x i16> + ret <8 x i16> %z +} + +define <8 x i32> @sextload_v8i8_v8i32(<8 x i8>* %x) { +; LMULMAX1-LABEL: 
sextload_v8i8_v8i32: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; LMULMAX1-NEXT: vle8.v v25, (a0) +; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf4 v8, v25 +; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4 +; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf4 v9, v25 +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: sextload_v8i8_v8i32: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; LMULMAX4-NEXT: vle8.v v25, (a0) +; LMULMAX4-NEXT: vsetivli a0, 8, e32,m2,ta,mu +; LMULMAX4-NEXT: vsext.vf4 v8, v25 +; LMULMAX4-NEXT: ret + %y = load <8 x i8>, <8 x i8>* %x + %z = sext <8 x i8> %y to <8 x i32> + ret <8 x i32> %z +} + +define <8 x i32> @zextload_v8i8_v8i32(<8 x i8>* %x) { +; LMULMAX1-LABEL: zextload_v8i8_v8i32: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; LMULMAX1-NEXT: vle8.v v25, (a0) +; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf4 v8, v25 +; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4 +; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf4 v9, v25 +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: zextload_v8i8_v8i32: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; LMULMAX4-NEXT: vle8.v v25, (a0) +; LMULMAX4-NEXT: vsetivli a0, 8, e32,m2,ta,mu +; LMULMAX4-NEXT: vzext.vf4 v8, v25 +; LMULMAX4-NEXT: ret + %y = load <8 x i8>, <8 x i8>* %x + %z = zext <8 x i8> %y to <8 x i32> + ret <8 x i32> %z +} + +define <8 x i64> @sextload_v8i8_v8i64(<8 x i8>* %x) { +; LMULMAX1-LABEL: sextload_v8i8_v8i64: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; LMULMAX1-NEXT: vle8.v v25, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf8 v8, v25 +; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf8 v10, v26 +; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v26, v26, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf8 v11, v26 +; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf8 v9, v25 +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: sextload_v8i8_v8i64: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; LMULMAX4-NEXT: vle8.v v25, (a0) +; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; LMULMAX4-NEXT: vsext.vf8 v8, v25 +; LMULMAX4-NEXT: ret + %y = load <8 x i8>, <8 x i8>* %x + %z = sext <8 x i8> %y to <8 x i64> + ret <8 x i64> %z +} + +define <8 x i64> @zextload_v8i8_v8i64(<8 x i8>* %x) { +; LMULMAX1-LABEL: zextload_v8i8_v8i64: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; LMULMAX1-NEXT: vle8.v v25, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf8 v8, v25 +; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf8 v10, v26 +; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v26, v26, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf8 v11, v26 +; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf8 v9, v25 +; 
LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: zextload_v8i8_v8i64: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; LMULMAX4-NEXT: vle8.v v25, (a0) +; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; LMULMAX4-NEXT: vzext.vf8 v8, v25 +; LMULMAX4-NEXT: ret + %y = load <8 x i8>, <8 x i8>* %x + %z = zext <8 x i8> %y to <8 x i64> + ret <8 x i64> %z +} + +define <16 x i16> @sextload_v16i8_v16i16(<16 x i8>* %x) { +; LMULMAX1-LABEL: sextload_v16i8_v16i16: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; LMULMAX1-NEXT: vle8.v v25, (a0) +; LMULMAX1-NEXT: vsetivli a0, 8, e16,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf2 v8, v25 +; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 8 +; LMULMAX1-NEXT: vsetivli a0, 8, e16,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf2 v9, v25 +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: sextload_v16i8_v16i16: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; LMULMAX4-NEXT: vle8.v v25, (a0) +; LMULMAX4-NEXT: vsetivli a0, 16, e16,m2,ta,mu +; LMULMAX4-NEXT: vsext.vf2 v8, v25 +; LMULMAX4-NEXT: ret + %y = load <16 x i8>, <16 x i8>* %x + %z = sext <16 x i8> %y to <16 x i16> + ret <16 x i16> %z +} + +define <16 x i16> @zextload_v16i8_v16i16(<16 x i8>* %x) { +; LMULMAX1-LABEL: zextload_v16i8_v16i16: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; LMULMAX1-NEXT: vle8.v v25, (a0) +; LMULMAX1-NEXT: vsetivli a0, 8, e16,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf2 v8, v25 +; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 8 +; LMULMAX1-NEXT: vsetivli a0, 8, e16,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf2 v9, v25 +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: zextload_v16i8_v16i16: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; LMULMAX4-NEXT: vle8.v v25, (a0) +; LMULMAX4-NEXT: vsetivli a0, 16, e16,m2,ta,mu +; LMULMAX4-NEXT: vzext.vf2 v8, v25 +; LMULMAX4-NEXT: ret + %y = load <16 x i8>, <16 x i8>* %x + %z = zext <16 x i8> %y to <16 x i16> + ret <16 x i16> %z +} + +define <16 x i32> @sextload_v16i8_v16i32(<16 x i8>* %x) { +; LMULMAX1-LABEL: sextload_v16i8_v16i32: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; LMULMAX1-NEXT: vle8.v v25, (a0) +; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf4 v8, v25 +; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v26, v25, 8 +; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf4 v10, v26 +; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v26, v26, 4 +; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf4 v11, v26 +; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4 +; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf4 v9, v25 +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: sextload_v16i8_v16i32: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; LMULMAX4-NEXT: vle8.v v25, (a0) +; LMULMAX4-NEXT: vsetivli a0, 16, e32,m4,ta,mu +; LMULMAX4-NEXT: vsext.vf4 v8, v25 +; LMULMAX4-NEXT: ret + %y = load <16 x i8>, <16 x i8>* %x + %z = sext <16 x i8> %y to <16 x i32> + ret <16 x i32> %z +} + +define <16 x i32> @zextload_v16i8_v16i32(<16 x i8>* %x) { +; LMULMAX1-LABEL: zextload_v16i8_v16i32: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; LMULMAX1-NEXT: vle8.v v25, (a0) +; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf4 v8, v25 +; LMULMAX1-NEXT: vsetivli a0, 8, 
e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v26, v25, 8 +; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf4 v10, v26 +; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v26, v26, 4 +; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf4 v11, v26 +; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4 +; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf4 v9, v25 +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: zextload_v16i8_v16i32: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; LMULMAX4-NEXT: vle8.v v25, (a0) +; LMULMAX4-NEXT: vsetivli a0, 16, e32,m4,ta,mu +; LMULMAX4-NEXT: vzext.vf4 v8, v25 +; LMULMAX4-NEXT: ret + %y = load <16 x i8>, <16 x i8>* %x + %z = zext <16 x i8> %y to <16 x i32> + ret <16 x i32> %z +} + +define <16 x i64> @sextload_v16i8_v16i64(<16 x i8>* %x) { +; LMULMAX1-LABEL: sextload_v16i8_v16i64: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; LMULMAX1-NEXT: vle8.v v25, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf8 v8, v25 +; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v26, v25, 8 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf8 v12, v26 +; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v27, v26, 4 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf8 v14, v27 +; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf8 v15, v27 +; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v27, v25, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf8 v9, v27 +; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf8 v10, v25 +; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf8 v11, v25 +; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v26, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf8 v13, v25 +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: sextload_v16i8_v16i64: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; LMULMAX4-NEXT: vle8.v v25, (a0) +; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; LMULMAX4-NEXT: vsext.vf8 v8, v25 +; LMULMAX4-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; LMULMAX4-NEXT: vslidedown.vi v25, v25, 8 +; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; LMULMAX4-NEXT: vsext.vf8 v12, v25 +; LMULMAX4-NEXT: ret + %y = load <16 x i8>, <16 x i8>* %x + %z = sext <16 x i8> %y to <16 x i64> + ret <16 x i64> %z +} + +define <16 x i64> @zextload_v16i8_v16i64(<16 x i8>* %x) { +; LMULMAX1-LABEL: zextload_v16i8_v16i64: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; LMULMAX1-NEXT: vle8.v v25, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf8 v8, v25 +; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v26, v25, 8 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf8 v12, v26 +; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v27, v26, 4 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf8 v14, 
v27 +; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf8 v15, v27 +; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v27, v25, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf8 v9, v27 +; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf8 v10, v25 +; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf8 v11, v25 +; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v26, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf8 v13, v25 +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: zextload_v16i8_v16i64: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; LMULMAX4-NEXT: vle8.v v25, (a0) +; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; LMULMAX4-NEXT: vzext.vf8 v8, v25 +; LMULMAX4-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; LMULMAX4-NEXT: vslidedown.vi v25, v25, 8 +; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; LMULMAX4-NEXT: vzext.vf8 v12, v25 +; LMULMAX4-NEXT: ret + %y = load <16 x i8>, <16 x i8>* %x + %z = zext <16 x i8> %y to <16 x i64> + ret <16 x i64> %z +} + +define void @truncstore_v2i8_v2i1(<2 x i8> %x, <2 x i1>* %z) { +; CHECK-LABEL: truncstore_v2i8_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu +; CHECK-NEXT: vand.vi v25, v8, 1 +; CHECK-NEXT: vmsne.vi v26, v25, 0 +; CHECK-NEXT: vse1.v v26, (a0) +; CHECK-NEXT: ret + %y = trunc <2 x i8> %x to <2 x i1> + store <2 x i1> %y, <2 x i1>* %z + ret void +} + +define void @truncstore_v2i16_v2i8(<2 x i16> %x, <2 x i8>* %z) { +; CHECK-LABEL: truncstore_v2i16_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e8,mf2,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v8, 0 +; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %y = trunc <2 x i16> %x to <2 x i8> + store <2 x i8> %y, <2 x i8>* %z + ret void +} + +define <2 x i32> @sextload_v2i16_v2i32(<2 x i16>* %x) { +; CHECK-LABEL: sextload_v2i16_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; CHECK-NEXT: vsext.vf2 v8, v25 +; CHECK-NEXT: ret + %y = load <2 x i16>, <2 x i16>* %x + %z = sext <2 x i16> %y to <2 x i32> + ret <2 x i32> %z +} + +define <2 x i32> @zextload_v2i16_v2i32(<2 x i16>* %x) { +; CHECK-LABEL: zextload_v2i16_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; CHECK-NEXT: vzext.vf2 v8, v25 +; CHECK-NEXT: ret + %y = load <2 x i16>, <2 x i16>* %x + %z = zext <2 x i16> %y to <2 x i32> + ret <2 x i32> %z +} + +define <2 x i64> @sextload_v2i16_v2i64(<2 x i16>* %x) { +; CHECK-LABEL: sextload_v2i16_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; CHECK-NEXT: vsext.vf4 v8, v25 +; CHECK-NEXT: ret + %y = load <2 x i16>, <2 x i16>* %x + %z = sext <2 x i16> %y to <2 x i64> + ret <2 x i64> %z +} + +define <2 x i64> @zextload_v2i16_v2i64(<2 x i16>* %x) { +; CHECK-LABEL: zextload_v2i16_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetivli 
a0, 2, e64,m1,ta,mu +; CHECK-NEXT: vzext.vf4 v8, v25 +; CHECK-NEXT: ret + %y = load <2 x i16>, <2 x i16>* %x + %z = zext <2 x i16> %y to <2 x i64> + ret <2 x i64> %z +} + +define void @truncstore_v4i16_v4i8(<4 x i16> %x, <4 x i8>* %z) { +; CHECK-LABEL: truncstore_v4i16_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e8,mf2,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v8, 0 +; CHECK-NEXT: vsetivli a1, 4, e8,m1,ta,mu +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %y = trunc <4 x i16> %x to <4 x i8> + store <4 x i8> %y, <4 x i8>* %z + ret void +} + +define <4 x i32> @sextload_v4i16_v4i32(<4 x i16>* %x) { +; CHECK-LABEL: sextload_v4i16_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; CHECK-NEXT: vsext.vf2 v8, v25 +; CHECK-NEXT: ret + %y = load <4 x i16>, <4 x i16>* %x + %z = sext <4 x i16> %y to <4 x i32> + ret <4 x i32> %z +} + +define <4 x i32> @zextload_v4i16_v4i32(<4 x i16>* %x) { +; CHECK-LABEL: zextload_v4i16_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; CHECK-NEXT: vzext.vf2 v8, v25 +; CHECK-NEXT: ret + %y = load <4 x i16>, <4 x i16>* %x + %z = zext <4 x i16> %y to <4 x i32> + ret <4 x i32> %z +} + +define <4 x i64> @sextload_v4i16_v4i64(<4 x i16>* %x) { +; LMULMAX1-LABEL: sextload_v4i16_v4i64: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; LMULMAX1-NEXT: vle16.v v25, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf4 v8, v25 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf4 v9, v25 +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: sextload_v4i16_v4i64: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; LMULMAX4-NEXT: vle16.v v25, (a0) +; LMULMAX4-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; LMULMAX4-NEXT: vsext.vf4 v8, v25 +; LMULMAX4-NEXT: ret + %y = load <4 x i16>, <4 x i16>* %x + %z = sext <4 x i16> %y to <4 x i64> + ret <4 x i64> %z +} + +define <4 x i64> @zextload_v4i16_v4i64(<4 x i16>* %x) { +; LMULMAX1-LABEL: zextload_v4i16_v4i64: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; LMULMAX1-NEXT: vle16.v v25, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf4 v8, v25 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf4 v9, v25 +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: zextload_v4i16_v4i64: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; LMULMAX4-NEXT: vle16.v v25, (a0) +; LMULMAX4-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; LMULMAX4-NEXT: vzext.vf4 v8, v25 +; LMULMAX4-NEXT: ret + %y = load <4 x i16>, <4 x i16>* %x + %z = zext <4 x i16> %y to <4 x i64> + ret <4 x i64> %z +} + +define void @truncstore_v8i16_v8i8(<8 x i16> %x, <8 x i8>* %z) { +; CHECK-LABEL: truncstore_v8i16_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 8, e8,mf2,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v8, 0 +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %y = trunc <8 x i16> %x to <8 x i8> + store <8 x i8> %y, <8 x i8>* %z + ret void +} + +define <8 x i32> @sextload_v8i16_v8i32(<8 x i16>* %x) { +; LMULMAX1-LABEL: sextload_v8i16_v8i32: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; 
LMULMAX1-NEXT: vle16.v v25, (a0) +; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf2 v8, v25 +; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4 +; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf2 v9, v25 +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: sextload_v8i16_v8i32: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; LMULMAX4-NEXT: vle16.v v25, (a0) +; LMULMAX4-NEXT: vsetivli a0, 8, e32,m2,ta,mu +; LMULMAX4-NEXT: vsext.vf2 v8, v25 +; LMULMAX4-NEXT: ret + %y = load <8 x i16>, <8 x i16>* %x + %z = sext <8 x i16> %y to <8 x i32> + ret <8 x i32> %z +} + +define <8 x i32> @zextload_v8i16_v8i32(<8 x i16>* %x) { +; LMULMAX1-LABEL: zextload_v8i16_v8i32: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; LMULMAX1-NEXT: vle16.v v25, (a0) +; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf2 v8, v25 +; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4 +; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf2 v9, v25 +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: zextload_v8i16_v8i32: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; LMULMAX4-NEXT: vle16.v v25, (a0) +; LMULMAX4-NEXT: vsetivli a0, 8, e32,m2,ta,mu +; LMULMAX4-NEXT: vzext.vf2 v8, v25 +; LMULMAX4-NEXT: ret + %y = load <8 x i16>, <8 x i16>* %x + %z = zext <8 x i16> %y to <8 x i32> + ret <8 x i32> %z +} + +define <8 x i64> @sextload_v8i16_v8i64(<8 x i16>* %x) { +; LMULMAX1-LABEL: sextload_v8i16_v8i64: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; LMULMAX1-NEXT: vle16.v v25, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf4 v8, v25 +; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf4 v10, v26 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v26, v26, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf4 v11, v26 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf4 v9, v25 +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: sextload_v8i16_v8i64: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; LMULMAX4-NEXT: vle16.v v25, (a0) +; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; LMULMAX4-NEXT: vsext.vf4 v8, v25 +; LMULMAX4-NEXT: ret + %y = load <8 x i16>, <8 x i16>* %x + %z = sext <8 x i16> %y to <8 x i64> + ret <8 x i64> %z +} + +define <8 x i64> @zextload_v8i16_v8i64(<8 x i16>* %x) { +; LMULMAX1-LABEL: zextload_v8i16_v8i64: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; LMULMAX1-NEXT: vle16.v v25, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf4 v8, v25 +; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf4 v10, v26 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v26, v26, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf4 v11, v26 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf4 v9, v25 +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: 
zextload_v8i16_v8i64: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; LMULMAX4-NEXT: vle16.v v25, (a0) +; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; LMULMAX4-NEXT: vzext.vf4 v8, v25 +; LMULMAX4-NEXT: ret + %y = load <8 x i16>, <8 x i16>* %x + %z = zext <8 x i16> %y to <8 x i64> + ret <8 x i64> %z +} + +define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %z) { +; LMULMAX1-LABEL: truncstore_v16i16_v16i8: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 8, e8,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v8, 0 +; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; LMULMAX1-NEXT: vmv.v.i v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v26, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 8, e8,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v9, 0 +; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v26, v25, 8 +; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; LMULMAX1-NEXT: vse8.v v26, (a0) +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: truncstore_v16i16_v16i8: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v25, v8, 0 +; LMULMAX4-NEXT: vse8.v v25, (a0) +; LMULMAX4-NEXT: ret + %y = trunc <16 x i16> %x to <16 x i8> + store <16 x i8> %y, <16 x i8>* %z + ret void +} + +define <16 x i32> @sextload_v16i16_v16i32(<16 x i16>* %x) { +; LMULMAX1-LABEL: sextload_v16i16_v16i32: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; LMULMAX1-NEXT: vle16.v v25, (a0) +; LMULMAX1-NEXT: addi a0, a0, 16 +; LMULMAX1-NEXT: vle16.v v26, (a0) +; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf2 v8, v25 +; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4 +; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf2 v9, v25 +; LMULMAX1-NEXT: vsext.vf2 v10, v26 +; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v26, 4 +; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf2 v11, v25 +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: sextload_v16i16_v16i32: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 16, e16,m2,ta,mu +; LMULMAX4-NEXT: vle16.v v26, (a0) +; LMULMAX4-NEXT: vsetivli a0, 16, e32,m4,ta,mu +; LMULMAX4-NEXT: vsext.vf2 v8, v26 +; LMULMAX4-NEXT: ret + %y = load <16 x i16>, <16 x i16>* %x + %z = sext <16 x i16> %y to <16 x i32> + ret <16 x i32> %z +} + +define <16 x i32> @zextload_v16i16_v16i32(<16 x i16>* %x) { +; LMULMAX1-LABEL: zextload_v16i16_v16i32: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; LMULMAX1-NEXT: vle16.v v25, (a0) +; LMULMAX1-NEXT: addi a0, a0, 16 +; LMULMAX1-NEXT: vle16.v v26, (a0) +; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf2 v8, v25 +; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4 +; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf2 v9, v25 +; LMULMAX1-NEXT: vzext.vf2 v10, v26 +; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v26, 4 +; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf2 v11, v25 +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: zextload_v16i16_v16i32: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 16, e16,m2,ta,mu +; LMULMAX4-NEXT: vle16.v v26, (a0) +; LMULMAX4-NEXT: vsetivli a0, 16, e32,m4,ta,mu +; LMULMAX4-NEXT: vzext.vf2 v8, v26 +; LMULMAX4-NEXT: ret + %y = load <16 x i16>, <16 x i16>* %x + %z = zext <16 x i16> %y to <16 x i32> + ret <16 x 
i32> %z +} + +define <16 x i64> @sextload_v16i16_v16i64(<16 x i16>* %x) { +; LMULMAX1-LABEL: sextload_v16i16_v16i64: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; LMULMAX1-NEXT: vle16.v v25, (a0) +; LMULMAX1-NEXT: addi a0, a0, 16 +; LMULMAX1-NEXT: vle16.v v26, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf4 v8, v25 +; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v27, v25, 4 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf4 v10, v27 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf4 v11, v27 +; LMULMAX1-NEXT: vsext.vf4 v12, v26 +; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v27, v26, 4 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf4 v14, v27 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf4 v15, v27 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf4 v9, v25 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v26, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf4 v13, v25 +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: sextload_v16i16_v16i64: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 16, e16,m2,ta,mu +; LMULMAX4-NEXT: vle16.v v26, (a0) +; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; LMULMAX4-NEXT: vsext.vf4 v8, v26 +; LMULMAX4-NEXT: vsetivli a0, 8, e16,m2,ta,mu +; LMULMAX4-NEXT: vslidedown.vi v26, v26, 8 +; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; LMULMAX4-NEXT: vsext.vf4 v12, v26 +; LMULMAX4-NEXT: ret + %y = load <16 x i16>, <16 x i16>* %x + %z = sext <16 x i16> %y to <16 x i64> + ret <16 x i64> %z +} + +define <16 x i64> @zextload_v16i16_v16i64(<16 x i16>* %x) { +; LMULMAX1-LABEL: zextload_v16i16_v16i64: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; LMULMAX1-NEXT: vle16.v v25, (a0) +; LMULMAX1-NEXT: addi a0, a0, 16 +; LMULMAX1-NEXT: vle16.v v26, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf4 v8, v25 +; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v27, v25, 4 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf4 v10, v27 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf4 v11, v27 +; LMULMAX1-NEXT: vzext.vf4 v12, v26 +; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v27, v26, 4 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf4 v14, v27 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf4 v15, v27 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf4 v9, v25 +; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v26, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf4 v13, v25 +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: zextload_v16i16_v16i64: +; LMULMAX4: # 
%bb.0: +; LMULMAX4-NEXT: vsetivli a1, 16, e16,m2,ta,mu +; LMULMAX4-NEXT: vle16.v v26, (a0) +; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; LMULMAX4-NEXT: vzext.vf4 v8, v26 +; LMULMAX4-NEXT: vsetivli a0, 8, e16,m2,ta,mu +; LMULMAX4-NEXT: vslidedown.vi v26, v26, 8 +; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; LMULMAX4-NEXT: vzext.vf4 v12, v26 +; LMULMAX4-NEXT: ret + %y = load <16 x i16>, <16 x i16>* %x + %z = zext <16 x i16> %y to <16 x i64> + ret <16 x i64> %z +} + +define void @truncstore_v2i32_v2i8(<2 x i32> %x, <2 x i8>* %z) { +; CHECK-LABEL: truncstore_v2i32_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e16,mf2,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v8, 0 +; CHECK-NEXT: vsetivli a1, 2, e8,mf4,ta,mu +; CHECK-NEXT: vnsrl.wi v26, v25, 0 +; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu +; CHECK-NEXT: vse8.v v26, (a0) +; CHECK-NEXT: ret + %y = trunc <2 x i32> %x to <2 x i8> + store <2 x i8> %y, <2 x i8>* %z + ret void +} + +define void @truncstore_v2i32_v2i16(<2 x i32> %x, <2 x i16>* %z) { +; CHECK-LABEL: truncstore_v2i32_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e16,mf2,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v8, 0 +; CHECK-NEXT: vsetivli a1, 2, e16,m1,ta,mu +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %y = trunc <2 x i32> %x to <2 x i16> + store <2 x i16> %y, <2 x i16>* %z + ret void +} + +define <2 x i64> @sextload_v2i32_v2i64(<2 x i32>* %x) { +; CHECK-LABEL: sextload_v2i32_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; CHECK-NEXT: vsext.vf2 v8, v25 +; CHECK-NEXT: ret + %y = load <2 x i32>, <2 x i32>* %x + %z = sext <2 x i32> %y to <2 x i64> + ret <2 x i64> %z +} + +define <2 x i64> @zextload_v2i32_v2i64(<2 x i32>* %x) { +; CHECK-LABEL: zextload_v2i32_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; CHECK-NEXT: vzext.vf2 v8, v25 +; CHECK-NEXT: ret + %y = load <2 x i32>, <2 x i32>* %x + %z = zext <2 x i32> %y to <2 x i64> + ret <2 x i64> %z +} + +define void @truncstore_v4i32_v4i8(<4 x i32> %x, <4 x i8>* %z) { +; CHECK-LABEL: truncstore_v4i32_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e16,mf2,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v8, 0 +; CHECK-NEXT: vsetivli a1, 4, e8,mf4,ta,mu +; CHECK-NEXT: vnsrl.wi v26, v25, 0 +; CHECK-NEXT: vsetivli a1, 4, e8,m1,ta,mu +; CHECK-NEXT: vse8.v v26, (a0) +; CHECK-NEXT: ret + %y = trunc <4 x i32> %x to <4 x i8> + store <4 x i8> %y, <4 x i8>* %z + ret void +} + +define void @truncstore_v4i32_v4i16(<4 x i32> %x, <4 x i16>* %z) { +; CHECK-LABEL: truncstore_v4i32_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 4, e16,mf2,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v8, 0 +; CHECK-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %y = trunc <4 x i32> %x to <4 x i16> + store <4 x i16> %y, <4 x i16>* %z + ret void +} + +define <4 x i64> @sextload_v4i32_v4i64(<4 x i32>* %x) { +; LMULMAX1-LABEL: sextload_v4i32_v4i64: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vle32.v v25, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf2 v8, v25 +; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf2 v9, v25 +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: sextload_v4i32_v4i64: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 4, 
e32,m1,ta,mu +; LMULMAX4-NEXT: vle32.v v25, (a0) +; LMULMAX4-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; LMULMAX4-NEXT: vsext.vf2 v8, v25 +; LMULMAX4-NEXT: ret + %y = load <4 x i32>, <4 x i32>* %x + %z = sext <4 x i32> %y to <4 x i64> + ret <4 x i64> %z +} + +define <4 x i64> @zextload_v4i32_v4i64(<4 x i32>* %x) { +; LMULMAX1-LABEL: zextload_v4i32_v4i64: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vle32.v v25, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf2 v8, v25 +; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf2 v9, v25 +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: zextload_v4i32_v4i64: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX4-NEXT: vle32.v v25, (a0) +; LMULMAX4-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; LMULMAX4-NEXT: vzext.vf2 v8, v25 +; LMULMAX4-NEXT: ret + %y = load <4 x i32>, <4 x i32>* %x + %z = zext <4 x i32> %y to <4 x i64> + ret <4 x i64> %z +} + +define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %z) { +; LMULMAX1-LABEL: truncstore_v8i32_v8i8: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 4, e16,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v8, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e8,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; LMULMAX1-NEXT: vmv.v.i v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v25, v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e16,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v9, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e8,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v27, v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v25, v27, 4 +; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; LMULMAX1-NEXT: vse8.v v25, (a0) +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: truncstore_v8i32_v8i8: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v25, v8, 0 +; LMULMAX4-NEXT: vsetivli a1, 8, e8,mf2,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v26, v25, 0 +; LMULMAX4-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; LMULMAX4-NEXT: vse8.v v26, (a0) +; LMULMAX4-NEXT: ret + %y = trunc <8 x i32> %x to <8 x i8> + store <8 x i8> %y, <8 x i8>* %z + ret void +} + +define void @truncstore_v8i32_v8i16(<8 x i32> %x, <8 x i16>* %z) { +; LMULMAX1-LABEL: truncstore_v8i32_v8i16: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 4, e16,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v8, 0 +; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; LMULMAX1-NEXT: vmv.v.i v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v26, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e16,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v9, 0 +; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v26, v25, 4 +; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; LMULMAX1-NEXT: vse16.v v26, (a0) +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: truncstore_v8i32_v8i16: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v25, v8, 0 +; LMULMAX4-NEXT: vse16.v v25, (a0) +; LMULMAX4-NEXT: ret + %y = trunc <8 x i32> %x to <8 x i16> + store <8 x i16> %y, <8 x i16>* %z + ret void +} + +define <8 x i64> @sextload_v8i32_v8i64(<8 x i32>* %x) { +; LMULMAX1-LABEL: sextload_v8i32_v8i64: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vle32.v v25, (a0) +; LMULMAX1-NEXT: addi a0, a0, 
16 +; LMULMAX1-NEXT: vle32.v v26, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf2 v8, v25 +; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf2 v9, v25 +; LMULMAX1-NEXT: vsext.vf2 v10, v26 +; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v26, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf2 v11, v25 +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: sextload_v8i32_v8i64: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; LMULMAX4-NEXT: vle32.v v26, (a0) +; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; LMULMAX4-NEXT: vsext.vf2 v8, v26 +; LMULMAX4-NEXT: ret + %y = load <8 x i32>, <8 x i32>* %x + %z = sext <8 x i32> %y to <8 x i64> + ret <8 x i64> %z +} + +define <8 x i64> @zextload_v8i32_v8i64(<8 x i32>* %x) { +; LMULMAX1-LABEL: zextload_v8i32_v8i64: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vle32.v v25, (a0) +; LMULMAX1-NEXT: addi a0, a0, 16 +; LMULMAX1-NEXT: vle32.v v26, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf2 v8, v25 +; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf2 v9, v25 +; LMULMAX1-NEXT: vzext.vf2 v10, v26 +; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v26, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf2 v11, v25 +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: zextload_v8i32_v8i64: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; LMULMAX4-NEXT: vle32.v v26, (a0) +; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; LMULMAX4-NEXT: vzext.vf2 v8, v26 +; LMULMAX4-NEXT: ret + %y = load <8 x i32>, <8 x i32>* %x + %z = zext <8 x i32> %y to <8 x i64> + ret <8 x i64> %z +} + +define void @truncstore_v16i32_v16i8(<16 x i32> %x, <16 x i8>* %z) { +; LMULMAX1-LABEL: truncstore_v16i32_v16i8: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 4, e16,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v10, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e8,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; LMULMAX1-NEXT: vmv.v.i v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,tu,mu +; LMULMAX1-NEXT: vmv1r.v v27, v25 +; LMULMAX1-NEXT: vslideup.vi v27, v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e16,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v11, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e8,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v28, v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v27, v28, 4 +; LMULMAX1-NEXT: vsetivli a1, 4, e16,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v8, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e8,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v28, v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v25, v28, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e16,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v9, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e8,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v28, v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v25, v28, 4 +; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; LMULMAX1-NEXT: vmv.v.i v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v26, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v26, v27, 8 +; 
LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; LMULMAX1-NEXT: vse8.v v26, (a0) +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: truncstore_v16i32_v16i8: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 16, e16,m2,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v26, v8, 0 +; LMULMAX4-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v25, v26, 0 +; LMULMAX4-NEXT: vse8.v v25, (a0) +; LMULMAX4-NEXT: ret + %y = trunc <16 x i32> %x to <16 x i8> + store <16 x i8> %y, <16 x i8>* %z + ret void +} + +define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %z) { +; LMULMAX1-LABEL: truncstore_v16i32_v16i16: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 4, e16,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v8, 0 +; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; LMULMAX1-NEXT: vmv.v.i v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,tu,mu +; LMULMAX1-NEXT: vmv1r.v v27, v26 +; LMULMAX1-NEXT: vslideup.vi v27, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e16,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v9, 0 +; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v27, v25, 4 +; LMULMAX1-NEXT: vsetivli a1, 4, e16,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v10, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v26, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e16,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v11, 0 +; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v26, v25, 4 +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: vsetivli a2, 8, e16,m1,ta,mu +; LMULMAX1-NEXT: vse16.v v26, (a1) +; LMULMAX1-NEXT: vse16.v v27, (a0) +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: truncstore_v16i32_v16i16: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 16, e16,m2,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v26, v8, 0 +; LMULMAX4-NEXT: vse16.v v26, (a0) +; LMULMAX4-NEXT: ret + %y = trunc <16 x i32> %x to <16 x i16> + store <16 x i16> %y, <16 x i16>* %z + ret void +} + +define <16 x i64> @sextload_v16i32_v16i64(<16 x i32>* %x) { +; LMULMAX1-LABEL: sextload_v16i32_v16i64: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: addi a1, a0, 48 +; LMULMAX1-NEXT: vsetivli a2, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vle32.v v25, (a1) +; LMULMAX1-NEXT: addi a1, a0, 32 +; LMULMAX1-NEXT: vle32.v v26, (a1) +; LMULMAX1-NEXT: vle32.v v27, (a0) +; LMULMAX1-NEXT: addi a0, a0, 16 +; LMULMAX1-NEXT: vle32.v v28, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf2 v8, v27 +; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf2 v9, v27 +; LMULMAX1-NEXT: vsext.vf2 v10, v28 +; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v27, v28, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf2 v11, v27 +; LMULMAX1-NEXT: vsext.vf2 v12, v26 +; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v26, v26, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf2 v13, v26 +; LMULMAX1-NEXT: vsext.vf2 v14, v25 +; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vsext.vf2 v15, v25 +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: sextload_v16i32_v16i64: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 16, e32,m4,ta,mu +; LMULMAX4-NEXT: vle32.v v28, (a0) +; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; LMULMAX4-NEXT: vsext.vf2 v8, v28 +; LMULMAX4-NEXT: vsetivli a0, 8, e32,m4,ta,mu +; 
LMULMAX4-NEXT: vslidedown.vi v28, v28, 8 +; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; LMULMAX4-NEXT: vsext.vf2 v12, v28 +; LMULMAX4-NEXT: ret + %y = load <16 x i32>, <16 x i32>* %x + %z = sext <16 x i32> %y to <16 x i64> + ret <16 x i64> %z +} + +define <16 x i64> @zextload_v16i32_v16i64(<16 x i32>* %x) { +; LMULMAX1-LABEL: zextload_v16i32_v16i64: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: addi a1, a0, 48 +; LMULMAX1-NEXT: vsetivli a2, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vle32.v v25, (a1) +; LMULMAX1-NEXT: addi a1, a0, 32 +; LMULMAX1-NEXT: vle32.v v26, (a1) +; LMULMAX1-NEXT: vle32.v v27, (a0) +; LMULMAX1-NEXT: addi a0, a0, 16 +; LMULMAX1-NEXT: vle32.v v28, (a0) +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf2 v8, v27 +; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf2 v9, v27 +; LMULMAX1-NEXT: vzext.vf2 v10, v28 +; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v27, v28, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf2 v11, v27 +; LMULMAX1-NEXT: vzext.vf2 v12, v26 +; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v26, v26, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf2 v13, v26 +; LMULMAX1-NEXT: vzext.vf2 v14, v25 +; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2 +; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; LMULMAX1-NEXT: vzext.vf2 v15, v25 +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: zextload_v16i32_v16i64: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 16, e32,m4,ta,mu +; LMULMAX4-NEXT: vle32.v v28, (a0) +; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; LMULMAX4-NEXT: vzext.vf2 v8, v28 +; LMULMAX4-NEXT: vsetivli a0, 8, e32,m4,ta,mu +; LMULMAX4-NEXT: vslidedown.vi v28, v28, 8 +; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; LMULMAX4-NEXT: vzext.vf2 v12, v28 +; LMULMAX4-NEXT: ret + %y = load <16 x i32>, <16 x i32>* %x + %z = zext <16 x i32> %y to <16 x i64> + ret <16 x i64> %z +} + +define void @truncstore_v2i64_v2i8(<2 x i64> %x, <2 x i8>* %z) { +; CHECK-LABEL: truncstore_v2i64_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v8, 0 +; CHECK-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; CHECK-NEXT: vnsrl.wi v26, v25, 0 +; CHECK-NEXT: vsetivli a1, 2, e8,mf8,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v26, 0 +; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %y = trunc <2 x i64> %x to <2 x i8> + store <2 x i8> %y, <2 x i8>* %z + ret void +} + +define void @truncstore_v2i64_v2i16(<2 x i64> %x, <2 x i16>* %z) { +; CHECK-LABEL: truncstore_v2i64_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v8, 0 +; CHECK-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; CHECK-NEXT: vnsrl.wi v26, v25, 0 +; CHECK-NEXT: vsetivli a1, 2, e16,m1,ta,mu +; CHECK-NEXT: vse16.v v26, (a0) +; CHECK-NEXT: ret + %y = trunc <2 x i64> %x to <2 x i16> + store <2 x i16> %y, <2 x i16>* %z + ret void +} + +define void @truncstore_v2i64_v2i32(<2 x i64> %x, <2 x i32>* %z) { +; CHECK-LABEL: truncstore_v2i64_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; CHECK-NEXT: vnsrl.wi v25, v8, 0 +; CHECK-NEXT: vsetivli a1, 2, e32,m1,ta,mu +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %y = trunc <2 x i64> %x to <2 x i32> + store <2 x i32> %y, <2 x i32>* %z + ret void +} + +define void 
@truncstore_v4i64_v4i8(<4 x i64> %x, <4 x i8>* %z) { +; LMULMAX1-LABEL: truncstore_v4i64_v4i8: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v8, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e8,mf8,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,ta,mu +; LMULMAX1-NEXT: vmv.v.i v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e8,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v26, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v9, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v27, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e8,mf8,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v27, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v26, v25, 2 +; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,ta,mu +; LMULMAX1-NEXT: vse8.v v26, (a0) +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: truncstore_v4i64_v4i8: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v25, v8, 0 +; LMULMAX4-NEXT: vsetivli a1, 4, e16,mf2,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v26, v25, 0 +; LMULMAX4-NEXT: vsetivli a1, 4, e8,mf4,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v25, v26, 0 +; LMULMAX4-NEXT: vsetivli a1, 4, e8,m1,ta,mu +; LMULMAX4-NEXT: vse8.v v25, (a0) +; LMULMAX4-NEXT: ret + %y = trunc <4 x i64> %x to <4 x i8> + store <4 x i8> %y, <4 x i8>* %z + ret void +} + +define void @truncstore_v4i64_v4i16(<4 x i64> %x, <4 x i16>* %z) { +; LMULMAX1-LABEL: truncstore_v4i64_v4i16: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v8, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; LMULMAX1-NEXT: vmv.v.i v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v25, v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v9, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v27, v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v25, v27, 2 +; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; LMULMAX1-NEXT: vse16.v v25, (a0) +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: truncstore_v4i64_v4i16: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v25, v8, 0 +; LMULMAX4-NEXT: vsetivli a1, 4, e16,mf2,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v26, v25, 0 +; LMULMAX4-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; LMULMAX4-NEXT: vse16.v v26, (a0) +; LMULMAX4-NEXT: ret + %y = trunc <4 x i64> %x to <4 x i16> + store <4 x i16> %y, <4 x i16>* %z + ret void +} + +define void @truncstore_v4i64_v4i32(<4 x i64> %x, <4 x i32>* %z) { +; LMULMAX1-LABEL: truncstore_v4i64_v4i32: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v8, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vmv.v.i v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v26, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v9, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v26, v25, 2 +; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vse32.v v26, (a0) +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: truncstore_v4i64_v4i32: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 
4, e32,m1,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v25, v8, 0 +; LMULMAX4-NEXT: vse32.v v25, (a0) +; LMULMAX4-NEXT: ret + %y = trunc <4 x i64> %x to <4 x i32> + store <4 x i32> %y, <4 x i32>* %z + ret void +} + +define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %z) { +; LMULMAX1-LABEL: truncstore_v8i64_v8i8: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v10, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e8,mf8,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,ta,mu +; LMULMAX1-NEXT: vmv.v.i v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e8,m1,tu,mu +; LMULMAX1-NEXT: vmv1r.v v27, v26 +; LMULMAX1-NEXT: vslideup.vi v27, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v11, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v28, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e8,mf8,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v28, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v27, v25, 2 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v8, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v28, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e8,mf8,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v28, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e8,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v26, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v9, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v28, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e8,mf8,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v28, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v26, v25, 2 +; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; LMULMAX1-NEXT: vmv.v.i v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v25, v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v25, v27, 4 +; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; LMULMAX1-NEXT: vse8.v v25, (a0) +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: truncstore_v8i64_v8i8: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v26, v8, 0 +; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v25, v26, 0 +; LMULMAX4-NEXT: vsetivli a1, 8, e8,mf2,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v26, v25, 0 +; LMULMAX4-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; LMULMAX4-NEXT: vse8.v v26, (a0) +; LMULMAX4-NEXT: ret + %y = trunc <8 x i64> %x to <8 x i8> + store <8 x i8> %y, <8 x i8>* %z + ret void +} + +define void @truncstore_v8i64_v8i16(<8 x i64> %x, <8 x i16>* %z) { +; LMULMAX1-LABEL: truncstore_v8i64_v8i16: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v10, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; LMULMAX1-NEXT: vmv.v.i v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,m1,tu,mu +; LMULMAX1-NEXT: vmv1r.v v27, v25 +; LMULMAX1-NEXT: vslideup.vi v27, v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v11, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v28, v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v27, v28, 2 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: 
vnsrl.wi v26, v8, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v28, v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v25, v28, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v9, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v28, v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v25, v28, 2 +; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; LMULMAX1-NEXT: vmv.v.i v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v26, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v26, v27, 4 +; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; LMULMAX1-NEXT: vse16.v v26, (a0) +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: truncstore_v8i64_v8i16: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v26, v8, 0 +; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v25, v26, 0 +; LMULMAX4-NEXT: vse16.v v25, (a0) +; LMULMAX4-NEXT: ret + %y = trunc <8 x i64> %x to <8 x i16> + store <8 x i16> %y, <8 x i16>* %z + ret void +} + +define void @truncstore_v8i64_v8i32(<8 x i64> %x, <8 x i32>* %z) { +; LMULMAX1-LABEL: truncstore_v8i64_v8i32: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v8, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vmv.v.i v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,m1,tu,mu +; LMULMAX1-NEXT: vmv1r.v v27, v26 +; LMULMAX1-NEXT: vslideup.vi v27, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v9, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v27, v25, 2 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v10, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v26, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v11, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v26, v25, 2 +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: vsetivli a2, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vse32.v v26, (a1) +; LMULMAX1-NEXT: vse32.v v27, (a0) +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: truncstore_v8i64_v8i32: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v26, v8, 0 +; LMULMAX4-NEXT: vse32.v v26, (a0) +; LMULMAX4-NEXT: ret + %y = trunc <8 x i64> %x to <8 x i32> + store <8 x i32> %y, <8 x i32>* %z + ret void +} + +define void @truncstore_v16i64_v16i8(<16 x i64> %x, <16 x i8>* %z) { +; LMULMAX1-LABEL: truncstore_v16i64_v16i8: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v14, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e8,mf8,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v27, v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,ta,mu +; LMULMAX1-NEXT: vmv.v.i v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e8,m1,tu,mu +; LMULMAX1-NEXT: vmv1r.v v26, v25 +; LMULMAX1-NEXT: vslideup.vi v26, v27, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v27, v15, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v28, v27, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e8,mf8,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v27, v28, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,tu,mu +; 
LMULMAX1-NEXT: vslideup.vi v26, v27, 2 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v27, v12, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v28, v27, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e8,mf8,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v27, v28, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e8,m1,tu,mu +; LMULMAX1-NEXT: vmv1r.v v28, v25 +; LMULMAX1-NEXT: vslideup.vi v28, v27, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v27, v13, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v29, v27, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e8,mf8,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v27, v29, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v28, v27, 2 +; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; LMULMAX1-NEXT: vmv.v.i v27, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,tu,mu +; LMULMAX1-NEXT: vmv1r.v v29, v27 +; LMULMAX1-NEXT: vslideup.vi v29, v28, 0 +; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v29, v26, 4 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v10, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v28, v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e8,mf8,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v28, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e8,m1,tu,mu +; LMULMAX1-NEXT: vmv1r.v v28, v25 +; LMULMAX1-NEXT: vslideup.vi v28, v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v11, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v30, v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e8,mf8,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v30, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v28, v26, 2 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v8, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v30, v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e8,mf8,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v30, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e8,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v25, v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v9, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v30, v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e8,mf8,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v30, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v25, v26, 2 +; LMULMAX1-NEXT: vslideup.vi v27, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v27, v28, 4 +; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; LMULMAX1-NEXT: vmv.v.i v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v25, v27, 0 +; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v25, v29, 8 +; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; LMULMAX1-NEXT: vse8.v v25, (a0) +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: truncstore_v16i64_v16i8: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v26, v8, 0 +; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v25, v26, 0 +; LMULMAX4-NEXT: vsetivli a1, 8, e8,mf2,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v26, v25, 0 +; LMULMAX4-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; LMULMAX4-NEXT: vmv.v.i v25, 0 +; LMULMAX4-NEXT: vsetivli a1, 8, e8,m1,tu,mu +; LMULMAX4-NEXT: vslideup.vi v25, v26, 0 +; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v26, v12, 
0 +; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v28, v26, 0 +; LMULMAX4-NEXT: vsetivli a1, 8, e8,mf2,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v26, v28, 0 +; LMULMAX4-NEXT: vsetivli a1, 16, e8,m1,tu,mu +; LMULMAX4-NEXT: vslideup.vi v25, v26, 8 +; LMULMAX4-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; LMULMAX4-NEXT: vse8.v v25, (a0) +; LMULMAX4-NEXT: ret + %y = trunc <16 x i64> %x to <16 x i8> + store <16 x i8> %y, <16 x i8>* %z + ret void +} + +define void @truncstore_v16i64_v16i16(<16 x i64> %x, <16 x i16>* %z) { +; LMULMAX1-LABEL: truncstore_v16i64_v16i16: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v10, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; LMULMAX1-NEXT: vmv.v.i v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,m1,tu,mu +; LMULMAX1-NEXT: vmv1r.v v27, v25 +; LMULMAX1-NEXT: vslideup.vi v27, v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v11, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v28, v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v27, v28, 2 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v8, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v28, v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,m1,tu,mu +; LMULMAX1-NEXT: vmv1r.v v26, v25 +; LMULMAX1-NEXT: vslideup.vi v26, v28, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v28, v9, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v29, v28, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v26, v29, 2 +; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; LMULMAX1-NEXT: vmv.v.i v28, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,tu,mu +; LMULMAX1-NEXT: vmv1r.v v29, v28 +; LMULMAX1-NEXT: vslideup.vi v29, v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v29, v27, 4 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v26, v14, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v27, v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,m1,tu,mu +; LMULMAX1-NEXT: vmv1r.v v26, v25 +; LMULMAX1-NEXT: vslideup.vi v26, v27, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v27, v15, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v30, v27, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v26, v30, 2 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v27, v12, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v30, v27, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v25, v30, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v27, v13, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e16,mf4,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v30, v27, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v25, v30, 2 +; LMULMAX1-NEXT: vslideup.vi v28, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v28, v26, 4 +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: vsetivli a2, 8, e16,m1,ta,mu +; LMULMAX1-NEXT: vse16.v v28, (a1) +; LMULMAX1-NEXT: vse16.v v29, (a0) +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: truncstore_v16i64_v16i16: +; LMULMAX4: # %bb.0: 
+; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v26, v8, 0 +; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v28, v26, 0 +; LMULMAX4-NEXT: vsetivli a1, 16, e16,m2,ta,mu +; LMULMAX4-NEXT: vmv.v.i v26, 0 +; LMULMAX4-NEXT: vsetivli a1, 8, e16,m2,tu,mu +; LMULMAX4-NEXT: vslideup.vi v26, v28, 0 +; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v28, v12, 0 +; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v30, v28, 0 +; LMULMAX4-NEXT: vsetivli a1, 16, e16,m2,tu,mu +; LMULMAX4-NEXT: vslideup.vi v26, v30, 8 +; LMULMAX4-NEXT: vsetivli a1, 16, e16,m2,ta,mu +; LMULMAX4-NEXT: vse16.v v26, (a0) +; LMULMAX4-NEXT: ret + %y = trunc <16 x i64> %x to <16 x i16> + store <16 x i16> %y, <16 x i16>* %z + ret void +} + +define void @truncstore_v16i64_v16i32(<16 x i64> %x, <16 x i32>* %z) { +; LMULMAX1-LABEL: truncstore_v16i64_v16i32: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v8, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vmv.v.i v26, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,m1,tu,mu +; LMULMAX1-NEXT: vmv1r.v v27, v26 +; LMULMAX1-NEXT: vslideup.vi v27, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v9, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v27, v25, 2 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v10, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,m1,tu,mu +; LMULMAX1-NEXT: vmv1r.v v28, v26 +; LMULMAX1-NEXT: vslideup.vi v28, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v11, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v28, v25, 2 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v12, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,m1,tu,mu +; LMULMAX1-NEXT: vmv1r.v v29, v26 +; LMULMAX1-NEXT: vslideup.vi v29, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v13, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v29, v25, 2 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v14, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v26, v25, 0 +; LMULMAX1-NEXT: vsetivli a1, 2, e32,mf2,ta,mu +; LMULMAX1-NEXT: vnsrl.wi v25, v15, 0 +; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v26, v25, 2 +; LMULMAX1-NEXT: addi a1, a0, 48 +; LMULMAX1-NEXT: vsetivli a2, 4, e32,m1,ta,mu +; LMULMAX1-NEXT: vse32.v v26, (a1) +; LMULMAX1-NEXT: addi a1, a0, 32 +; LMULMAX1-NEXT: vse32.v v29, (a1) +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: vse32.v v28, (a1) +; LMULMAX1-NEXT: vse32.v v27, (a0) +; LMULMAX1-NEXT: ret +; +; LMULMAX4-LABEL: truncstore_v16i64_v16i32: +; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v28, v8, 0 +; LMULMAX4-NEXT: vsetivli a1, 16, e32,m4,ta,mu +; LMULMAX4-NEXT: vmv.v.i v8, 0 +; LMULMAX4-NEXT: vsetivli a1, 8, e32,m4,tu,mu +; LMULMAX4-NEXT: vslideup.vi v8, v28, 0 +; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; LMULMAX4-NEXT: vnsrl.wi v28, v12, 0 +; LMULMAX4-NEXT: vsetivli a1, 16, e32,m4,tu,mu +; LMULMAX4-NEXT: vslideup.vi v8, v28, 8 +; LMULMAX4-NEXT: vsetivli a1, 16, e32,m4,ta,mu +; LMULMAX4-NEXT: vse32.v v8, (a0) +; LMULMAX4-NEXT: ret + %y = trunc <16 x i64> %x to <16 x i32> + store <16 x i32> %y, <16 x i32>* %z + ret void +} + +define 
@extload_nxv2f16_nxv2f32(* %x) { +; CHECK-LABEL: extload_nxv2f16_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vfwcvt.f.f.v v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = fpext %y to + ret %z +} + +define @extload_nxv2f16_nxv2f64(* %x) { +; CHECK-LABEL: extload_nxv2f16_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vfwcvt.f.f.v v26, v25 +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v8, v26 +; CHECK-NEXT: ret + %y = load , * %x + %z = fpext %y to + ret %z +} + +define @extload_nxv4f16_nxv4f32(* %x) { +; CHECK-LABEL: extload_nxv4f16_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vl1re16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = fpext %y to + ret %z +} + +define @extload_nxv4f16_nxv4f64(* %x) { +; CHECK-LABEL: extload_nxv4f16_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl1re16.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v26, v25 +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v8, v26 +; CHECK-NEXT: ret + %y = load , * %x + %z = fpext %y to + ret %z +} + +define @extload_nxv8f16_nxv8f32(* %x) { +; CHECK-LABEL: extload_nxv8f16_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vl2re16.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v8, v26 +; CHECK-NEXT: ret + %y = load , * %x + %z = fpext %y to + ret %z +} + +define @extload_nxv8f16_nxv8f64(* %x) { +; CHECK-LABEL: extload_nxv8f16_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl2re16.v v26, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v28, v26 +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v8, v28 +; CHECK-NEXT: ret + %y = load , * %x + %z = fpext %y to + ret %z +} + +define @extload_nxv16f16_nxv16f32(* %x) { +; CHECK-LABEL: extload_nxv16f16_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vl4re16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v8, v28 +; CHECK-NEXT: ret + %y = load , * %x + %z = fpext %y to + ret %z +} + +define @extload_nxv16f16_nxv16f64(* %x) { +; CHECK-LABEL: extload_nxv16f16_nxv16f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl4re16.v v28, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v16, v28 +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v24, v30 +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v16, v24 +; CHECK-NEXT: ret + %y = load , * %x + %z = fpext %y to + ret %z +} + +define void @truncstore_nxv2f32_nxv2f16( %x, * %z) { +; CHECK-LABEL: truncstore_nxv2f32_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu +; CHECK-NEXT: vfncvt.f.f.w v25, v8 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %y = fptrunc %x to + store %y, * %z + ret void +} + +define @extload_nxv2f32_nxv2f64(* %x) { +; CHECK-LABEL: extload_nxv2f32_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl1re32.v v25, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vfwcvt.f.f.v v8, v25 +; CHECK-NEXT: ret + %y = load , * %x + %z = fpext %y to + ret %z +} + +define void @truncstore_nxv4f32_nxv4f16( %x, * %z) { +; CHECK-LABEL: truncstore_nxv4f32_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, 
+; CHECK-NEXT: vfncvt.f.f.w v25, v8
+; CHECK-NEXT: vs1r.v v25, (a0)
+; CHECK-NEXT: ret
+  %y = fptrunc <vscale x 4 x float> %x to <vscale x 4 x half>
+  store <vscale x 4 x half> %y, <vscale x 4 x half>* %z
+  ret void
+}
+
+define <vscale x 4 x double> @extload_nxv4f32_nxv4f64(<vscale x 4 x float>* %x) {
+; CHECK-LABEL: extload_nxv4f32_nxv4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl2re32.v v26, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v8, v26
+; CHECK-NEXT: ret
+  %y = load <vscale x 4 x float>, <vscale x 4 x float>* %x
+  %z = fpext <vscale x 4 x float> %y to <vscale x 4 x double>
+  ret <vscale x 4 x double> %z
+}
+
+define void @truncstore_nxv8f32_nxv8f16(<vscale x 8 x float> %x, <vscale x 8 x half>* %z) {
+; CHECK-LABEL: truncstore_nxv8f32_nxv8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v26, v8
+; CHECK-NEXT: vs2r.v v26, (a0)
+; CHECK-NEXT: ret
+  %y = fptrunc <vscale x 8 x float> %x to <vscale x 8 x half>
+  store <vscale x 8 x half> %y, <vscale x 8 x half>* %z
+  ret void
+}
+
+define <vscale x 8 x double> @extload_nxv8f32_nxv8f64(<vscale x 8 x float>* %x) {
+; CHECK-LABEL: extload_nxv8f32_nxv8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl4re32.v v28, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v8, v28
+; CHECK-NEXT: ret
+  %y = load <vscale x 8 x float>, <vscale x 8 x float>* %x
+  %z = fpext <vscale x 8 x float> %y to <vscale x 8 x double>
+  ret <vscale x 8 x double> %z
+}
+
+define void @truncstore_nxv16f32_nxv16f16(<vscale x 16 x float> %x, <vscale x 16 x half>* %z) {
+; CHECK-LABEL: truncstore_nxv16f32_nxv16f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v28, v8
+; CHECK-NEXT: vs4r.v v28, (a0)
+; CHECK-NEXT: ret
+  %y = fptrunc <vscale x 16 x float> %x to <vscale x 16 x half>
+  store <vscale x 16 x half> %y, <vscale x 16 x half>* %z
+  ret void
+}
+
+define <vscale x 16 x double> @extload_nxv16f32_nxv16f64(<vscale x 16 x float>* %x) {
+; CHECK-LABEL: extload_nxv16f32_nxv16f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl8re32.v v24, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu
+; CHECK-NEXT: vfwcvt.f.f.v v8, v24
+; CHECK-NEXT: vfwcvt.f.f.v v16, v28
+; CHECK-NEXT: ret
+  %y = load <vscale x 16 x float>, <vscale x 16 x float>* %x
+  %z = fpext <vscale x 16 x float> %y to <vscale x 16 x double>
+  ret <vscale x 16 x double> %z
+}
+
+define void @truncstore_nxv2f64_nxv2f16(<vscale x 2 x double> %x, <vscale x 2 x half>* %z) {
+; CHECK-LABEL: truncstore_nxv2f64_nxv2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v25, v8
+; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v26, v25
+; CHECK-NEXT: vse16.v v26, (a0)
+; CHECK-NEXT: ret
+  %y = fptrunc <vscale x 2 x double> %x to <vscale x 2 x half>
+  store <vscale x 2 x half> %y, <vscale x 2 x half>* %z
+  ret void
+}
+
+define void @truncstore_nxv2f64_nxv2f32(<vscale x 2 x double> %x, <vscale x 2 x float>* %z) {
+; CHECK-LABEL: truncstore_nxv2f64_nxv2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v25, v8
+; CHECK-NEXT: vs1r.v v25, (a0)
+; CHECK-NEXT: ret
+  %y = fptrunc <vscale x 2 x double> %x to <vscale x 2 x float>
+  store <vscale x 2 x float> %y, <vscale x 2 x float>* %z
+  ret void
+}
+
+define void @truncstore_nxv4f64_nxv4f16(<vscale x 4 x double> %x, <vscale x 4 x half>* %z) {
+; CHECK-LABEL: truncstore_nxv4f64_nxv4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v26, v8
+; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v25, v26
+; CHECK-NEXT: vs1r.v v25, (a0)
+; CHECK-NEXT: ret
+  %y = fptrunc <vscale x 4 x double> %x to <vscale x 4 x half>
+  store <vscale x 4 x half> %y, <vscale x 4 x half>* %z
+  ret void
+}
+
+define void @truncstore_nxv4f64_nxv4f32(<vscale x 4 x double> %x, <vscale x 4 x float>* %z) {
+; CHECK-LABEL: truncstore_nxv4f64_nxv4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v26, v8
+; CHECK-NEXT: vs2r.v v26, (a0)
+; CHECK-NEXT: ret
+  %y = fptrunc <vscale x 4 x double> %x to <vscale x 4 x float>
+  store <vscale x 4 x float> %y, <vscale x 4 x float>* %z
+  ret void
+}
+
+define void @truncstore_nxv8f64_nxv8f16(<vscale x 8 x double> %x, <vscale x 8 x half>* %z) {
+; CHECK-LABEL: truncstore_nxv8f64_nxv8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v28, v8
+; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v26, v28
+; CHECK-NEXT: vs2r.v v26, (a0)
+; CHECK-NEXT: ret
+  %y = fptrunc <vscale x 8 x double> %x to <vscale x 8 x half>
+  store <vscale x 8 x half> %y, <vscale x 8 x half>* %z
+  ret void
+}
+
+define void @truncstore_nxv8f64_nxv8f32(<vscale x 8 x double> %x, <vscale x 8 x float>* %z) {
+; CHECK-LABEL: truncstore_nxv8f64_nxv8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v28, v8
+; CHECK-NEXT: vs4r.v v28, (a0)
+; CHECK-NEXT: ret
+  %y = fptrunc <vscale x 8 x double> %x to <vscale x 8 x float>
+  store <vscale x 8 x float> %y, <vscale x 8 x float>* %z
+  ret void
+}
+
+define void @truncstore_nxv16f64_nxv16f16(<vscale x 16 x double> %x, <vscale x 16 x half>* %z) {
+; CHECK-LABEL: truncstore_nxv16f64_nxv16f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v28, v8
+; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v8, v28
+; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v28, v16
+; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v10, v28
+; CHECK-NEXT: vs4r.v v8, (a0)
+; CHECK-NEXT: ret
+  %y = fptrunc <vscale x 16 x double> %x to <vscale x 16 x half>
+  store <vscale x 16 x half> %y, <vscale x 16 x half>* %z
+  ret void
+}
+
+define void @truncstore_nxv16f64_nxv16f32(<vscale x 16 x double> %x, <vscale x 16 x float>* %z) {
+; CHECK-LABEL: truncstore_nxv16f64_nxv16f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu
+; CHECK-NEXT: vfncvt.f.f.w v24, v8
+; CHECK-NEXT: vfncvt.f.f.w v28, v16
+; CHECK-NEXT: vs8r.v v24, (a0)
+; CHECK-NEXT: ret
+  %y = fptrunc <vscale x 16 x double> %x to <vscale x 16 x float>
+  store <vscale x 16 x float> %y, <vscale x 16 x float>* %z
+  ret void
+}
-- 
2.7.4