unsigned NumParts = TLI.getNumRegisters(Context, VT);
if (NumParts == 1) {
- SplitArgs.push_back(OrigArg);
+ // Replace the original type (pointer -> GPR).
+ SplitArgs.emplace_back(OrigArg.Reg, VT.getTypeForEVT(Context),
+ OrigArg.Flags, OrigArg.IsFixed);
return;
}
#error "You shouldn't build this"
#endif
-namespace llvm {
+#ifdef GET_TARGET_REGBANK_INFO_IMPL
RegisterBankInfo::PartialMapping X86GenRegisterBankInfo::PartMappings[]{
/* StartIdx, Length, RegBank */
// GPR value
{0, 256, X86::VECRRegBank}, // :7
{0, 512, X86::VECRRegBank}, // :8
};
+#endif // GET_TARGET_REGBANK_INFO_IMPL
+#ifdef GET_TARGET_REGBANK_INFO_CLASS
enum PartialMappingIdx {
PMI_None = -1,
PMI_GPR8,
PMI_VEC256,
PMI_VEC512
};
+#endif // GET_TARGET_REGBANK_INFO_CLASS
+#ifdef GET_TARGET_REGBANK_INFO_IMPL
#define INSTR_3OP(INFO) INFO, INFO, INFO,
#define BREAKDOWN(INDEX, NUM) \
{ &X86GenRegisterBankInfo::PartMappings[INDEX], NUM }
};
#undef INSTR_3OP
#undef BREAKDOWN
+#endif // GET_TARGET_REGBANK_INFO_IMPL
+#ifdef GET_TARGET_REGBANK_INFO_CLASS
enum ValueMappingIdx {
VMI_None = -1,
VMI_3OpsGpr8Idx = PMI_GPR8 * 3,
VMI_3OpsVec256Idx = PMI_VEC256 * 3,
VMI_3OpsVec512Idx = PMI_VEC512 * 3,
};
+#undef GET_TARGET_REGBANK_INFO_CLASS
+#endif // GET_TARGET_REGBANK_INFO_CLASS
+
+#ifdef GET_TARGET_REGBANK_INFO_IMPL
+#undef GET_TARGET_REGBANK_INFO_IMPL
+const RegisterBankInfo::ValueMapping *
+X86GenRegisterBankInfo::getValueMapping(PartialMappingIdx Idx,
+ unsigned NumOperands) {
+
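+  // ValMappings (built with the INSTR_3OP/BREAKDOWN macros above) holds three
+  // consecutive, identical entries per partial mapping, one for each operand
+  // of a 3-operand instruction, hence the Idx * 3 indexing below.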
+  // We can use the VMI_3Ops mapping for all these cases.
+ if (NumOperands <= 3 && (Idx >= PMI_GPR8 && Idx <= PMI_VEC512))
+ return &ValMappings[(unsigned)Idx * 3];
+
+ llvm_unreachable("Unsupported PartialMappingIdx.");
+}
+
+#endif // GET_TARGET_REGBANK_INFO_IMPL
-} // End llvm namespace.
//===----------------------------------------------------------------------===//
#include "X86InstructionSelector.h"
+#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86RegisterBankInfo.h"
#include "X86RegisterInfo.h"
// TODO: This should be implemented by tblgen, pattern with predicate not
// supported yet.
- if (selectBinaryOp(I, MRI))
+ if (selectBinaryOp(I, MRI, MF))
+ return true;
+ if (selectLoadStoreOp(I, MRI, MF))
return true;
return selectImpl(I);
}
bool X86InstructionSelector::selectBinaryOp(MachineInstr &I,
- MachineRegisterInfo &MRI) const {
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
- LLT Ty = MRI.getType(I.getOperand(0).getReg());
const unsigned DefReg = I.getOperand(0).getReg();
+ LLT Ty = MRI.getType(DefReg);
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
unsigned NewOpc = I.getOpcode();
- switch (I.getOpcode()) {
+ switch (NewOpc) {
case TargetOpcode::G_FADD:
NewOpc = getFAddOp(Ty, RB);
break;
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
+
+unsigned X86InstructionSelector::getLoadStoreOp(LLT &Ty, const RegisterBank &RB,
+ unsigned Opc,
+ uint64_t Alignment) const {
+ bool Isload = (Opc == TargetOpcode::G_LOAD);
+ bool HasAVX = STI.hasAVX();
+ bool HasAVX512 = STI.hasAVX512();
+ bool HasVLX = STI.hasVLX();
+
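+  // Pick the target opcode from the loaded/stored type and its register bank:
+  // scalars on the GPR bank use plain MOVs, scalar FP values on the vector
+  // bank use (V)MOVSS/(V)MOVSD, and 128-bit vectors use aligned or unaligned
+  // MOVAPS/MOVUPS variants depending on the available AVX/AVX-512 features.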
+ if (Ty == LLT::scalar(8)) {
+ if (X86::GPRRegBankID == RB.getID())
+ return Isload ? X86::MOV8rm : X86::MOV8mr;
+ } else if (Ty == LLT::scalar(16)) {
+ if (X86::GPRRegBankID == RB.getID())
+ return Isload ? X86::MOV16rm : X86::MOV16mr;
+ } else if (Ty == LLT::scalar(32)) {
+ if (X86::GPRRegBankID == RB.getID())
+ return Isload ? X86::MOV32rm : X86::MOV32mr;
+ if (X86::VECRRegBankID == RB.getID())
+ return Isload ? (HasAVX512 ? X86::VMOVSSZrm
+ : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm)
+ : (HasAVX512 ? X86::VMOVSSZmr
+ : HasAVX ? X86::VMOVSSmr : X86::MOVSSmr);
+ } else if (Ty == LLT::scalar(64)) {
+ if (X86::GPRRegBankID == RB.getID())
+ return Isload ? X86::MOV64rm : X86::MOV64mr;
+ if (X86::VECRRegBankID == RB.getID())
+ return Isload ? (HasAVX512 ? X86::VMOVSDZrm
+ : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm)
+ : (HasAVX512 ? X86::VMOVSDZmr
+ : HasAVX ? X86::VMOVSDmr : X86::MOVSDmr);
+ } else if (Ty.isVector() && Ty.getSizeInBits() == 128) {
+ if (Alignment >= 16)
+ return Isload ? (HasVLX ? X86::VMOVAPSZ128rm
+ : HasAVX512
+ ? X86::VMOVAPSZ128rm_NOVLX
+ : HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm)
+ : (HasVLX ? X86::VMOVAPSZ128mr
+ : HasAVX512
+ ? X86::VMOVAPSZ128mr_NOVLX
+ : HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr);
+ else
+ return Isload ? (HasVLX ? X86::VMOVUPSZ128rm
+ : HasAVX512
+ ? X86::VMOVUPSZ128rm_NOVLX
+ : HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm)
+ : (HasVLX ? X86::VMOVUPSZ128mr
+ : HasAVX512
+ ? X86::VMOVUPSZ128mr_NOVLX
+ : HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr);
+ }
+ return Opc;
+}
+
+bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+
+ unsigned Opc = I.getOpcode();
+
+ if (Opc != TargetOpcode::G_STORE && Opc != TargetOpcode::G_LOAD)
+ return false;
+
+ const unsigned DefReg = I.getOperand(0).getReg();
+ LLT Ty = MRI.getType(DefReg);
+ const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
+
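+  // G_LOAD/G_STORE carry a single MachineMemOperand; its alignment decides
+  // between the aligned and unaligned vector move opcodes.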
+ auto &MemOp = **I.memoperands_begin();
+ unsigned NewOpc = getLoadStoreOp(Ty, RB, Opc, MemOp.getAlignment());
+ if (NewOpc == Opc)
+ return false;
+
+ I.setDesc(TII.get(NewOpc));
+ MachineInstrBuilder MIB(MF, I);
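+  // The base register is already in place; addOffset appends the remaining
+  // X86 address operands (scale 1, no index, displacement 0, no segment).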
+ if (Opc == TargetOpcode::G_LOAD)
+ addOffset(MIB, 0);
+ else {
+    // G_STORE is (Val, Addr), but the X86 store instruction expects
+    // (Addr, Val), so move the value operand to the end.
+ I.RemoveOperand(0);
+ addOffset(MIB, 0).addUse(DefReg);
+ }
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+}
+
class LLT;
class RegisterBank;
class MachineRegisterInfo;
+class MachineFunction;
class X86InstructionSelector : public InstructionSelector {
public:
unsigned getFSubOp(LLT &Ty, const RegisterBank &RB) const;
unsigned getAddOp(LLT &Ty, const RegisterBank &RB) const;
unsigned getSubOp(LLT &Ty, const RegisterBank &RB) const;
- bool selectBinaryOp(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ unsigned getLoadStoreOp(LLT &Ty, const RegisterBank &RB, unsigned Opc,
+ uint64_t Alignment) const;
+
+ bool selectBinaryOp(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
+ bool selectLoadStoreOp(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
const X86Subtarget &STI;
const X86InstrInfo &TII;
void X86LegalizerInfo::setLegalizerInfo32bit() {
+ if (Subtarget.is64Bit())
+ return;
+
+ const LLT p0 = LLT::pointer(0, 32);
const LLT s8 = LLT::scalar(8);
const LLT s16 = LLT::scalar(16);
const LLT s32 = LLT::scalar(32);
- for (auto Ty : {s8, s16, s32}) {
- setAction({G_ADD, Ty}, Legal);
- setAction({G_SUB, Ty}, Legal);
+ for (unsigned BinOp : {G_ADD, G_SUB})
+ for (auto Ty : {s8, s16, s32})
+ setAction({BinOp, Ty}, Legal);
+
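+  // Loads and stores of 8/16/32-bit scalars and of pointers are legal.
+  // The second setAction covers type index 1, i.e. the pointer (address)
+  // operand of G_LOAD/G_STORE.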
+ for (unsigned MemOp : {G_LOAD, G_STORE}) {
+ for (auto Ty : {s8, s16, s32, p0})
+ setAction({MemOp, Ty}, Legal);
+
+ // And everything's fine in addrspace 0.
+ setAction({MemOp, 1, p0}, Legal);
}
}
if (!Subtarget.is64Bit())
return;
+ const LLT p0 = LLT::pointer(0, 64);
+ const LLT s8 = LLT::scalar(8);
+ const LLT s16 = LLT::scalar(16);
+ const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
- setAction({G_ADD, s64}, Legal);
- setAction({G_SUB, s64}, Legal);
+ for (unsigned BinOp : {G_ADD, G_SUB})
+ for (auto Ty : {s8, s16, s32, s64})
+ setAction({BinOp, Ty}, Legal);
+
+ for (unsigned MemOp : {G_LOAD, G_STORE}) {
+ for (auto Ty : {s8, s16, s32, s64, p0})
+ setAction({MemOp, Ty}, Legal);
+
+ // And everything's fine in addrspace 0.
+ setAction({MemOp, 1, p0}, Legal);
+ }
}
void X86LegalizerInfo::setLegalizerInfoSSE1() {
const LLT s32 = LLT::scalar(32);
const LLT v4s32 = LLT::vector(4, 32);
+ const LLT v2s64 = LLT::vector(2, 64);
for (unsigned BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV})
for (auto Ty : {s32, v4s32})
setAction({BinOp, Ty}, Legal);
+
+ for (unsigned MemOp : {G_LOAD, G_STORE})
+ for (auto Ty : {v4s32, v2s64})
+ setAction({MemOp, Ty}, Legal);
}
void X86LegalizerInfo::setLegalizerInfoSSE2() {
#define GET_TARGET_REGBANK_IMPL
#include "X86GenRegisterBank.inc"
+using namespace llvm;
// This file will be TableGen'ed at some point.
+#define GET_TARGET_REGBANK_INFO_IMPL
#include "X86GenRegisterBankInfo.def"
-using namespace llvm;
-
#ifndef LLVM_BUILD_GLOBAL_ISEL
#error "You shouldn't build this"
#endif
llvm_unreachable("Unsupported register kind yet.");
}
-RegisterBankInfo::InstructionMapping
-X86RegisterBankInfo::getOperandsMapping(const MachineInstr &MI, bool isFP) {
- const MachineFunction &MF = *MI.getParent()->getParent();
- const MachineRegisterInfo &MRI = MF.getRegInfo();
-
- unsigned NumOperands = MI.getNumOperands();
- LLT Ty = MRI.getType(MI.getOperand(0).getReg());
-
- if (NumOperands != 3 || (Ty != MRI.getType(MI.getOperand(1).getReg())) ||
- (Ty != MRI.getType(MI.getOperand(2).getReg())))
- llvm_unreachable("Unsupported operand maping yet.");
-
- ValueMappingIdx ValMapIdx = VMI_None;
-
- if (Ty.isScalar()) {
- if (!isFP) {
- switch (Ty.getSizeInBits()) {
- case 8:
- ValMapIdx = VMI_3OpsGpr8Idx;
- break;
- case 16:
- ValMapIdx = VMI_3OpsGpr16Idx;
- break;
- case 32:
- ValMapIdx = VMI_3OpsGpr32Idx;
- break;
- case 64:
- ValMapIdx = VMI_3OpsGpr64Idx;
- break;
- default:
- llvm_unreachable("Unsupported register size.");
- }
- } else {
- switch (Ty.getSizeInBits()) {
- case 32:
- ValMapIdx = VMI_3OpsFp32Idx;
- break;
- case 64:
- ValMapIdx = VMI_3OpsFp64Idx;
- break;
- default:
- llvm_unreachable("Unsupported register size.");
- }
+X86GenRegisterBankInfo::PartialMappingIdx
+X86GenRegisterBankInfo::getPartialMappingIdx(const LLT &Ty, bool isFP) {
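+  // Non-FP scalars and pointers map to the GPR partial mappings, FP scalars
+  // to the FP32/FP64 mappings, and vectors to the VEC128/256/512 mappings.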
+ if ((Ty.isScalar() && !isFP) || Ty.isPointer()) {
+ switch (Ty.getSizeInBits()) {
+ case 8:
+ return PMI_GPR8;
+ case 16:
+ return PMI_GPR16;
+ case 32:
+ return PMI_GPR32;
+    case 64:
+      return PMI_GPR64;
+ default:
+ llvm_unreachable("Unsupported register size.");
+ }
+ } else if (Ty.isScalar()) {
+ switch (Ty.getSizeInBits()) {
+ case 32:
+ return PMI_FP32;
+ case 64:
+ return PMI_FP64;
+ default:
+ llvm_unreachable("Unsupported register size.");
}
} else {
switch (Ty.getSizeInBits()) {
case 128:
- ValMapIdx = VMI_3OpsVec128Idx;
- break;
+ return PMI_VEC128;
case 256:
- ValMapIdx = VMI_3OpsVec256Idx;
- break;
+ return PMI_VEC256;
case 512:
- ValMapIdx = VMI_3OpsVec512Idx;
- break;
+ return PMI_VEC512;
default:
llvm_unreachable("Unsupported register size.");
}
}
- return InstructionMapping{DefaultMappingID, 1, &ValMappings[ValMapIdx],
- NumOperands};
+ return PMI_None;
+}
+
+RegisterBankInfo::InstructionMapping
+X86RegisterBankInfo::getSameOperandsMapping(const MachineInstr &MI, bool isFP) {
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ unsigned NumOperands = MI.getNumOperands();
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+
+ if (NumOperands != 3 || (Ty != MRI.getType(MI.getOperand(1).getReg())) ||
+ (Ty != MRI.getType(MI.getOperand(2).getReg())))
+ llvm_unreachable("Unsupported operand mapping yet.");
+
+ auto Mapping = getValueMapping(getPartialMappingIdx(Ty, isFP), 3);
+ return InstructionMapping{DefaultMappingID, 1, Mapping, NumOperands};
}
RegisterBankInfo::InstructionMapping
X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
auto Opc = MI.getOpcode();
// Try the default logic for non-generic instructions that are either copies
switch (Opc) {
case TargetOpcode::G_ADD:
case TargetOpcode::G_SUB:
- return getOperandsMapping(MI, false);
+ return getSameOperandsMapping(MI, false);
break;
case TargetOpcode::G_FADD:
case TargetOpcode::G_FSUB:
case TargetOpcode::G_FMUL:
case TargetOpcode::G_FDIV:
- return getOperandsMapping(MI, true);
+ return getSameOperandsMapping(MI, true);
break;
default:
- return InstructionMapping{};
+ break;
+ }
+
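+  // Opcodes without a dedicated mapping fall through to a default mapping
+  // computed per operand from its type: scalars and pointers are assumed to
+  // live on the GPR bank, vectors on the VECR bank.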
+ unsigned NumOperands = MI.getNumOperands();
+  unsigned Cost = 1; // Set the default cost.
+
+ // Track the bank of each register.
+ SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
+ for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
+ auto &MO = MI.getOperand(Idx);
+ if (!MO.isReg())
+ continue;
+
+ // As a top-level guess, use NotFP mapping (all scalars in GPRs)
+ OpRegBankIdx[Idx] = getPartialMappingIdx(MRI.getType(MO.getReg()), false);
+ }
+
+ // Finally construct the computed mapping.
+ RegisterBankInfo::InstructionMapping Mapping =
+ InstructionMapping{DefaultMappingID, Cost, nullptr, NumOperands};
+ SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
+ for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
+ if (MI.getOperand(Idx).isReg()) {
+ auto Mapping = getValueMapping(OpRegBankIdx[Idx], 1);
+ if (!Mapping->isValid())
+ return InstructionMapping();
+
+ OpdsMapping[Idx] = Mapping;
+ }
}
- return InstructionMapping{};
+ Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
+ return Mapping;
}
namespace llvm {
+class LLT;
+
class X86GenRegisterBankInfo : public RegisterBankInfo {
protected:
+#define GET_TARGET_REGBANK_CLASS
+#include "X86GenRegisterBank.inc"
+#define GET_TARGET_REGBANK_INFO_CLASS
+#include "X86GenRegisterBankInfo.def"
+
static RegisterBankInfo::PartialMapping PartMappings[];
static RegisterBankInfo::ValueMapping ValMappings[];
-#define GET_TARGET_REGBANK_CLASS
-#include "X86GenRegisterBank.inc"
+ static PartialMappingIdx getPartialMappingIdx(const LLT &Ty, bool isFP);
+ static const RegisterBankInfo::ValueMapping *
+ getValueMapping(PartialMappingIdx Idx, unsigned NumOperands);
};
class TargetRegisterInfo;
/// Get an instruction mapping.
/// \return An InstructionMappings with a statically allocated
/// OperandsMapping.
- static InstructionMapping getOperandsMapping(const MachineInstr &MI,
- bool isFP);
+ static InstructionMapping getSameOperandsMapping(const MachineInstr &MI,
+ bool isFP);
public:
X86RegisterBankInfo(const TargetRegisterInfo &TRI);
ret <4 x float> %ret
}
+ define i8 @test_load_i8(i8* %p1) {
+ %r = load i8, i8* %p1
+ ret i8 %r
+ }
+
+ define i16 @test_load_i16(i16* %p1) {
+ %r = load i16, i16* %p1
+ ret i16 %r
+ }
+
+ define i32 @test_load_i32(i32* %p1) {
+ %r = load i32, i32* %p1
+ ret i32 %r
+ }
+
+ define i64 @test_load_i64(i64* %p1) {
+ %r = load i64, i64* %p1
+ ret i64 %r
+ }
+
+ define float @test_load_float(float* %p1) {
+ %r = load float, float* %p1
+ ret float %r
+ }
+
+ define double @test_load_double(double* %p1) {
+ %r = load double, double* %p1
+ ret double %r
+ }
+
+ define <4 x i32> @test_load_v4i32(<4 x i32>* %p1) {
+ %r = load <4 x i32>, <4 x i32>* %p1, align 16
+ ret <4 x i32> %r
+ }
+
+ define i32* @test_store_i32(i32 %val, i32* %p1) {
+ store i32 %val, i32* %p1
+ ret i32* %p1
+ }
+
+ define i64* @test_store_i64(i64 %val, i64* %p1) {
+ store i64 %val, i64* %p1
+ ret i64* %p1
+ }
+
+ define float* @test_store_float(float %val, float* %p1) {
+ store float %val, float* %p1
+ ret float* %p1
+ }
+
+ define double* @test_store_double(double %val, double* %p1) {
+ store double %val, double* %p1
+ ret double* %p1
+ }
+
...
---
name: test_add_i8
RET 0, implicit %xmm0
...
+---
+name: test_load_i8
+alignment: 4
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK-LABEL: name: test_load_i8
+# CHECK: registers:
+# CHECK: - { id: 0, class: gpr }
+# CHECK: - { id: 1, class: gpr }
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi
+
+ %0(p0) = COPY %rdi
+ %1(s8) = G_LOAD %0(p0) :: (load 1 from %ir.p1)
+ %al = COPY %1(s8)
+ RET 0, implicit %al
+
+...
+---
+name: test_load_i16
+alignment: 4
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK-LABEL: name: test_load_i16
+# CHECK: registers:
+# CHECK: - { id: 0, class: gpr }
+# CHECK: - { id: 1, class: gpr }
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi
+
+ %0(p0) = COPY %rdi
+ %1(s16) = G_LOAD %0(p0) :: (load 2 from %ir.p1)
+ %ax = COPY %1(s16)
+ RET 0, implicit %ax
+
+...
+---
+name: test_load_i32
+alignment: 4
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK-LABEL: name: test_load_i32
+# CHECK: registers:
+# CHECK: - { id: 0, class: gpr }
+# CHECK: - { id: 1, class: gpr }
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi
+
+ %0(p0) = COPY %rdi
+ %1(s32) = G_LOAD %0(p0) :: (load 4 from %ir.p1)
+ %eax = COPY %1(s32)
+ RET 0, implicit %eax
+
+...
+---
+name: test_load_i64
+alignment: 4
+exposesReturnsTwice: false
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK-LABEL: name: test_load_i64
+# CHECK: registers:
+# CHECK: - { id: 0, class: gpr }
+# CHECK: - { id: 1, class: gpr }
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi
+
+ %0(p0) = COPY %rdi
+ %1(s64) = G_LOAD %0(p0) :: (load 8 from %ir.p1)
+ %rax = COPY %1(s64)
+ RET 0, implicit %rax
+
+...
+---
+name: test_load_float
+alignment: 4
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK-LABEL: name: test_load_float
+# CHECK: registers:
+# CHECK: - { id: 0, class: gpr }
+# CHECK: - { id: 1, class: gpr }
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi
+
+ %0(p0) = COPY %rdi
+ %1(s32) = G_LOAD %0(p0) :: (load 4 from %ir.p1)
+ %xmm0 = COPY %1(s32)
+ RET 0, implicit %xmm0
+
+...
+---
+name: test_load_double
+alignment: 4
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK-LABEL: name: test_load_double
+# CHECK: registers:
+# CHECK: - { id: 0, class: gpr }
+# CHECK: - { id: 1, class: gpr }
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi
+
+ %0(p0) = COPY %rdi
+ %1(s64) = G_LOAD %0(p0) :: (load 8 from %ir.p1)
+ %xmm0 = COPY %1(s64)
+ RET 0, implicit %xmm0
+
+...
+---
+name: test_load_v4i32
+alignment: 4
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK-LABEL: name: test_load_v4i32
+# CHECK: registers:
+# CHECK: - { id: 0, class: gpr }
+# CHECK: - { id: 1, class: vecr }
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi
+
+ %0(p0) = COPY %rdi
+ %1(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.p1, align 1)
+ %xmm0 = COPY %1(<4 x s32>)
+ RET 0, implicit %xmm0
+
+...
+---
+name: test_store_i32
+alignment: 4
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK-LABEL: name: test_store_i32
+# CHECK: registers:
+# CHECK: - { id: 0, class: gpr }
+# CHECK: - { id: 1, class: gpr }
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %edi, %rsi
+
+ %0(s32) = COPY %edi
+ %1(p0) = COPY %rsi
+ G_STORE %0(s32), %1(p0) :: (store 4 into %ir.p1)
+ %rax = COPY %1(p0)
+ RET 0, implicit %rax
+
+...
+---
+name: test_store_i64
+alignment: 4
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK-LABEL: name: test_store_i64
+# CHECK: registers:
+# CHECK: - { id: 0, class: gpr }
+# CHECK: - { id: 1, class: gpr }
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi, %rsi
+
+ %0(s64) = COPY %rdi
+ %1(p0) = COPY %rsi
+ G_STORE %0(s64), %1(p0) :: (store 8 into %ir.p1)
+ %rax = COPY %1(p0)
+ RET 0, implicit %rax
+
+...
+---
+name: test_store_float
+alignment: 4
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK-LABEL: name: test_store_float
+# CHECK: registers:
+# CHECK: - { id: 0, class: vecr }
+# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 2, class: gpr }
+
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi, %xmm0
+
+ %0(s32) = COPY %xmm0
+ %1(p0) = COPY %rdi
+ ; CHECK: %2(s32) = COPY %0(s32)
+ ; CHECK: G_STORE %2(s32), %1(p0) :: (store 4 into %ir.p1)
+ G_STORE %0(s32), %1(p0) :: (store 4 into %ir.p1)
+ %rax = COPY %1(p0)
+ RET 0, implicit %rax
+
+...
+---
+name: test_store_double
+alignment: 4
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK-LABEL: name: test_store_double
+# CHECK: registers:
+# CHECK: - { id: 0, class: vecr }
+# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 2, class: gpr }
+
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi, %xmm0
+
+ %0(s64) = COPY %xmm0
+ %1(p0) = COPY %rdi
+ ; CHECK: %2(s64) = COPY %0(s64)
+ ; CHECK: G_STORE %2(s64), %1(p0) :: (store 8 into %ir.p1)
+ G_STORE %0(s64), %1(p0) :: (store 8 into %ir.p1)
+ %rax = COPY %1(p0)
+ RET 0, implicit %rax
+
+...
%ret = fsub <4 x float> %arg1, %arg2
ret <4 x float> %ret
}
+
+define i32 @test_copy_float(float %val) {
+; SSE-LABEL: test_copy_float:
+; SSE: # BB#0:
+; SSE-NEXT: movd %xmm0, %eax
+; SSE-NEXT: retq
+;
+; ALL_AVX-LABEL: test_copy_float:
+; ALL_AVX: # BB#0:
+; ALL_AVX-NEXT: vmovd %xmm0, %eax
+; ALL_AVX-NEXT: retq
+ %r = bitcast float %val to i32
+ ret i32 %r
+}
+
+define float @test_copy_i32(i32 %val) {
+; SSE-LABEL: test_copy_i32:
+; SSE: # BB#0:
+; SSE-NEXT: movd %edi, %xmm0
+; SSE-NEXT: retq
+;
+; ALL_AVX-LABEL: test_copy_i32:
+; ALL_AVX: # BB#0:
+; ALL_AVX-NEXT: vmovd %edi, %xmm0
+; ALL_AVX-NEXT: retq
+ %r = bitcast i32 %val to float
+ ret float %r
+}
+
ret double %arg2
}
+
+define i32 * @test_memop_i32(i32 * %p1) {
+; ALL-LABEL:name: test_memop_i32
+;X64: liveins: %rdi
+;X64: %0(p0) = COPY %rdi
+;X64-NEXT: %rax = COPY %0(p0)
+;X64-NEXT: RET 0, implicit %rax
+
+;X32: fixedStack:
+;X32: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false }
+;X32: %1(p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
+;X32-NEXT: %0(p0) = G_LOAD %1(p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0)
+;X32-NEXT: %eax = COPY %0(p0)
+;X32-NEXT: RET 0, implicit %eax
+
+ ret i32 * %p1;
+}
\ No newline at end of file
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-linux-gnu -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=SSE
+; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=ALL_AVX --check-prefix=AVX
+; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=ALL_AVX --check-prefix=AVX512F
+; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -mattr=+avx512vl -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=ALL_AVX --check-prefix=AVX512VL
+
+
+define i8 @test_load_i8(i8 * %p1) {
+; ALL-LABEL: test_load_i8:
+; ALL: # BB#0:
+; ALL-NEXT: movb (%rdi), %al
+; ALL-NEXT: retq
+ %r = load i8, i8* %p1
+ ret i8 %r
+}
+
+define i16 @test_load_i16(i16 * %p1) {
+; ALL-LABEL: test_load_i16:
+; ALL: # BB#0:
+; ALL-NEXT: movzwl (%rdi), %eax
+; ALL-NEXT: retq
+ %r = load i16, i16* %p1
+ ret i16 %r
+}
+
+define i32 @test_load_i32(i32 * %p1) {
+; ALL-LABEL: test_load_i32:
+; ALL: # BB#0:
+; ALL-NEXT: movl (%rdi), %eax
+; ALL-NEXT: retq
+ %r = load i32, i32* %p1
+ ret i32 %r
+}
+
+define i64 @test_load_i64(i64 * %p1) {
+; ALL-LABEL: test_load_i64:
+; ALL: # BB#0:
+; ALL-NEXT: movq (%rdi), %rax
+; ALL-NEXT: retq
+ %r = load i64, i64* %p1
+ ret i64 %r
+}
+
+define float @test_load_float(float * %p1) {
+; SSE-LABEL: test_load_float:
+; SSE: # BB#0:
+; SSE-NEXT: movl (%rdi), %eax
+; SSE-NEXT: movd %eax, %xmm0
+; SSE-NEXT: retq
+;
+; ALL_AVX-LABEL: test_load_float:
+; ALL_AVX: # BB#0:
+; ALL_AVX-NEXT: movl (%rdi), %eax
+; ALL_AVX-NEXT: vmovd %eax, %xmm0
+; ALL_AVX-NEXT: retq
+ %r = load float, float* %p1
+ ret float %r
+}
+
+define double @test_load_double(double * %p1) {
+; SSE-LABEL: test_load_double:
+; SSE: # BB#0:
+; SSE-NEXT: movq (%rdi), %rax
+; SSE-NEXT: movd %rax, %xmm0
+; SSE-NEXT: retq
+;
+; ALL_AVX-LABEL: test_load_double:
+; ALL_AVX: # BB#0:
+; ALL_AVX-NEXT: movq (%rdi), %rax
+; ALL_AVX-NEXT: vmovq %rax, %xmm0
+; ALL_AVX-NEXT: retq
+ %r = load double, double* %p1
+ ret double %r
+}
+
+define <4 x i32> @test_load_v4i32_noalign(<4 x i32> * %p1) {
+; SSE-LABEL: test_load_v4i32_noalign:
+; SSE: # BB#0:
+; SSE-NEXT: movups (%rdi), %xmm0
+; SSE-NEXT: retq
+;
+; ALL_AVX-LABEL: test_load_v4i32_noalign:
+; ALL_AVX: # BB#0:
+; ALL_AVX-NEXT: vmovups (%rdi), %xmm0
+; ALL_AVX-NEXT: retq
+ %r = load <4 x i32>, <4 x i32>* %p1, align 1
+ ret <4 x i32> %r
+}
+
+define <4 x i32> @test_load_v4i32_align(<4 x i32> * %p1) {
+; SSE-LABEL: test_load_v4i32_align:
+; SSE: # BB#0:
+; SSE-NEXT: movaps (%rdi), %xmm0
+; SSE-NEXT: retq
+;
+; ALL_AVX-LABEL: test_load_v4i32_align:
+; ALL_AVX: # BB#0:
+; ALL_AVX-NEXT: vmovaps (%rdi), %xmm0
+; ALL_AVX-NEXT: retq
+ %r = load <4 x i32>, <4 x i32>* %p1, align 16
+ ret <4 x i32> %r
+}
+
+define i32 * @test_store_i32(i32 %val, i32 * %p1) {
+; ALL-LABEL: test_store_i32:
+; ALL: # BB#0:
+; ALL-NEXT: movl %edi, (%rsi)
+; ALL-NEXT: movq %rsi, %rax
+; ALL-NEXT: retq
+ store i32 %val, i32* %p1
+ ret i32 * %p1;
+}
+
+define i64 * @test_store_i64(i64 %val, i64 * %p1) {
+; ALL-LABEL: test_store_i64:
+; ALL: # BB#0:
+; ALL-NEXT: movq %rdi, (%rsi)
+; ALL-NEXT: movq %rsi, %rax
+; ALL-NEXT: retq
+ store i64 %val, i64* %p1
+ ret i64 * %p1;
+}
+
+define float * @test_store_float(float %val, float * %p1) {
+; SSE-LABEL: test_store_float:
+; SSE: # BB#0:
+; SSE-NEXT: movd %xmm0, %eax
+; SSE-NEXT: movl %eax, (%rdi)
+; SSE-NEXT: movq %rdi, %rax
+; SSE-NEXT: retq
+;
+; ALL_AVX-LABEL: test_store_float:
+; ALL_AVX: # BB#0:
+; ALL_AVX-NEXT: vmovd %xmm0, %eax
+; ALL_AVX-NEXT: movl %eax, (%rdi)
+; ALL_AVX-NEXT: movq %rdi, %rax
+; ALL_AVX-NEXT: retq
+ store float %val, float* %p1
+ ret float * %p1;
+}
+
+define double * @test_store_double(double %val, double * %p1) {
+; SSE-LABEL: test_store_double:
+; SSE: # BB#0:
+; SSE-NEXT: movd %xmm0, %rax
+; SSE-NEXT: movq %rax, (%rdi)
+; SSE-NEXT: movq %rdi, %rax
+; SSE-NEXT: retq
+;
+; ALL_AVX-LABEL: test_store_double:
+; ALL_AVX: # BB#0:
+; ALL_AVX-NEXT: vmovq %xmm0, %rax
+; ALL_AVX-NEXT: movq %rax, (%rdi)
+; ALL_AVX-NEXT: movq %rdi, %rax
+; ALL_AVX-NEXT: retq
+ store double %val, double* %p1
+ ret double * %p1;
+}
+
%ret = fsub <4 x float> %arg1, %arg2
ret <4 x float> %ret
}
+
+ define i8 @test_load_i8(i8* %p1) {
+ %r = load i8, i8* %p1
+ ret i8 %r
+ }
+
+ define i16 @test_load_i16(i16* %p1) {
+ %r = load i16, i16* %p1
+ ret i16 %r
+ }
+
+ define i32 @test_load_i32(i32* %p1) {
+ %r = load i32, i32* %p1
+ ret i32 %r
+ }
+
+ define i64 @test_load_i64(i64* %p1) {
+ %r = load i64, i64* %p1
+ ret i64 %r
+ }
+
+ define float @test_load_float(float* %p1) {
+ %r = load float, float* %p1
+ ret float %r
+ }
+
+ define float @test_load_float_vecreg(float* %p1) {
+ %r = load float, float* %p1
+ ret float %r
+ }
+
+ define double @test_load_double(double* %p1) {
+ %r = load double, double* %p1
+ ret double %r
+ }
+
+ define double @test_load_double_vecreg(double* %p1) {
+ %r = load double, double* %p1
+ ret double %r
+ }
+
+ define <4 x i32> @test_load_v4i32_noalign(<4 x i32>* %p1) {
+ %r = load <4 x i32>, <4 x i32>* %p1, align 1
+ ret <4 x i32> %r
+ }
+
+ define <4 x i32> @test_load_v4i32_align(<4 x i32>* %p1) {
+ %r = load <4 x i32>, <4 x i32>* %p1, align 16
+ ret <4 x i32> %r
+ }
+
+ define i32* @test_store_i32(i32 %val, i32* %p1) {
+ store i32 %val, i32* %p1
+ ret i32* %p1
+ }
+
+ define i64* @test_store_i64(i64 %val, i64* %p1) {
+ store i64 %val, i64* %p1
+ ret i64* %p1
+ }
+
+ define float* @test_store_float(float %val, float* %p1) {
+ store float %val, float* %p1
+ ret float* %p1
+ }
+
+ define float* @test_store_float_vec(float %val, float* %p1) {
+ store float %val, float* %p1
+ ret float* %p1
+ }
+
+ define double* @test_store_double(double %val, double* %p1) {
+ store double %val, double* %p1
+ ret double* %p1
+ }
+
+ define double* @test_store_double_vec(double %val, double* %p1) {
+ store double %val, double* %p1
+ ret double* %p1
+ }
+
+ define <4 x i32>* @test_store_v4i32_align(<4 x i32> %val, <4 x i32>* %p1) {
+ store <4 x i32> %val, <4 x i32>* %p1, align 16
+ ret <4 x i32>* %p1
+ }
+
+ define <4 x i32>* @test_store_v4i32_noalign(<4 x i32> %val, <4 x i32>* %p1) {
+ store <4 x i32> %val, <4 x i32>* %p1, align 1
+ ret <4 x i32>* %p1
+ }
+
...
---
RET 0, implicit %xmm0
...
+---
+# ALL-LABEL: name: test_load_i8
+name: test_load_i8
+alignment: 4
+legalized: true
+regBankSelected: true
+registers:
+# ALL: - { id: 0, class: gr64 }
+# ALL: - { id: 1, class: gr8 }
+ - { id: 0, class: gpr }
+ - { id: 1, class: gpr }
+# ALL: %0 = COPY %rdi
+# ALL: %1 = MOV8rm %0, 1, _, 0, _ :: (load 1 from %ir.p1)
+# ALL: %al = COPY %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi
+
+ %0(p0) = COPY %rdi
+ %1(s8) = G_LOAD %0(p0) :: (load 1 from %ir.p1)
+ %al = COPY %1(s8)
+ RET 0, implicit %al
+
+...
+---
+# ALL-LABEL: name: test_load_i16
+name: test_load_i16
+alignment: 4
+legalized: true
+regBankSelected: true
+registers:
+# ALL: - { id: 0, class: gr64 }
+# ALL: - { id: 1, class: gr16 }
+ - { id: 0, class: gpr }
+ - { id: 1, class: gpr }
+# ALL: %0 = COPY %rdi
+# ALL: %1 = MOV16rm %0, 1, _, 0, _ :: (load 2 from %ir.p1)
+# ALL: %ax = COPY %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi
+
+ %0(p0) = COPY %rdi
+ %1(s16) = G_LOAD %0(p0) :: (load 2 from %ir.p1)
+ %ax = COPY %1(s16)
+ RET 0, implicit %ax
+
+...
+---
+# ALL-LABEL: name: test_load_i32
+name: test_load_i32
+alignment: 4
+legalized: true
+regBankSelected: true
+registers:
+# ALL: - { id: 0, class: gr64 }
+# ALL: - { id: 1, class: gr32 }
+ - { id: 0, class: gpr }
+ - { id: 1, class: gpr }
+# ALL: %0 = COPY %rdi
+# ALL: %1 = MOV32rm %0, 1, _, 0, _ :: (load 4 from %ir.p1)
+# ALL: %eax = COPY %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi
+
+ %0(p0) = COPY %rdi
+ %1(s32) = G_LOAD %0(p0) :: (load 4 from %ir.p1)
+ %eax = COPY %1(s32)
+ RET 0, implicit %eax
+
+...
+---
+# ALL-LABEL: name: test_load_i64
+name: test_load_i64
+alignment: 4
+legalized: true
+regBankSelected: true
+registers:
+# ALL: - { id: 0, class: gr64 }
+# ALL: - { id: 1, class: gr64 }
+ - { id: 0, class: gpr }
+ - { id: 1, class: gpr }
+# ALL: %0 = COPY %rdi
+# ALL: %1 = MOV64rm %0, 1, _, 0, _ :: (load 8 from %ir.p1)
+# ALL: %rax = COPY %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi
+
+ %0(p0) = COPY %rdi
+ %1(s64) = G_LOAD %0(p0) :: (load 8 from %ir.p1)
+ %rax = COPY %1(s64)
+ RET 0, implicit %rax
+
+...
+---
+# ALL-LABEL: name: test_load_float
+name: test_load_float
+alignment: 4
+legalized: true
+regBankSelected: true
+registers:
+# ALL: - { id: 0, class: gr64 }
+# ALL: - { id: 1, class: gr32 }
+ - { id: 0, class: gpr }
+ - { id: 1, class: gpr }
+# ALL: %0 = COPY %rdi
+# ALL: %1 = MOV32rm %0, 1, _, 0, _ :: (load 4 from %ir.p1)
+# ALL: %xmm0 = COPY %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi
+
+ %0(p0) = COPY %rdi
+ %1(s32) = G_LOAD %0(p0) :: (load 4 from %ir.p1)
+ %xmm0 = COPY %1(s32)
+ RET 0, implicit %xmm0
+
+...
+---
+# ALL-LABEL: name: test_load_float_vecreg
+name: test_load_float_vecreg
+alignment: 4
+legalized: true
+regBankSelected: true
+registers:
+# ALL: - { id: 0, class: gr64 }
+# NO_AVX512F: - { id: 1, class: fr32 }
+# AVX512ALL: - { id: 1, class: fr32x }
+ - { id: 0, class: gpr }
+ - { id: 1, class: vecr }
+# ALL: %0 = COPY %rdi
+# SSE: %1 = MOVSSrm %0, 1, _, 0, _ :: (load 4 from %ir.p1)
+# AVX: %1 = VMOVSSrm %0, 1, _, 0, _ :: (load 4 from %ir.p1)
+# AVX512ALL: %1 = VMOVSSZrm %0, 1, _, 0, _ :: (load 4 from %ir.p1)
+# ALL: %xmm0 = COPY %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi
+
+ %0(p0) = COPY %rdi
+ %1(s32) = G_LOAD %0(p0) :: (load 4 from %ir.p1)
+ %xmm0 = COPY %1(s32)
+ RET 0, implicit %xmm0
+
+...
+---
+# ALL-LABEL: name: test_load_double
+name: test_load_double
+alignment: 4
+legalized: true
+regBankSelected: true
+registers:
+# ALL: - { id: 0, class: gr64 }
+# ALL: - { id: 1, class: gr64 }
+ - { id: 0, class: gpr }
+ - { id: 1, class: gpr }
+# ALL: %0 = COPY %rdi
+# ALL: %1 = MOV64rm %0, 1, _, 0, _ :: (load 8 from %ir.p1)
+# ALL: %xmm0 = COPY %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi
+
+ %0(p0) = COPY %rdi
+ %1(s64) = G_LOAD %0(p0) :: (load 8 from %ir.p1)
+ %xmm0 = COPY %1(s64)
+ RET 0, implicit %xmm0
+
+...
+---
+# ALL-LABEL: name: test_load_double_vecreg
+name: test_load_double_vecreg
+alignment: 4
+legalized: true
+regBankSelected: true
+registers:
+# ALL: - { id: 0, class: gr64 }
+# NO_AVX512F: - { id: 1, class: fr64 }
+# AVX512ALL: - { id: 1, class: fr64x }
+ - { id: 0, class: gpr }
+ - { id: 1, class: vecr }
+# ALL: %0 = COPY %rdi
+# SSE: %1 = MOVSDrm %0, 1, _, 0, _ :: (load 8 from %ir.p1)
+# AVX: %1 = VMOVSDrm %0, 1, _, 0, _ :: (load 8 from %ir.p1)
+# AVX512ALL: %1 = VMOVSDZrm %0, 1, _, 0, _ :: (load 8 from %ir.p1)
+# ALL: %xmm0 = COPY %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi
+
+ %0(p0) = COPY %rdi
+ %1(s64) = G_LOAD %0(p0) :: (load 8 from %ir.p1)
+ %xmm0 = COPY %1(s64)
+ RET 0, implicit %xmm0
+
+...
+---
+# ALL-LABEL: name: test_load_v4i32_noalign
+name: test_load_v4i32_noalign
+alignment: 4
+legalized: true
+regBankSelected: true
+registers:
+# ALL: - { id: 0, class: gr64 }
+# NO_AVX512F: - { id: 1, class: vr128 }
+# AVX512ALL: - { id: 1, class: vr128x }
+ - { id: 0, class: gpr }
+ - { id: 1, class: vecr }
+# ALL: %0 = COPY %rdi
+# SSE: %1 = MOVUPSrm %0, 1, _, 0, _ :: (load 16 from %ir.p1, align 1)
+# AVX: %1 = VMOVUPSrm %0, 1, _, 0, _ :: (load 16 from %ir.p1, align 1)
+# AVX512F: %1 = VMOVUPSZ128rm_NOVLX %0, 1, _, 0, _ :: (load 16 from %ir.p1, align 1)
+# AVX512VL: %1 = VMOVUPSZ128rm %0, 1, _, 0, _ :: (load 16 from %ir.p1, align 1)
+# ALL: %xmm0 = COPY %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi
+
+ %0(p0) = COPY %rdi
+ %1(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.p1, align 1)
+ %xmm0 = COPY %1(<4 x s32>)
+ RET 0, implicit %xmm0
+
+...
+---
+# ALL-LABEL: name: test_load_v4i32_align
+name: test_load_v4i32_align
+alignment: 4
+legalized: true
+regBankSelected: true
+registers:
+# ALL: - { id: 0, class: gr64 }
+# NO_AVX512F: - { id: 1, class: vr128 }
+# AVX512ALL: - { id: 1, class: vr128x }
+ - { id: 0, class: gpr }
+ - { id: 1, class: vecr }
+# ALL: %0 = COPY %rdi
+# SSE: %1 = MOVAPSrm %0, 1, _, 0, _ :: (load 16 from %ir.p1)
+# AVX: %1 = VMOVAPSrm %0, 1, _, 0, _ :: (load 16 from %ir.p1)
+# AVX512F: %1 = VMOVAPSZ128rm_NOVLX %0, 1, _, 0, _ :: (load 16 from %ir.p1)
+# AVX512VL: %1 = VMOVAPSZ128rm %0, 1, _, 0, _ :: (load 16 from %ir.p1)
+# ALL: %xmm0 = COPY %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi
+
+ %0(p0) = COPY %rdi
+ %1(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.p1)
+ %xmm0 = COPY %1(<4 x s32>)
+ RET 0, implicit %xmm0
+
+...
+---
+# ALL-LABEL: name: test_store_i32
+name: test_store_i32
+alignment: 4
+legalized: true
+regBankSelected: true
+registers:
+# ALL: - { id: 0, class: gr32 }
+# ALL: - { id: 1, class: gr64 }
+ - { id: 0, class: gpr }
+ - { id: 1, class: gpr }
+# ALL: %0 = COPY %edi
+# ALL: %1 = COPY %rsi
+# ALL: MOV32mr %1, 1, _, 0, _, %0 :: (store 4 into %ir.p1)
+# ALL: %rax = COPY %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %edi, %rsi
+
+ %0(s32) = COPY %edi
+ %1(p0) = COPY %rsi
+ G_STORE %0(s32), %1(p0) :: (store 4 into %ir.p1)
+ %rax = COPY %1(p0)
+ RET 0, implicit %rax
+
+...
+---
+# ALL-LABEL: name: test_store_i64
+name: test_store_i64
+alignment: 4
+legalized: true
+regBankSelected: true
+registers:
+# ALL: - { id: 0, class: gr64 }
+# ALL: - { id: 1, class: gr64 }
+ - { id: 0, class: gpr }
+ - { id: 1, class: gpr }
+# ALL: %0 = COPY %rdi
+# ALL: %1 = COPY %rsi
+# ALL: MOV64mr %1, 1, _, 0, _, %0 :: (store 8 into %ir.p1)
+# ALL: %rax = COPY %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi, %rsi
+
+ %0(s64) = COPY %rdi
+ %1(p0) = COPY %rsi
+ G_STORE %0(s64), %1(p0) :: (store 8 into %ir.p1)
+ %rax = COPY %1(p0)
+ RET 0, implicit %rax
+
+...
+---
+# ALL-LABEL: name: test_store_float
+name: test_store_float
+alignment: 4
+legalized: true
+regBankSelected: true
+registers:
+# ALL: - { id: 0, class: fr32x }
+# ALL: - { id: 1, class: gr64 }
+# ALL: - { id: 2, class: gr32 }
+ - { id: 0, class: vecr }
+ - { id: 1, class: gpr }
+ - { id: 2, class: gpr }
+# ALL: %0 = COPY %xmm0
+# ALL: %1 = COPY %rdi
+# ALL: %2 = COPY %0
+# ALL: MOV32mr %1, 1, _, 0, _, %2 :: (store 4 into %ir.p1)
+# ALL: %rax = COPY %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi, %xmm0
+
+ %0(s32) = COPY %xmm0
+ %1(p0) = COPY %rdi
+ %2(s32) = COPY %0(s32)
+ G_STORE %2(s32), %1(p0) :: (store 4 into %ir.p1)
+ %rax = COPY %1(p0)
+ RET 0, implicit %rax
+
+...
+---
+# ALL-LABEL: name: test_store_float_vec
+name: test_store_float_vec
+alignment: 4
+legalized: true
+regBankSelected: true
+registers:
+# NO_AVX512F: - { id: 0, class: fr32 }
+# AVX512ALL: - { id: 0, class: fr32x }
+# ALL: - { id: 1, class: gr64 }
+ - { id: 0, class: vecr }
+ - { id: 1, class: gpr }
+# ALL: %0 = COPY %xmm0
+# ALL: %1 = COPY %rdi
+# SSE: MOVSSmr %1, 1, _, 0, _, %0 :: (store 4 into %ir.p1)
+# AVX: VMOVSSmr %1, 1, _, 0, _, %0 :: (store 4 into %ir.p1)
+# AVX512ALL: VMOVSSZmr %1, 1, _, 0, _, %0 :: (store 4 into %ir.p1)
+# ALL: %rax = COPY %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi, %xmm0
+
+ %0(s32) = COPY %xmm0
+ %1(p0) = COPY %rdi
+ G_STORE %0(s32), %1(p0) :: (store 4 into %ir.p1)
+ %rax = COPY %1(p0)
+ RET 0, implicit %rax
+
+...
+---
+# ALL-LABEL: name: test_store_double
+name: test_store_double
+alignment: 4
+legalized: true
+regBankSelected: true
+registers:
+# ALL: - { id: 0, class: fr64x }
+# ALL: - { id: 1, class: gr64 }
+# ALL: - { id: 2, class: gr64 }
+ - { id: 0, class: vecr }
+ - { id: 1, class: gpr }
+ - { id: 2, class: gpr }
+# ALL: %0 = COPY %xmm0
+# ALL: %1 = COPY %rdi
+# ALL: %2 = COPY %0
+# ALL: MOV64mr %1, 1, _, 0, _, %2 :: (store 8 into %ir.p1)
+# ALL: %rax = COPY %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi, %xmm0
+
+ %0(s64) = COPY %xmm0
+ %1(p0) = COPY %rdi
+ %2(s64) = COPY %0(s64)
+ G_STORE %2(s64), %1(p0) :: (store 8 into %ir.p1)
+ %rax = COPY %1(p0)
+ RET 0, implicit %rax
+
+...
+---
+# ALL-LABEL: name: test_store_double_vec
+name: test_store_double_vec
+alignment: 4
+legalized: true
+regBankSelected: true
+registers:
+# NO_AVX512F: - { id: 0, class: fr64 }
+# AVX512ALL: - { id: 0, class: fr64x }
+# ALL: - { id: 1, class: gr64 }
+ - { id: 0, class: vecr }
+ - { id: 1, class: gpr }
+# ALL: %0 = COPY %xmm0
+# ALL: %1 = COPY %rdi
+# SSE: MOVSDmr %1, 1, _, 0, _, %0 :: (store 8 into %ir.p1)
+# AVX: VMOVSDmr %1, 1, _, 0, _, %0 :: (store 8 into %ir.p1)
+# AVX512ALL: VMOVSDZmr %1, 1, _, 0, _, %0 :: (store 8 into %ir.p1)
+# ALL: %rax = COPY %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi, %xmm0
+
+ %0(s64) = COPY %xmm0
+ %1(p0) = COPY %rdi
+ G_STORE %0(s64), %1(p0) :: (store 8 into %ir.p1)
+ %rax = COPY %1(p0)
+ RET 0, implicit %rax
+
+...
+---
+# ALL-LABEL: name: test_store_v4i32_align
+name: test_store_v4i32_align
+alignment: 4
+legalized: true
+regBankSelected: true
+registers:
+# NO_AVX512F: - { id: 0, class: vr128 }
+# AVX512ALL: - { id: 0, class: vr128x }
+# ALL: - { id: 1, class: gr64 }
+ - { id: 0, class: vecr }
+ - { id: 1, class: gpr }
+# ALL: %0 = COPY %xmm0
+# ALL: %1 = COPY %rdi
+# SSE: MOVAPSmr %1, 1, _, 0, _, %0 :: (store 16 into %ir.p1)
+# AVX: VMOVAPSmr %1, 1, _, 0, _, %0 :: (store 16 into %ir.p1)
+# AVX512F: VMOVAPSZ128mr_NOVLX %1, 1, _, 0, _, %0 :: (store 16 into %ir.p1)
+# AVX512VL: VMOVAPSZ128mr %1, 1, _, 0, _, %0 :: (store 16 into %ir.p1)
+# ALL: %rax = COPY %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi, %xmm0
+
+ %0(<4 x s32>) = COPY %xmm0
+ %1(p0) = COPY %rdi
+ G_STORE %0(<4 x s32>), %1(p0) :: (store 16 into %ir.p1, align 16)
+ %rax = COPY %1(p0)
+ RET 0, implicit %rax
+
+...
+---
+# ALL-LABEL: name: test_store_v4i32_noalign
+name: test_store_v4i32_noalign
+alignment: 4
+legalized: true
+regBankSelected: true
+registers:
+# NO_AVX512F: - { id: 0, class: vr128 }
+# AVX512ALL: - { id: 0, class: vr128x }
+# ALL: - { id: 1, class: gr64 }
+ - { id: 0, class: vecr }
+ - { id: 1, class: gpr }
+# ALL: %0 = COPY %xmm0
+# ALL: %1 = COPY %rdi
+# SSE: MOVUPSmr %1, 1, _, 0, _, %0 :: (store 16 into %ir.p1, align 1)
+# AVX: VMOVUPSmr %1, 1, _, 0, _, %0 :: (store 16 into %ir.p1, align 1)
+# AVX512F: VMOVUPSZ128mr_NOVLX %1, 1, _, 0, _, %0 :: (store 16 into %ir.p1, align 1)
+# AVX512VL: VMOVUPSZ128mr %1, 1, _, 0, _, %0 :: (store 16 into %ir.p1, align 1)
+# ALL: %rax = COPY %1
+body: |
+ bb.1 (%ir-block.0):
+ liveins: %rdi, %xmm0
+
+ %0(<4 x s32>) = COPY %xmm0
+ %1(p0) = COPY %rdi
+ G_STORE %0(<4 x s32>), %1(p0) :: (store 16 into %ir.p1, align 1)
+ %rax = COPY %1(p0)
+ RET 0, implicit %rax
+
+...