namespace llvm {
namespace exegesis {
-namespace {
-
// Returns an error if we cannot handle the memory references in this
// instruction.
-Error isInvalidMemoryInstr(const Instruction &Instr) {
+static Error isInvalidMemoryInstr(const Instruction &Instr) {
switch (Instr.Description->TSFlags & X86II::FormMask) {
default:
llvm_unreachable("Unknown FormMask value");
return llvm::Error::success();
}
-static unsigned GetX86FPFlags(const Instruction &Instr) {
+static unsigned getX86FPFlags(const Instruction &Instr) {
return Instr.Description->TSFlags & llvm::X86II::FPTypeMask;
}
+namespace {
class X86LatencySnippetGenerator : public LatencySnippetGenerator {
public:
using LatencySnippetGenerator::LatencySnippetGenerator;
llvm::Expected<std::vector<CodeTemplate>>
- generateCodeTemplates(const Instruction &Instr) const override {
- if (auto E = IsInvalidOpcode(Instr))
- return std::move(E);
-
- switch (GetX86FPFlags(Instr)) {
- case llvm::X86II::NotFP:
- return LatencySnippetGenerator::generateCodeTemplates(Instr);
- case llvm::X86II::ZeroArgFP:
- case llvm::X86II::OneArgFP:
- case llvm::X86II::SpecialFP:
- case llvm::X86II::CompareFP:
- case llvm::X86II::CondMovFP:
- return llvm::make_error<BenchmarkFailure>("Unsupported x87 Instruction");
- case llvm::X86II::OneArgFPRW:
- case llvm::X86II::TwoArgFP:
- // These are instructions like
- // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
- // - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
- // They are intrinsically serial and do not modify the state of the stack.
- return generateSelfAliasingCodeTemplates(Instr);
- default:
- llvm_unreachable("Unknown FP Type!");
- }
- }
+ generateCodeTemplates(const Instruction &Instr) const override;
};
+} // namespace
+llvm::Expected<std::vector<CodeTemplate>>
+X86LatencySnippetGenerator::generateCodeTemplates(
+ const Instruction &Instr) const {
+ if (auto E = IsInvalidOpcode(Instr))
+ return std::move(E);
+
+ switch (getX86FPFlags(Instr)) {
+ case llvm::X86II::NotFP:
+ return LatencySnippetGenerator::generateCodeTemplates(Instr);
+ case llvm::X86II::ZeroArgFP:
+ case llvm::X86II::OneArgFP:
+ case llvm::X86II::SpecialFP:
+ case llvm::X86II::CompareFP:
+ case llvm::X86II::CondMovFP:
+ return llvm::make_error<BenchmarkFailure>("Unsupported x87 Instruction");
+ case llvm::X86II::OneArgFPRW:
+ case llvm::X86II::TwoArgFP:
+ // These are instructions like
+ // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
+ // - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
+ // They are intrinsically serial and do not modify the state of the stack.
+ return generateSelfAliasingCodeTemplates(Instr);
+ default:
+ llvm_unreachable("Unknown FP Type!");
+ }
+}
+
+namespace {
class X86UopsSnippetGenerator : public UopsSnippetGenerator {
public:
using UopsSnippetGenerator::UopsSnippetGenerator;
llvm::Expected<std::vector<CodeTemplate>>
- generateCodeTemplates(const Instruction &Instr) const override {
- if (auto E = IsInvalidOpcode(Instr))
- return std::move(E);
-
- switch (GetX86FPFlags(Instr)) {
- case llvm::X86II::NotFP:
- return UopsSnippetGenerator::generateCodeTemplates(Instr);
- case llvm::X86II::ZeroArgFP:
- case llvm::X86II::OneArgFP:
- case llvm::X86II::SpecialFP:
- return llvm::make_error<BenchmarkFailure>("Unsupported x87 Instruction");
- case llvm::X86II::OneArgFPRW:
- case llvm::X86II::TwoArgFP:
- // These are instructions like
- // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
- // - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
- // They are intrinsically serial and do not modify the state of the stack.
- // We generate the same code for latency and uops.
- return generateSelfAliasingCodeTemplates(Instr);
- case llvm::X86II::CompareFP:
- case llvm::X86II::CondMovFP:
- // We can compute uops for any FP instruction that does not grow or shrink
- // the stack (either do not touch the stack or push as much as they pop).
- return generateUnconstrainedCodeTemplates(
- Instr, "instruction does not grow/shrink the FP stack");
- default:
- llvm_unreachable("Unknown FP Type!");
- }
- }
+ generateCodeTemplates(const Instruction &Instr) const override;
};
+} // namespace
+
+llvm::Expected<std::vector<CodeTemplate>>
+X86UopsSnippetGenerator::generateCodeTemplates(
+ const Instruction &Instr) const {
+ if (auto E = IsInvalidOpcode(Instr))
+ return std::move(E);
+
+ switch (getX86FPFlags(Instr)) {
+ case llvm::X86II::NotFP:
+ return UopsSnippetGenerator::generateCodeTemplates(Instr);
+ case llvm::X86II::ZeroArgFP:
+ case llvm::X86II::OneArgFP:
+ case llvm::X86II::SpecialFP:
+ return llvm::make_error<BenchmarkFailure>("Unsupported x87 Instruction");
+ case llvm::X86II::OneArgFPRW:
+ case llvm::X86II::TwoArgFP:
+ // These are instructions like
+ // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
+ // - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
+ // They are intrinsically serial and do not modify the state of the stack.
+ // We generate the same code for latency and uops.
+ return generateSelfAliasingCodeTemplates(Instr);
+ case llvm::X86II::CompareFP:
+ case llvm::X86II::CondMovFP:
+    // We can compute uops for any FP instruction that does not grow or shrink
+    // the stack, i.e. one that either does not touch the stack or pushes as
+    // much as it pops.
+ return generateUnconstrainedCodeTemplates(
+ Instr, "instruction does not grow/shrink the FP stack");
+ default:
+ llvm_unreachable("Unknown FP Type!");
+ }
+}
-static unsigned GetLoadImmediateOpcode(unsigned RegBitWidth) {
+static unsigned getLoadImmediateOpcode(unsigned RegBitWidth) {
switch (RegBitWidth) {
case 8:
return llvm::X86::MOV8ri;
static llvm::MCInst loadImmediate(unsigned Reg, unsigned RegBitWidth,
                                  const llvm::APInt &Value) {
if (Value.getBitWidth() > RegBitWidth)
llvm_unreachable("Value must fit in the Register");
- return llvm::MCInstBuilder(GetLoadImmediateOpcode(RegBitWidth))
+ return llvm::MCInstBuilder(getLoadImmediateOpcode(RegBitWidth))
.addReg(Reg)
.addImm(Value.getZExtValue());
}
// Reserves some space on the stack, fills it with the content of the provided
// constant and provides methods to load the stack value into a register.
+namespace {
struct ConstantInliner {
explicit ConstantInliner(const llvm::APInt &Constant) : Constant_(Constant) {}
std::vector<llvm::MCInst> loadAndFinalize(unsigned Reg, unsigned RegBitWidth,
- unsigned Opcode) {
- assert((RegBitWidth & 7) == 0 &&
- "RegBitWidth must be a multiple of 8 bits");
- initStack(RegBitWidth / 8);
- add(loadToReg(Reg, Opcode));
- add(releaseStackSpace(RegBitWidth / 8));
- return std::move(Instructions);
- }
+ unsigned Opcode);
- std::vector<llvm::MCInst> loadX87STAndFinalize(unsigned Reg) {
- initStack(kF80Bytes);
- add(llvm::MCInstBuilder(llvm::X86::LD_F80m)
- // Address = ESP
- .addReg(llvm::X86::RSP) // BaseReg
- .addImm(1) // ScaleAmt
- .addReg(0) // IndexReg
- .addImm(0) // Disp
- .addReg(0)); // Segment
- if (Reg != llvm::X86::ST0)
- add(llvm::MCInstBuilder(llvm::X86::ST_Frr).addReg(Reg));
- add(releaseStackSpace(kF80Bytes));
- return std::move(Instructions);
- }
+ std::vector<llvm::MCInst> loadX87STAndFinalize(unsigned Reg);
- std::vector<llvm::MCInst> loadX87FPAndFinalize(unsigned Reg) {
- initStack(kF80Bytes);
- add(llvm::MCInstBuilder(llvm::X86::LD_Fp80m)
- .addReg(Reg)
- // Address = ESP
- .addReg(llvm::X86::RSP) // BaseReg
- .addImm(1) // ScaleAmt
- .addReg(0) // IndexReg
- .addImm(0) // Disp
- .addReg(0)); // Segment
- add(releaseStackSpace(kF80Bytes));
- return std::move(Instructions);
- }
+ std::vector<llvm::MCInst> loadX87FPAndFinalize(unsigned Reg);
- std::vector<llvm::MCInst> popFlagAndFinalize() {
- initStack(8);
- add(llvm::MCInstBuilder(llvm::X86::POPF64));
- return std::move(Instructions);
- }
+ std::vector<llvm::MCInst> popFlagAndFinalize();
private:
- static constexpr const unsigned kF80Bytes = 10; // 80 bits.
-
ConstantInliner &add(const llvm::MCInst &Inst) {
Instructions.push_back(Inst);
return *this;
}
- void initStack(unsigned Bytes) {
- assert(Constant_.getBitWidth() <= Bytes * 8 &&
- "Value does not have the correct size");
- const llvm::APInt WideConstant = Constant_.getBitWidth() < Bytes * 8
- ? Constant_.sext(Bytes * 8)
- : Constant_;
- add(allocateStackSpace(Bytes));
- size_t ByteOffset = 0;
- for (; Bytes - ByteOffset >= 4; ByteOffset += 4)
- add(fillStackSpace(
- llvm::X86::MOV32mi, ByteOffset,
- WideConstant.extractBits(32, ByteOffset * 8).getZExtValue()));
- if (Bytes - ByteOffset >= 2) {
- add(fillStackSpace(
- llvm::X86::MOV16mi, ByteOffset,
- WideConstant.extractBits(16, ByteOffset * 8).getZExtValue()));
- ByteOffset += 2;
- }
- if (Bytes - ByteOffset >= 1)
- add(fillStackSpace(
- llvm::X86::MOV8mi, ByteOffset,
- WideConstant.extractBits(8, ByteOffset * 8).getZExtValue()));
- }
+ void initStack(unsigned Bytes);
+
+ static constexpr const unsigned kF80Bytes = 10; // 80 bits.
llvm::APInt Constant_;
std::vector<llvm::MCInst> Instructions;
};
+} // namespace
+
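+// Writes the constant to RegBitWidth / 8 bytes of stack space, loads it into
+// Reg with the given memory-load opcode, then releases the stack space.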
+std::vector<llvm::MCInst> ConstantInliner::loadAndFinalize(unsigned Reg,
+ unsigned RegBitWidth,
+ unsigned Opcode) {
+ assert((RegBitWidth & 7) == 0 && "RegBitWidth must be a multiple of 8 bits");
+ initStack(RegBitWidth / 8);
+ add(loadToReg(Reg, Opcode));
+ add(releaseStackSpace(RegBitWidth / 8));
+ return std::move(Instructions);
+}
+
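+// Loads the 80-bit constant from the stack onto the top of the x87 stack,
+// then copies it into Reg with FST when Reg is not ST0.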
+std::vector<llvm::MCInst> ConstantInliner::loadX87STAndFinalize(unsigned Reg) {
+ initStack(kF80Bytes);
+ add(llvm::MCInstBuilder(llvm::X86::LD_F80m)
+          // Address = RSP
+ .addReg(llvm::X86::RSP) // BaseReg
+ .addImm(1) // ScaleAmt
+ .addReg(0) // IndexReg
+ .addImm(0) // Disp
+ .addReg(0)); // Segment
+ if (Reg != llvm::X86::ST0)
+ add(llvm::MCInstBuilder(llvm::X86::ST_Frr).addReg(Reg));
+ add(releaseStackSpace(kF80Bytes));
+ return std::move(Instructions);
+}
+
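+// Loads the 80-bit constant from the stack into Reg, an RFP pseudo register
+// that the FP stackifier pass later maps onto a concrete x87 register.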
+std::vector<llvm::MCInst> ConstantInliner::loadX87FPAndFinalize(unsigned Reg) {
+ initStack(kF80Bytes);
+ add(llvm::MCInstBuilder(llvm::X86::LD_Fp80m)
+ .addReg(Reg)
+          // Address = RSP
+ .addReg(llvm::X86::RSP) // BaseReg
+ .addImm(1) // ScaleAmt
+ .addReg(0) // IndexReg
+ .addImm(0) // Disp
+ .addReg(0)); // Segment
+ add(releaseStackSpace(kF80Bytes));
+ return std::move(Instructions);
+}
+
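+// Writes the 8-byte constant to the stack and pops it into RFLAGS with
+// POPF64.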
+std::vector<llvm::MCInst> ConstantInliner::popFlagAndFinalize() {
+ initStack(8);
+ add(llvm::MCInstBuilder(llvm::X86::POPF64));
+ return std::move(Instructions);
+}
+
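+// Reserves Bytes of stack space and copies the constant into it, widest
+// stores first: 4-byte chunks, then a 2-byte chunk, then a final byte. The
+// constant is sign-extended to Bytes * 8 bits if it is narrower.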
+void ConstantInliner::initStack(unsigned Bytes) {
+ assert(Constant_.getBitWidth() <= Bytes * 8 &&
+ "Value does not have the correct size");
+ const llvm::APInt WideConstant = Constant_.getBitWidth() < Bytes * 8
+ ? Constant_.sext(Bytes * 8)
+ : Constant_;
+ add(allocateStackSpace(Bytes));
+ size_t ByteOffset = 0;
+ for (; Bytes - ByteOffset >= 4; ByteOffset += 4)
+ add(fillStackSpace(
+ llvm::X86::MOV32mi, ByteOffset,
+ WideConstant.extractBits(32, ByteOffset * 8).getZExtValue()));
+ if (Bytes - ByteOffset >= 2) {
+ add(fillStackSpace(
+ llvm::X86::MOV16mi, ByteOffset,
+ WideConstant.extractBits(16, ByteOffset * 8).getZExtValue()));
+ ByteOffset += 2;
+ }
+ if (Bytes - ByteOffset >= 1)
+ add(fillStackSpace(
+ llvm::X86::MOV8mi, ByteOffset,
+ WideConstant.extractBits(8, ByteOffset * 8).getZExtValue()));
+}
#include "X86GenExegesis.inc"
+namespace {
class ExegesisX86Target : public ExegesisTarget {
public:
ExegesisX86Target() : ExegesisTarget(X86CpuPfmCounters) {}
private:
- void addTargetSpecificPasses(llvm::PassManagerBase &PM) const override {
- // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F.
- PM.add(llvm::createX86FloatingPointStackifierPass());
- }
+ void addTargetSpecificPasses(llvm::PassManagerBase &PM) const override;
- unsigned getScratchMemoryRegister(const llvm::Triple &TT) const override {
- if (!TT.isArch64Bit()) {
- // FIXME: This would require popping from the stack, so we would have to
- // add some additional setup code.
- return 0;
- }
- return TT.isOSWindows() ? llvm::X86::RCX : llvm::X86::RDI;
- }
+ unsigned getScratchMemoryRegister(const llvm::Triple &TT) const override;
unsigned getMaxMemoryAccessSize() const override { return 64; }
void fillMemoryOperands(InstructionTemplate &IT, unsigned Reg,
- unsigned Offset) const override {
- assert(!isInvalidMemoryInstr(IT.Instr) &&
- "fillMemoryOperands requires a valid memory instruction");
- int MemOpIdx = X86II::getMemoryOperandNo(IT.Instr.Description->TSFlags);
- assert(MemOpIdx >= 0 && "invalid memory operand index");
- // getMemoryOperandNo() ignores tied operands, so we have to add them back.
- for (unsigned I = 0; I <= static_cast<unsigned>(MemOpIdx); ++I) {
- const auto &Op = IT.Instr.Operands[I];
- if (Op.isTied() && Op.getTiedToIndex() < I) {
- ++MemOpIdx;
- }
- }
- // Now fill in the memory operands.
- const auto SetOp = [&IT](int OpIdx, const MCOperand &OpVal) {
- const auto Op = IT.Instr.Operands[OpIdx];
- assert(Op.isMemory() && Op.isExplicit() && "invalid memory pattern");
- IT.getValueFor(Op) = OpVal;
- };
- SetOp(MemOpIdx + 0, MCOperand::createReg(Reg)); // BaseReg
- SetOp(MemOpIdx + 1, MCOperand::createImm(1)); // ScaleAmt
- SetOp(MemOpIdx + 2, MCOperand::createReg(0)); // IndexReg
- SetOp(MemOpIdx + 3, MCOperand::createImm(Offset)); // Disp
- SetOp(MemOpIdx + 4, MCOperand::createReg(0)); // Segment
- }
+ unsigned Offset) const override;
std::vector<llvm::MCInst> setRegTo(const llvm::MCSubtargetInfo &STI,
unsigned Reg,
- const llvm::APInt &Value) const override {
- if (llvm::X86::GR8RegClass.contains(Reg))
- return {loadImmediate(Reg, 8, Value)};
- if (llvm::X86::GR16RegClass.contains(Reg))
- return {loadImmediate(Reg, 16, Value)};
- if (llvm::X86::GR32RegClass.contains(Reg))
- return {loadImmediate(Reg, 32, Value)};
- if (llvm::X86::GR64RegClass.contains(Reg))
- return {loadImmediate(Reg, 64, Value)};
- ConstantInliner CI(Value);
- if (llvm::X86::VR64RegClass.contains(Reg))
- return CI.loadAndFinalize(Reg, 64, llvm::X86::MMX_MOVQ64rm);
- if (llvm::X86::VR128XRegClass.contains(Reg)) {
- if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
- return CI.loadAndFinalize(Reg, 128, llvm::X86::VMOVDQU32Z128rm);
- if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
- return CI.loadAndFinalize(Reg, 128, llvm::X86::VMOVDQUrm);
- return CI.loadAndFinalize(Reg, 128, llvm::X86::MOVDQUrm);
- }
- if (llvm::X86::VR256XRegClass.contains(Reg)) {
- if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
- return CI.loadAndFinalize(Reg, 256, llvm::X86::VMOVDQU32Z256rm);
- if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
- return CI.loadAndFinalize(Reg, 256, llvm::X86::VMOVDQUYrm);
- }
- if (llvm::X86::VR512RegClass.contains(Reg))
- if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
- return CI.loadAndFinalize(Reg, 512, llvm::X86::VMOVDQU32Zrm);
- if (llvm::X86::RSTRegClass.contains(Reg)) {
- return CI.loadX87STAndFinalize(Reg);
- }
- if (llvm::X86::RFP32RegClass.contains(Reg) ||
- llvm::X86::RFP64RegClass.contains(Reg) ||
- llvm::X86::RFP80RegClass.contains(Reg)) {
- return CI.loadX87FPAndFinalize(Reg);
- }
- if (Reg == llvm::X86::EFLAGS)
- return CI.popFlagAndFinalize();
- return {}; // Not yet implemented.
- }
+ const llvm::APInt &Value) const override;
  std::unique_ptr<SnippetGenerator>
  createLatencySnippetGenerator(const LLVMState &State) const override {
    return llvm::make_unique<X86LatencySnippetGenerator>(State);
  }

  bool matchesArch(llvm::Triple::ArchType Arch) const override {
    return Arch == llvm::Triple::x86_64 || Arch == llvm::Triple::x86;
  }
};
-
} // namespace
+void ExegesisX86Target::addTargetSpecificPasses(
+ llvm::PassManagerBase &PM) const {
+ // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F.
+ PM.add(llvm::createX86FloatingPointStackifierPass());
+}
+
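+// Returns the register used to address scratch memory: RCX on Windows, RDI
+// elsewhere. 32-bit targets are not supported yet (see FIXME below), so 0 is
+// returned to signal that no scratch register is available.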
+unsigned
+ExegesisX86Target::getScratchMemoryRegister(const llvm::Triple &TT) const {
+ if (!TT.isArch64Bit()) {
+ // FIXME: This would require popping from the stack, so we would have to
+ // add some additional setup code.
+ return 0;
+ }
+ return TT.isOSWindows() ? llvm::X86::RCX : llvm::X86::RDI;
+}
+
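+// Fills the explicit memory operands of IT so that the instruction addresses
+// [Reg + Offset]: base register Reg, scale 1, no index register, displacement
+// Offset, no segment.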
+void ExegesisX86Target::fillMemoryOperands(InstructionTemplate &IT,
+ unsigned Reg,
+ unsigned Offset) const {
+ assert(!isInvalidMemoryInstr(IT.Instr) &&
+ "fillMemoryOperands requires a valid memory instruction");
+ int MemOpIdx = X86II::getMemoryOperandNo(IT.Instr.Description->TSFlags);
+ assert(MemOpIdx >= 0 && "invalid memory operand index");
+ // getMemoryOperandNo() ignores tied operands, so we have to add them back.
+ for (unsigned I = 0; I <= static_cast<unsigned>(MemOpIdx); ++I) {
+ const auto &Op = IT.Instr.Operands[I];
+ if (Op.isTied() && Op.getTiedToIndex() < I) {
+ ++MemOpIdx;
+ }
+ }
+ // Now fill in the memory operands.
+ const auto SetOp = [&IT](int OpIdx, const MCOperand &OpVal) {
+ const auto Op = IT.Instr.Operands[OpIdx];
+ assert(Op.isMemory() && Op.isExplicit() && "invalid memory pattern");
+ IT.getValueFor(Op) = OpVal;
+ };
+ SetOp(MemOpIdx + 0, MCOperand::createReg(Reg)); // BaseReg
+ SetOp(MemOpIdx + 1, MCOperand::createImm(1)); // ScaleAmt
+ SetOp(MemOpIdx + 2, MCOperand::createReg(0)); // IndexReg
+ SetOp(MemOpIdx + 3, MCOperand::createImm(Offset)); // Disp
+ SetOp(MemOpIdx + 4, MCOperand::createReg(0)); // Segment
+}
+
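+// Materializes Value into Reg. GPRs are set with a single immediate move;
+// MMX/SSE/AVX/AVX-512, x87 and EFLAGS registers are loaded from a constant
+// written to the stack by ConstantInliner.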
+std::vector<llvm::MCInst>
+ExegesisX86Target::setRegTo(const llvm::MCSubtargetInfo &STI, unsigned Reg,
+ const llvm::APInt &Value) const {
+ if (llvm::X86::GR8RegClass.contains(Reg))
+ return {loadImmediate(Reg, 8, Value)};
+ if (llvm::X86::GR16RegClass.contains(Reg))
+ return {loadImmediate(Reg, 16, Value)};
+ if (llvm::X86::GR32RegClass.contains(Reg))
+ return {loadImmediate(Reg, 32, Value)};
+ if (llvm::X86::GR64RegClass.contains(Reg))
+ return {loadImmediate(Reg, 64, Value)};
+ ConstantInliner CI(Value);
+ if (llvm::X86::VR64RegClass.contains(Reg))
+ return CI.loadAndFinalize(Reg, 64, llvm::X86::MMX_MOVQ64rm);
+ if (llvm::X86::VR128XRegClass.contains(Reg)) {
+ if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
+ return CI.loadAndFinalize(Reg, 128, llvm::X86::VMOVDQU32Z128rm);
+ if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
+ return CI.loadAndFinalize(Reg, 128, llvm::X86::VMOVDQUrm);
+ return CI.loadAndFinalize(Reg, 128, llvm::X86::MOVDQUrm);
+ }
+ if (llvm::X86::VR256XRegClass.contains(Reg)) {
+ if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
+ return CI.loadAndFinalize(Reg, 256, llvm::X86::VMOVDQU32Z256rm);
+ if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
+ return CI.loadAndFinalize(Reg, 256, llvm::X86::VMOVDQUYrm);
+ }
+ if (llvm::X86::VR512RegClass.contains(Reg))
+ if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
+ return CI.loadAndFinalize(Reg, 512, llvm::X86::VMOVDQU32Zrm);
+ if (llvm::X86::RSTRegClass.contains(Reg)) {
+ return CI.loadX87STAndFinalize(Reg);
+ }
+ if (llvm::X86::RFP32RegClass.contains(Reg) ||
+ llvm::X86::RFP64RegClass.contains(Reg) ||
+ llvm::X86::RFP80RegClass.contains(Reg)) {
+ return CI.loadX87FPAndFinalize(Reg);
+ }
+ if (Reg == llvm::X86::EFLAGS)
+ return CI.popFlagAndFinalize();
+ return {}; // Not yet implemented.
+}
+
static ExegesisTarget *getTheExegesisX86Target() {
static ExegesisX86Target Target;
return &Target;