From adf1cc1bdf0e0c441900fc0764f0609891dd796c Mon Sep 17 00:00:00 2001 From: Benjamin Segovia Date: Tue, 22 May 2012 18:30:53 +0000 Subject: [PATCH] Started to segment the final code generation into: - instruction selection phase - register allocation - mov insertion - instruction scheduling - instrucition encoding Started with the instruction selection engine --- backend/src/CMakeLists.txt | 2 + backend/src/backend/gen_context.hpp | 2 +- backend/src/backend/gen_eu.hpp | 14 - backend/src/backend/gen_selector.cpp | 26 ++ backend/src/backend/gen_selector.hpp | 576 +++++++++++++++++++++++++++++++++++ 5 files changed, 605 insertions(+), 15 deletions(-) create mode 100644 backend/src/backend/gen_selector.cpp create mode 100644 backend/src/backend/gen_selector.hpp diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt index c6c71d2..0fa4efb 100644 --- a/backend/src/CMakeLists.txt +++ b/backend/src/CMakeLists.txt @@ -86,6 +86,8 @@ else (GBE_USE_BLOB) backend/sim/simulator_str.cpp backend/sim/sim_vector_str.cpp backend/sim/sim_vector.h + backend/gen_selector.cpp + backend/gen_selector.hpp backend/gen_context.cpp backend/gen_context.hpp backend/gen_program.cpp diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp index b661516..f0845fb 100644 --- a/backend/src/backend/gen_context.hpp +++ b/backend/src/backend/gen_context.hpp @@ -64,7 +64,7 @@ namespace gbe void patchBranches(void); /*! Return the Gen register from the GenIR one */ GenReg genReg(ir::Register reg, ir::Type type = ir::TYPE_FLOAT); - /*! Compute the second instruction when using SIMD8 with Qn (n in 2,3,4) */ + /*! Compute the quarterth register part when using SIMD8 with Qn (n in 2,3,4) */ GenReg genRegQn(ir::Register reg, uint32_t quarter, ir::Type type = ir::TYPE_FLOAT); /*! Bool registers will use scalar words. So we will consider them as * scalars in Gen backend diff --git a/backend/src/backend/gen_eu.hpp b/backend/src/backend/gen_eu.hpp index e6b17b9..2e19368 100644 --- a/backend/src/backend/gen_eu.hpp +++ b/backend/src/backend/gen_eu.hpp @@ -103,7 +103,6 @@ namespace gbe this->width = width; this->hstride = hstride; this->address_mode = GEN_ADDRESS_DIRECT; - this->pad0 = 0; } static INLINE GenReg Qn(GenReg reg, uint32_t quarter) { @@ -319,10 +318,6 @@ namespace gbe return immediate; } - static INLINE GenReg address(GenReg reg) { - return immuw(reg.nr * GEN_REG_SIZE + reg.subnr); - } - static INLINE GenReg f1grf(uint32_t nr, uint32_t subnr) { return vec1(GEN_GENERAL_REGISTER_FILE, nr, subnr); } @@ -383,10 +378,6 @@ namespace gbe return vec8(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_NULL, 0); } - static INLINE GenReg address(uint32_t subnr) { - return uw1(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_ADDRESS, subnr); - } - static INLINE GenReg acc(void) { return vec8(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_ACCUMULATOR, 0); } @@ -435,10 +426,6 @@ namespace gbe return reg; } - static INLINE bool same_reg(GenReg r1, GenReg r2) { - return r1.file == r2.file && r1.nr == r2.nr; - } - uint32_t type:4; uint32_t file:2; uint32_t nr:8; @@ -449,7 +436,6 @@ namespace gbe uint32_t width:3; /* src only, align1 only */ uint32_t hstride:2; /* align1 only */ uint32_t address_mode:1; /* relative addressing, hopefully! */ - uint32_t pad0:1; union { float f; diff --git a/backend/src/backend/gen_selector.cpp b/backend/src/backend/gen_selector.cpp new file mode 100644 index 0000000..fb85f4a --- /dev/null +++ b/backend/src/backend/gen_selector.cpp @@ -0,0 +1,26 @@ +/* + * Copyright © 2012 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see . + * + * Author: Benjamin Segovia + */ + +/** + * \file gen_instruction_selection.cpp + * \author Benjamin Segovia + */ + +#include "gen_instruction_selection.hpp" + diff --git a/backend/src/backend/gen_selector.hpp b/backend/src/backend/gen_selector.hpp new file mode 100644 index 0000000..7c1f78f --- /dev/null +++ b/backend/src/backend/gen_selector.hpp @@ -0,0 +1,576 @@ +/* + * Copyright © 2012 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see . + * + * Author: Benjamin Segovia + */ + +/** + * \file gen_instruction_selection.hpp + * \author Benjamin Segovia + */ + +#ifndef __GEN_SELECTOR_HPP__ +#define __GEN_SELECTOR_HPP__ + +#include "ir/register.hpp" +#include "ir/instruction.hpp" +#include "backend/gen_defs.hpp" +#include "sys/vector.hpp" + +namespace gbe +{ + /*! The state for each instruction */ + struct GenInstructionState + { + uint32_t execWidth:6; + uint32_t quarterControl:2; + uint32_t noMask:1; + uint32_t flag:1; + uint32_t subFlag:1; + uint32_t predicate:4; + uint32_t inversePredicate:1; + }; + + /*! This is a book-keeping structure that is not exactly a virtual register + * and not exactly a physical register. Basically, it is a Gen register + * *before* the register allocation i.e. it contains the info to get it + * properly encoded later but it is not associated to a GRF, a flag or + * anything Gen related yet. + */ + struct SelectionReg + { + /*! Associated virtual register */ + ir::Register reg; + + /*! For immediates */ + union { + float f; + int32_t d; + uint32_t ud; + } immediate; + + uint32_t nr:8; //!< Just for some physical registers (acc, null) + uint32_t subnr:6; //!< Idem + uint32_t type:4; //!< Gen type + uint32_t file:2; //!< Register file + uint32_t negation:1; //!< For source + uint32_t absolute:1; //!< For source + uint32_t vstride:4; //!< Vertical stride + uint32_t width:3; //!< Width + uint32_t hstride:2; //!< Horizontal stride + uint32_t quarter:1; //!< To choose which part we want + + /*! Empty constructor */ + INLINE SelectionReg(void) {} + + /*! General constructor */ + INLINE SelectionReg(uint32_t file, + ir::Register reg, + uint32_t type, + uint32_t vstride, + uint32_t width, + uint32_t hstride) + { + this->type = type; + this->file = file; + this->reg = reg; + this->negation = 0; + this->absolute = 0; + this->vstride = vstride; + this->width = width; + this->hstride = hstride; + this->quarter = 0; + } + + /*! For specific physical registers only (acc, null) */ + INLINE SelectionReg(uint32_t file, + uint32_t nr, + uint32_t subnr, + uint32_t type, + uint32_t vstride, + uint32_t width, + uint32_t hstride) + { + this->type = type; + this->file = file; + this->nr = nr; + this->subnr = subnr; + this->negation = 0; + this->absolute = 0; + this->vstride = vstride; + this->width = width; + this->hstride = hstride; + this->quarter = 0; + } + + static INLINE SelectionReg Qn(SelectionReg reg, uint32_t quarter) { + if (reg.hstride == GEN_HORIZONTAL_STRIDE_0) // scalar register + return reg; + else { + reg.quarter = quarter; + return reg; + } + } + + static INLINE SelectionReg vec16(uint32_t file, ir::Register reg) { + return SelectionReg(file, + reg, + GEN_TYPE_F, + GEN_VERTICAL_STRIDE_8, + GEN_WIDTH_8, + GEN_HORIZONTAL_STRIDE_1); + } + + static INLINE SelectionReg vec8(uint32_t file, ir::Register reg) { + return SelectionReg(file, + reg, + GEN_TYPE_F, + GEN_VERTICAL_STRIDE_8, + GEN_WIDTH_8, + GEN_HORIZONTAL_STRIDE_1); + } + + static INLINE SelectionReg vec4(uint32_t file, ir::Register reg) { + return SelectionReg(file, + reg, + GEN_TYPE_F, + GEN_VERTICAL_STRIDE_4, + GEN_WIDTH_4, + GEN_HORIZONTAL_STRIDE_1); + } + + static INLINE SelectionReg vec2(uint32_t file, ir::Register reg) { + return SelectionReg(file, + reg, + GEN_TYPE_F, + GEN_VERTICAL_STRIDE_2, + GEN_WIDTH_2, + GEN_HORIZONTAL_STRIDE_1); + } + + static INLINE SelectionReg vec1(uint32_t file, ir::Register reg) { + return SelectionReg(file, + reg, + GEN_TYPE_F, + GEN_VERTICAL_STRIDE_0, + GEN_WIDTH_1, + GEN_HORIZONTAL_STRIDE_0); + } + + static INLINE SelectionReg retype(SelectionReg reg, uint32_t type) { + reg.type = type; + return reg; + } + + static INLINE SelectionReg ud16(uint32_t file, ir::Register reg) { + return retype(vec16(file, reg), GEN_TYPE_UD); + } + + static INLINE SelectionReg ud8(uint32_t file, ir::Register reg) { + return retype(vec8(file, reg), GEN_TYPE_UD); + } + + static INLINE SelectionReg ud1(uint32_t file, ir::Register reg) { + return retype(vec1(file, reg), GEN_TYPE_UD); + } + + static INLINE SelectionReg d8(uint32_t file, ir::Register reg) { + return retype(vec8(file, reg), GEN_TYPE_D); + } + + static INLINE SelectionReg uw16(uint32_t file, ir::Register reg) { + return retype(vec16(file, reg), GEN_TYPE_UW); + } + + static INLINE SelectionReg uw8(uint32_t file, ir::Register reg) { + return retype(vec8(file, reg), GEN_TYPE_UW); + } + + static INLINE SelectionReg uw1(uint32_t file, ir::Register reg) { + return retype(vec1(file, reg), GEN_TYPE_UW); + } + + static INLINE SelectionReg ub16(uint32_t file, ir::Register reg) { + return SelectionReg(file, + reg, + GEN_TYPE_UB, + GEN_VERTICAL_STRIDE_16, + GEN_WIDTH_8, + GEN_HORIZONTAL_STRIDE_2); + } + + static INLINE SelectionReg ub8(uint32_t file, ir::Register reg) { + return SelectionReg(file, + reg, + GEN_TYPE_UB, + GEN_VERTICAL_STRIDE_16, + GEN_WIDTH_8, + GEN_HORIZONTAL_STRIDE_2); + } + + static INLINE SelectionReg ub1(uint32_t file, ir::Register reg) { + return retype(vec1(file, reg), GEN_TYPE_UB); + } + + static INLINE SelectionReg unpacked_uw(ir::Register reg) { + return SelectionReg(GEN_GENERAL_REGISTER_FILE, + reg, + GEN_TYPE_UW, + GEN_VERTICAL_STRIDE_16, + GEN_WIDTH_8, + GEN_HORIZONTAL_STRIDE_2); + } + + static INLINE SelectionReg unpacked_ub(ir::Register reg) { + return SelectionReg(GEN_GENERAL_REGISTER_FILE, + reg, + GEN_TYPE_UB, + GEN_VERTICAL_STRIDE_32, + GEN_WIDTH_8, + GEN_HORIZONTAL_STRIDE_4); + } + + static INLINE SelectionReg imm(uint32_t type) { + return SelectionReg(GEN_IMMEDIATE_VALUE, + ir::Register(), + type, + GEN_VERTICAL_STRIDE_0, + GEN_WIDTH_1, + GEN_HORIZONTAL_STRIDE_0); + } + + static INLINE SelectionReg immf(float f) { + SelectionReg immediate = imm(GEN_TYPE_F); + immediate.immediate.f = f; + return immediate; + } + + static INLINE SelectionReg immd(int d) { + SelectionReg immediate = imm(GEN_TYPE_D); + immediate.immediate.d = d; + return immediate; + } + + static INLINE SelectionReg immud(uint32_t ud) { + SelectionReg immediate = imm(GEN_TYPE_UD); + immediate.immediate.ud = ud; + return immediate; + } + + static INLINE SelectionReg immuw(uint16_t uw) { + SelectionReg immediate = imm(GEN_TYPE_UW); + immediate.immediate.ud = uw | (uw << 16); + return immediate; + } + + static INLINE SelectionReg immw(int16_t w) { + SelectionReg immediate = imm(GEN_TYPE_W); + immediate.immediate.d = w | (w << 16); + return immediate; + } + + static INLINE SelectionReg immv(uint32_t v) { + SelectionReg immediate = imm(GEN_TYPE_V); + immediate.vstride = GEN_VERTICAL_STRIDE_0; + immediate.width = GEN_WIDTH_8; + immediate.hstride = GEN_HORIZONTAL_STRIDE_1; + immediate.immediate.ud = v; + return immediate; + } + + static INLINE SelectionReg immvf(uint32_t v) { + SelectionReg immediate = imm(GEN_TYPE_VF); + immediate.vstride = GEN_VERTICAL_STRIDE_0; + immediate.width = GEN_WIDTH_4; + immediate.hstride = GEN_HORIZONTAL_STRIDE_1; + immediate.immediate.ud = v; + return immediate; + } + + static INLINE SelectionReg immvf4(uint32_t v0, uint32_t v1, uint32_t v2, uint32_t v3) { + SelectionReg immediate = imm(GEN_TYPE_VF); + immediate.vstride = GEN_VERTICAL_STRIDE_0; + immediate.width = GEN_WIDTH_4; + immediate.hstride = GEN_HORIZONTAL_STRIDE_1; + immediate.immediate.ud = ((v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24)); + return immediate; + } + + static INLINE SelectionReg f1grf(ir::Register reg) { + return vec1(GEN_GENERAL_REGISTER_FILE, reg); + } + + static INLINE SelectionReg f2grf(ir::Register reg) { + return vec2(GEN_GENERAL_REGISTER_FILE, reg); + } + + static INLINE SelectionReg f4grf(ir::Register reg) { + return vec4(GEN_GENERAL_REGISTER_FILE, reg); + } + + static INLINE SelectionReg f8grf(ir::Register reg) { + return vec8(GEN_GENERAL_REGISTER_FILE, reg); + } + + static INLINE SelectionReg f16grf(ir::Register reg) { + return vec16(GEN_GENERAL_REGISTER_FILE, reg); + } + + static INLINE SelectionReg ud16grf(ir::Register reg) { + return ud16(GEN_GENERAL_REGISTER_FILE, reg); + } + + static INLINE SelectionReg ud8grf(ir::Register reg) { + return ud8(GEN_GENERAL_REGISTER_FILE, reg); + } + + static INLINE SelectionReg ud1grf(ir::Register reg) { + return ud1(GEN_GENERAL_REGISTER_FILE, reg); + } + + static INLINE SelectionReg uw1grf(ir::Register reg) { + return uw1(GEN_GENERAL_REGISTER_FILE, reg); + } + + static INLINE SelectionReg uw8grf(ir::Register reg) { + return uw8(GEN_GENERAL_REGISTER_FILE, reg); + } + + static INLINE SelectionReg uw16grf(ir::Register reg) { + return uw16(GEN_GENERAL_REGISTER_FILE, reg); + } + + static INLINE SelectionReg ub1grf(ir::Register reg) { + return ub1(GEN_GENERAL_REGISTER_FILE, reg); + } + + static INLINE SelectionReg ub8grf(ir::Register reg) { + return ub8(GEN_GENERAL_REGISTER_FILE, reg); + } + + static INLINE SelectionReg ub16grf(ir::Register reg) { + return ub16(GEN_GENERAL_REGISTER_FILE, reg); + } + + static INLINE SelectionReg null(void) { + return SelectionReg(GEN_ARCHITECTURE_REGISTER_FILE, + GEN_ARF_NULL, + 0, + GEN_TYPE_F, + GEN_VERTICAL_STRIDE_8, + GEN_WIDTH_8, + GEN_HORIZONTAL_STRIDE_1); + } + + static INLINE SelectionReg acc(void) { + return SelectionReg(GEN_ARCHITECTURE_REGISTER_FILE, + GEN_ARF_ACCUMULATOR, + 0, + GEN_TYPE_F, + GEN_VERTICAL_STRIDE_8, + GEN_WIDTH_8, + GEN_HORIZONTAL_STRIDE_1); + } + + static INLINE SelectionReg ip(void) { + return SelectionReg(GEN_ARCHITECTURE_REGISTER_FILE, + GEN_ARF_IP, + 0, + GEN_TYPE_D, + GEN_VERTICAL_STRIDE_4, + GEN_WIDTH_1, + GEN_HORIZONTAL_STRIDE_0); + } + + static INLINE SelectionReg notification1(void) { + return SelectionReg(GEN_ARCHITECTURE_REGISTER_FILE, + GEN_ARF_NOTIFICATION_COUNT, + 1, + GEN_TYPE_UD, + GEN_VERTICAL_STRIDE_0, + GEN_WIDTH_1, + GEN_HORIZONTAL_STRIDE_0); + } + + static INLINE SelectionReg flag(ir::Register reg) { + return uw1(GEN_ARCHITECTURE_REGISTER_FILE, reg); + } +#if 0 + static INLINE SelectionReg next(SelectionReg reg) { + return reg; + } +#endif + + static INLINE SelectionReg negate(SelectionReg reg) { + reg.negation ^= 1; + return reg; + } + + static INLINE SelectionReg abs(SelectionReg reg) { + reg.absolute = 1; + reg.negation = 0; + return reg; + } + }; + + /*! A selection instruction is also almost a Gen instruction but *before* the + * register allocation + */ + struct SelectionInstruction + { + /*! No more than 6 sources (used by typed writes) */ + enum { MAX_SRC_NUM = 6 }; + /*! No more than 4 destinations (used by samples and untyped reads) */ + enum { MAX_DST_NUM = 4 }; + /*! All destinations */ + SelectionReg dst[MAX_DST_NUM]; + /*! All sources */ + SelectionReg src[MAX_SRC_NUM]; + /*! State of the instruction (extra fields neeed for the encoding) */ + GenInstructionState state; + /*! Gen opcode */ + uint8_t opcode; + /*! For math instructions only */ + uint8_t function:4; + /*! For byte scattered reads / writes */ + uint16_t elemSize:4; + }; + + /*! A selection tile is the result of a m-to-n IR instruction to selection + * instructions mapping (the role of the instruction selection pass). + */ + struct SelectionTile + { + INLINE SelectionTile(void) : next(NULL) {} + /*! All the emitted instructions */ + vector insn; + /*! Registers output by the tile (i.e. produced values) */ + vector out; + /*! Registers required by the tile (i.e. input values) */ + vector in; + /*! Extra registers needed by the tile (only live in the tile) */ + vector tmp; + /*! Instructions actually captured by the tile (used by RA) */ + vector ir; + /*! We chain the tiles together */ + SelectionTile *next; + }; + + /*! Owns the selection engine */ + class GenContext; + + /*! Selection engine produces the pre-ISA instruction tiles */ + struct SelectionEngine + { + /*! simdWidth is the default width for the instructions */ + SelectionEngine(GenContext &ctx); + /*! Release everything */ + ~SelectionEngine(void); + /*! Implement the instruction selection itself */ + virtual void select(void) = 0; + /*! Size of the stack (should be large enough) */ + enum { MAX_STATE_NUM = 16 }; + /*! Push the current instruction state */ + INLINE void push(void) { + assert(stateNum < MAX_STATE_NUM); + stack[stateNum++] = curr; + } + /*! Pop the latest pushed state */ + INLINE void pop(void) { + assert(stateNum > 0); + curr = stack[--stateNum]; + } + /*! Owns this structure */ + GenContext &ctx; + /*! List of emitted tiles */ + SelectionTile *tileList; + /*! Currently processed tile */ + SelectionTile *tile; + /*! Current instruction state to use */ + GenInstructionState curr; + /*! State used to encode the instructions */ + GenInstructionState stack[MAX_STATE_NUM]; + /*! Number of states currently pushed */ + uint32_t stateNum; + /*! To make function prototypes more readable */ + typedef const SelectionReg &Reg; + +#define ALU1(OP) \ + INLINE void OP(Reg dst, Reg src) { ALU1(GEN_OPCODE_##OP, dst, src); } +#define ALU2(OP) \ + INLINE void OP(Reg dst, Reg src0, Reg src1) { ALU2(GEN_OPCODE_##OP, dst, src0, src1); } + ALU1(MOV) + ALU1(RNDZ) + ALU1(RNDE) + ALU2(SEL) + ALU1(NOT) + ALU2(AND) + ALU2(OR) + ALU2(XOR) + ALU2(SHR) + ALU2(SHL) + ALU2(RSR) + ALU2(RSL) + ALU2(ASR) + ALU2(ADD) + ALU2(MUL) + ALU1(FRC) + ALU1(RNDD) + ALU2(MAC) + ALU2(MACH) + ALU1(LZD) +#undef ALU1 +#undef ALU2 + + /*! Jump indexed instruction */ + void JMPI(Reg src); + /*! Compare instructions */ + void CMP(uint32_t conditional, Reg src0, Reg src1); + /*! EOT is used to finish GPGPU threads */ + void EOT(Reg src); + /*! No-op */ + void NOP(void); + /*! Wait instruction (used for the barrier) */ + void WAIT(void); + /*! Untyped read (up to 4 elements) */ + void UNTYPED_READ(Reg dst0, Reg addr, uint32_t bti); + void UNTYPED_READ(Reg dst0, Reg dst1, Reg addr, uint32_t bti); + void UNTYPED_READ(Reg dst0, Reg dst1, Reg dst2, Reg addr, uint32_t bti); + void UNTYPED_READ(Reg dst0, Reg dst1, Reg dst2, Reg dst3, Reg addr, uint32_t bti); + /*! Untyped write (up to 4 elements) */ + void UNTYPED_WRITE(Reg addr, Reg src0, uint32_t bti); + void UNTYPED_WRITE(Reg addr, Reg src0, Reg src1, uint32_t bti); + void UNTYPED_WRITE(Reg addr, Reg src0, Reg src1, Reg src2, uint32_t bti); + void UNTYPED_WRITE(Reg addr, Reg src0, Reg src1, Reg src2, Reg src3, uint32_t bti); + /*! Byte gather (for unaligned bytes, shorts and ints) */ + void BYTE_GATHER(Reg dst, Reg addr, uint32_t bti, uint32_t elemSize); + /*! Byte scatter (for unaligned bytes, shorts and ints) */ + void BYTE_SCATTER(Reg addr, Reg src, uint32_t bti, uint32_t elemSize); + /*! Extended math function */ + void MATH(Reg dst, uint32_t function, Reg src0, Reg src1); + /*! Encode unary instructions */ + void ALU1(uint32_t opcode, Reg dst, Reg src); + /*! Encode binary instructions */ + void ALU2(uint32_t opcode, Reg dst, Reg src0, Reg src1); + }; + + /*! This is a stupid one-to-many instruction selection */ + SelectionEngine *newPoorManSelectionEngine(void); + +} /* namespace gbe */ + +#endif /* __GEN_SELECTOR_HPP__ */ + -- 2.7.4