From c88ba36eab7c3bbb934addaa0d7707825a2b9c96 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 29 Oct 2016 04:05:06 +0000 Subject: [PATCH] AMDGPU: Use 1/2pi inline imm on VI I'm guessing at how it is supposed to be printed llvm-svn: 285490 --- llvm/lib/Target/AMDGPU/AMDGPU.td | 8 ++- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 2 + llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 5 ++ .../AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp | 24 ++++++--- .../Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h | 6 ++- .../Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp | 6 ++- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 6 ++- llvm/test/CodeGen/AMDGPU/imm.ll | 60 ++++++++++++++++++++++ llvm/test/MC/AMDGPU/literals.s | 20 ++++---- 9 files changed, 113 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 90796f2..18098e7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -153,6 +153,12 @@ def FeatureSMemRealTime : SubtargetFeature<"s-memrealtime", "Has s_memrealtime instruction" >; +def FeatureInv2PiInlineImm : SubtargetFeature<"inv-2pi-inline-imm", + "HasInv2PiInlineImm", + "true", + "Has 1 / (2 * pi) as inline immediate" +>; + def Feature16BitInsts : SubtargetFeature<"16-bit-insts", "Has16BitInsts", "true", @@ -318,7 +324,7 @@ def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS", FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN, FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel, - FeatureScalarStores + FeatureScalarStores, FeatureInv2PiInlineImm ] >; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 2ba18dd..dc3c64d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -110,6 +110,8 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, 
Has16BitInsts(false), HasMovrel(false), HasVGPRIndexMode(false), + HasScalarStores(false), + HasInv2PiInlineImm(false), FlatAddressSpace(false), R600ALUInst(false), diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 4167721..60142fe 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -106,6 +106,7 @@ protected: bool HasMovrel; bool HasVGPRIndexMode; bool HasScalarStores; + bool HasInv2PiInlineImm; bool FlatAddressSpace; bool R600ALUInst; bool CaymanISA; @@ -532,6 +533,10 @@ public: return HasScalarStores; } + bool hasInv2PiInlineImm() const { + return HasInv2PiInlineImm; + } + bool enableSIScheduler() const { return EnableSIScheduler; } diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp index be9e74b..aec1008 100644 --- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -316,7 +316,9 @@ void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo, printOperand(MI, OpNo, STI, O); } -void AMDGPUInstPrinter::printImmediate32(uint32_t Imm, raw_ostream &O) { +void AMDGPUInstPrinter::printImmediate32(uint32_t Imm, + const MCSubtargetInfo &STI, + raw_ostream &O) { int32_t SImm = static_cast<int32_t>(Imm); if (SImm >= -16 && SImm <= 64) { O << SImm; @@ -341,11 +343,16 @@ void AMDGPUInstPrinter::printImmediate32(uint32_t Imm, raw_ostream &O) { O << "4.0"; else if (Imm == FloatToBits(-4.0f)) O << "-4.0"; + else if (Imm == 0x3e22f983 && + STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]) + O << "1/2pi"; else O << formatHex(static_cast<uint64_t>(Imm)); } -void AMDGPUInstPrinter::printImmediate64(uint64_t Imm, raw_ostream &O) { +void AMDGPUInstPrinter::printImmediate64(uint64_t Imm, + const MCSubtargetInfo &STI, + raw_ostream &O) { int64_t SImm = static_cast<int64_t>(Imm); if (SImm >= -16 && SImm <= 64) { O << SImm; @@ -370,6 +377,9 @@ void 
AMDGPUInstPrinter::printImmediate64(uint64_t Imm, raw_ostream &O) { O << "4.0"; else if (Imm == DoubleToBits(-4.0)) O << "-4.0"; + else if (Imm == 0x3fc45f306dc9c882 && + STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]) + O << "1/2pi"; else { assert(isUInt<32>(Imm) || Imm == 0x3fc45f306dc9c882); @@ -405,13 +415,13 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, if (RCID != -1) { unsigned RCBits = AMDGPU::getRegBitWidth(MRI.getRegClass(RCID)); if (RCBits == 32) - printImmediate32(Op.getImm(), O); + printImmediate32(Op.getImm(), STI, O); else if (RCBits == 64) - printImmediate64(Op.getImm(), O); + printImmediate64(Op.getImm(), STI, O); else llvm_unreachable("Invalid register class size"); } else if (Desc.OpInfo[OpNo].OperandType == MCOI::OPERAND_IMMEDIATE) { - printImmediate32(Op.getImm(), O); + printImmediate32(Op.getImm(), STI, O); } else { // We hit this for the immediate instruction bits that don't yet have a // custom printer. @@ -427,9 +437,9 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, int RCID = Desc.OpInfo[OpNo].RegClass; unsigned RCBits = AMDGPU::getRegBitWidth(MRI.getRegClass(RCID)); if (RCBits == 32) - printImmediate32(FloatToBits(Op.getFPImm()), O); + printImmediate32(FloatToBits(Op.getFPImm()), STI, O); else if (RCBits == 64) - printImmediate64(DoubleToBits(Op.getFPImm()), O); + printImmediate64(DoubleToBits(Op.getFPImm()), STI, O); else llvm_unreachable("Invalid register class size"); } diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h index 2c54e48..8a5ce60 100644 --- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h +++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h @@ -81,8 +81,10 @@ private: void printRegOperand(unsigned RegNo, raw_ostream &O); void printVOPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); - void printImmediate32(uint32_t Imm, raw_ostream &O); - void 
printImmediate64(uint64_t Imm, raw_ostream &O); + void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI, + raw_ostream &O); + void printImmediate64(uint64_t Imm, const MCSubtargetInfo &STI, + raw_ostream &O); void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printOperandAndFPInputMods(const MCInst *MI, unsigned OpNo, diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp index 5b128f0..cd16fe0 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -117,7 +117,8 @@ static uint32_t getLit32Encoding(uint32_t Val, const MCSubtargetInfo &STI) { if (Val == FloatToBits(-4.0f)) return 247; - if (AMDGPU::isVI(STI) && Val == 0x3e22f983) // 1/(2*pi) + if (Val == 0x3e22f983 && // 1.0 / (2.0 * pi) + STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]) return 248; return 255; @@ -152,7 +153,8 @@ static uint32_t getLit64Encoding(uint64_t Val, const MCSubtargetInfo &STI) { if (Val == DoubleToBits(-4.0)) return 247; - if (AMDGPU::isVI(STI) && Val == 0x3fc45f306dc9c882) // 1/(2*pi) + if (Val == 0x3fc45f306dc9c882 && // 1.0 / (2.0 * pi) + STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]) return 248; return 255; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index d8c98e6..cdd98c6 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1725,7 +1725,8 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const { (DoubleToBits(2.0) == Val) || (DoubleToBits(-2.0) == Val) || (DoubleToBits(4.0) == Val) || - (DoubleToBits(-4.0) == Val); + (DoubleToBits(-4.0) == Val) || + (ST.hasInv2PiInlineImm() && Val == 0x3fc45f306dc9c882); } // The actual type of the operand does not seem to matter as long @@ -1746,7 +1747,8 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const { 
(FloatToBits(2.0f) == Val) || (FloatToBits(-2.0f) == Val) || (FloatToBits(4.0f) == Val) || - (FloatToBits(-4.0f) == Val); + (FloatToBits(-4.0f) == Val) || + (ST.hasInv2PiInlineImm() && Val == 0x3e22f983); } bool SIInstrInfo::isInlineConstant(const MachineOperand &MO, diff --git a/llvm/test/CodeGen/AMDGPU/imm.ll b/llvm/test/CodeGen/AMDGPU/imm.ll index 92a1468..f8e4be44 100644 --- a/llvm/test/CodeGen/AMDGPU/imm.ll +++ b/llvm/test/CodeGen/AMDGPU/imm.ll @@ -118,6 +118,24 @@ define void @store_inline_imm_m_4.0_f32(float addrspace(1)* %out) { ret void } + +; GCN-LABEL: {{^}}store_inline_imm_inv_2pi_f32: +; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e22f983{{$}} +; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 1/2pi{{$}} +; GCN: buffer_store_dword [[REG]] +define void @store_inline_imm_inv_2pi_f32(float addrspace(1)* %out) { + store float 0x3FC45F3060000000, float addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_f32: +; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbe22f983{{$}} +; GCN: buffer_store_dword [[REG]] +define void @store_inline_imm_m_inv_2pi_f32(float addrspace(1)* %out) { + store float 0xBFC45F3060000000, float addrspace(1)* %out + ret void +} + ; GCN-LABEL: {{^}}store_literal_imm_f32: ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x45800000 ; GCN: buffer_store_dword [[REG]] @@ -418,6 +436,30 @@ define void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, double %x) { ret void } +; GCN-LABEL: {{^}}add_inline_imm_inv_2pi_f64: +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; SI-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0x6dc9c882 +; SI-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x3fc45f30 +; SI: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} + +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c +; VI: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 1/2pi +; VI: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_inv_2pi_f64(double addrspace(1)* %out, 
double %x) { + %y = fadd double %x, 0x3fc45f306dc9c882 + store double %y, double addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}add_m_inv_2pi_f64: +; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0x6dc9c882 +; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xbfc45f30 +; GCN: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @add_m_inv_2pi_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, 0xbfc45f306dc9c882 + store double %y, double addrspace(1)* %out + ret void +} ; GCN-LABEL: {{^}}add_inline_imm_1_f64: ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb @@ -599,6 +641,24 @@ define void @store_inline_imm_m_4.0_f64(double addrspace(1)* %out) { ret void } +; GCN-LABEL: {{^}}store_inv_2pi_f64: +; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0x6dc9c882 +; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x3fc45f30 +; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @store_inv_2pi_f64(double addrspace(1)* %out) { + store double 0x3fc45f306dc9c882, double addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_f64: +; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0x6dc9c882 +; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xbfc45f30 +; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @store_inline_imm_m_inv_2pi_f64(double addrspace(1)* %out) { + store double 0xbfc45f306dc9c882, double addrspace(1)* %out + ret void +} + ; GCN-LABEL: {{^}}store_literal_imm_f64: ; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}} ; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x40b00000 diff --git a/llvm/test/MC/AMDGPU/literals.s b/llvm/test/MC/AMDGPU/literals.s index a552e67..3367f71 100644 --- a/llvm/test/MC/AMDGPU/literals.s +++ b/llvm/test/MC/AMDGPU/literals.s @@ -429,11 +429,11 @@ v_and_b32_e32 v0, 0xffffffffffffffff, v1 v_trunc_f32_e32 v0, 0x3fc45f306dc9c882 // NOSICI: error: invalid operand for instruction -// VI: 
v_fract_f64_e32 v[0:1], 0x3fc45f306dc9c882 ; encoding: [0xf8,0x64,0x00,0x7e] +// VI: v_fract_f64_e32 v[0:1], 1/2pi ; encoding: [0xf8,0x64,0x00,0x7e] v_fract_f64_e32 v[0:1], 0x3fc45f306dc9c882 // SICI: v_trunc_f32_e32 v0, 0x3e22f983 ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e] -// VI: v_trunc_f32_e32 v0, 0x3e22f983 ; encoding: [0xf8,0x38,0x00,0x7e] +// VI: v_trunc_f32_e32 v0, 1/2pi ; encoding: [0xf8,0x38,0x00,0x7e] v_trunc_f32_e32 v0, 0x3e22f983 // SICI: v_fract_f64_e32 v[0:1], 0x3e22f983 ; encoding: [0xff,0x7c,0x00,0x7e,0x83,0xf9,0x22,0x3e] @@ -445,11 +445,11 @@ v_fract_f64_e32 v[0:1], 0x3e22f983 v_trunc_f32_e64 v0, 0x3fc45f306dc9c882 // NOSICI: error: invalid operand for instruction -// VI: v_fract_f64_e64 v[0:1], 0x3fc45f306dc9c882 ; encoding: [0x00,0x00,0x72,0xd1,0xf8,0x00,0x00,0x00] +// VI: v_fract_f64_e64 v[0:1], 1/2pi ; encoding: [0x00,0x00,0x72,0xd1,0xf8,0x00,0x00,0x00] v_fract_f64_e64 v[0:1], 0x3fc45f306dc9c882 // NOSICI: error: invalid operand for instruction -// VI: v_trunc_f32_e64 v0, 0x3e22f983 ; encoding: [0x00,0x00,0x5c,0xd1,0xf8,0x00,0x00,0x00] +// VI: v_trunc_f32_e64 v0, 1/2pi ; encoding: [0x00,0x00,0x5c,0xd1,0xf8,0x00,0x00,0x00] v_trunc_f32_e64 v0, 0x3e22f983 // NOSICI: error: invalid operand for instruction @@ -457,21 +457,21 @@ v_trunc_f32_e64 v0, 0x3e22f983 v_fract_f64_e64 v[0:1], 0x3e22f983 // NOSICI: error: invalid operand for instruction -// VI: s_mov_b64 s[0:1], 0x3fc45f306dc9c882 ; encoding: [0xf8,0x01,0x80,0xbe] +// VI: s_mov_b64 s[0:1], 1/2pi ; encoding: [0xf8,0x01,0x80,0xbe] s_mov_b64_e32 s[0:1], 0.159154943091895317852646485335 // SICI: v_and_b32_e32 v0, 0x3e22f983, v1 ; encoding: [0xff,0x02,0x00,0x36,0x83,0xf9,0x22,0x3e] -// VI: v_and_b32_e32 v0, 0x3e22f983, v1 ; encoding: [0xf8,0x02,0x00,0x26] +// VI: v_and_b32_e32 v0, 1/2pi, v1 ; encoding: [0xf8,0x02,0x00,0x26] v_and_b32_e32 v0, 0.159154943091895317852646485335, v1 // NOSICI: error: invalid operand for instruction -// VI: v_and_b32_e64 v0, 0x3e22f983, v1 ; encoding: 
[0x00,0x00,0x13,0xd1,0xf8,0x02,0x02,0x00] +// VI: v_and_b32_e64 v0, 1/2pi, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf8,0x02,0x02,0x00] v_and_b32_e64 v0, 0.159154943091895317852646485335, v1 // SICI: v_fract_f64_e32 v[0:1], 0x3fc45f30 ; encoding: [0xff,0x7c,0x00,0x7e,0x30,0x5f,0xc4,0x3f] -// VI: v_fract_f64_e32 v[0:1], 0x3fc45f306dc9c882 ; encoding: [0xf8,0x64,0x00,0x7e] +// VI: v_fract_f64_e32 v[0:1], 1/2pi ; encoding: [0xf8,0x64,0x00,0x7e] v_fract_f64 v[0:1], 0.159154943091895317852646485335 // SICI: v_trunc_f32_e32 v0, 0x3e22f983 ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e] -// VI: v_trunc_f32_e32 v0, 0x3e22f983 ; encoding: [0xf8,0x38,0x00,0x7e] -v_trunc_f32 v0, 0.159154943091895317852646485335 \ No newline at end of file +// VI: v_trunc_f32_e32 v0, 1/2pi ; encoding: [0xf8,0x38,0x00,0x7e] +v_trunc_f32 v0, 0.159154943091895317852646485335 -- 2.7.4