From 3d2c8529586d0078db080d9317174dcdd06afc6c Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 28 Jan 2016 17:13:44 +0000 Subject: [PATCH] AMDGPU: waitcnt operand fixes Summary: Allow lgkmcnt up to 0xF (hardware allows that). Fix mask for ExpCnt in AMDGPUInstPrinter. Reviewers: tstellarAMD, arsenm Subscribers: arsenm Differential Revision: http://reviews.llvm.org/D16314 Patch by: Nikolay Haustov llvm-svn: 259059 --- llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 6 +++--- llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp | 7 ++----- llvm/lib/Target/AMDGPU/SIInsertWaits.cpp | 4 ++-- llvm/test/MC/AMDGPU/sopp.s | 12 +++++++++--- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 0b430ff..5b03059 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1516,7 +1516,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { CntMask = 0x7; CntShift = 4; } else if (CntName == "lgkmcnt") { - CntMask = 0x7; + CntMask = 0xf; CntShift = 8; } else { return true; @@ -1532,8 +1532,8 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { // Disable all counters by default. // vmcnt [3:0] // expcnt [6:4] - // lgkmcnt [10:8] - int64_t CntVal = 0x77f; + // lgkmcnt [11:8] + int64_t CntVal = 0xf7f; SMLoc S = Parser.getTok().getLoc(); switch(getLexer().getKind()) { diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp index a187de8..2cd189d 100644 --- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -616,12 +616,9 @@ void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo, void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - // Note: Mask values are taken from SIInsertWaits.cpp and not from ISA docs - // SIInsertWaits.cpp bits usage does not match ISA docs description but it - // works so it might be a misprint in docs. unsigned SImm16 = MI->getOperand(OpNo).getImm(); unsigned Vmcnt = SImm16 & 0xF; - unsigned Expcnt = (SImm16 >> 4) & 0xF; + unsigned Expcnt = (SImm16 >> 4) & 0x7; unsigned Lgkmcnt = (SImm16 >> 8) & 0xF; bool NeedSpace = false; @@ -638,7 +635,7 @@ void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo, NeedSpace = true; } - if (Lgkmcnt != 0x7) { + if (Lgkmcnt != 0xF) { if (NeedSpace) O << ' '; O << "lgkmcnt(" << Lgkmcnt << ')'; diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp index 94e6147..7d98e0e88 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp @@ -138,7 +138,7 @@ public: char SIInsertWaits::ID = 0; -const Counters SIInsertWaits::WaitCounts = { { 15, 7, 7 } }; +const Counters SIInsertWaits::WaitCounts = { { 15, 7, 15 } }; const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } }; FunctionPass *llvm::createSIInsertWaits(TargetMachine &tm) { @@ -379,7 +379,7 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB, BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)) .addImm((Counts.Named.VM & 0xF) | ((Counts.Named.EXP & 0x7) << 4) | - ((Counts.Named.LGKM & 0x7) << 8)); + ((Counts.Named.LGKM & 0xF) << 8)); LastOpcodeType = OTHER; LastInstWritesM0 = false; diff --git a/llvm/test/MC/AMDGPU/sopp.s b/llvm/test/MC/AMDGPU/sopp.s index b072c16..ec9e984 100644 --- a/llvm/test/MC/AMDGPU/sopp.s +++ b/llvm/test/MC/AMDGPU/sopp.s @@ -40,16 +40,22 @@ s_nop 0xffff // CHECK: s_nop 0xffff ; encoding: [0xff,0xff,0x80,0xbf] // CHECK: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] s_waitcnt vmcnt(1) - // CHECK: s_waitcnt vmcnt(1) ; encoding: [0x71,0x07,0x8c,0xbf] + // CHECK: s_waitcnt vmcnt(1) ; encoding: [0x71,0x0f,0x8c,0xbf] + + s_waitcnt vmcnt(9) + // CHECK: s_waitcnt vmcnt(9) ; encoding: [0x79,0x0f,0x8c,0xbf] s_waitcnt expcnt(2) - // CHECK: s_waitcnt expcnt(2) ; encoding: [0x2f,0x07,0x8c,0xbf] + // CHECK: s_waitcnt expcnt(2) ; encoding: [0x2f,0x0f,0x8c,0xbf] s_waitcnt lgkmcnt(3) // CHECK: s_waitcnt lgkmcnt(3) ; encoding: [0x7f,0x03,0x8c,0xbf] + s_waitcnt lgkmcnt(9) + // CHECK: s_waitcnt lgkmcnt(9) ; encoding: [0x7f,0x09,0x8c,0xbf] + s_waitcnt vmcnt(0), expcnt(0) - // CHECK: s_waitcnt vmcnt(0) expcnt(0) ; encoding: [0x00,0x07,0x8c,0xbf] + // CHECK: s_waitcnt vmcnt(0) expcnt(0) ; encoding: [0x00,0x0f,0x8c,0xbf] s_sethalt 9 // CHECK: s_sethalt 9 ; encoding: [0x09,0x00,0x8d,0xbf] -- 2.7.4