#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
+#include "llvm/Support/NativeFormatting.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
bufferAggregateConstant(Initializer, &aggBuffer);
if (aggBuffer.numSymbols()) {
unsigned int ptrSize = MAI->getCodePointerSize();
- O << " .u" << ptrSize * 8 << " ";
- getSymbol(GVar)->print(O, MAI);
- O << "[" << ElementSize / ptrSize << "] = {";
- aggBuffer.printWords(O);
- O << "}";
+ if (ElementSize % ptrSize ||
+ !aggBuffer.allSymbolsAligned(ptrSize)) {
+ // Print in bytes and use the mask() operator for pointers.
+ if (!STI.hasMaskOperator())
+ report_fatal_error(
+ "initialized packed aggregate with pointers '" +
+ GVar->getName() +
+ "' requires at least PTX ISA version 7.1");
+ O << " .u8 ";
+ getSymbol(GVar)->print(O, MAI);
+ O << "[" << ElementSize << "] = {";
+ aggBuffer.printBytes(O);
+ O << "}";
+ } else {
+ O << " .u" << ptrSize * 8 << " ";
+ getSymbol(GVar)->print(O, MAI);
+ O << "[" << ElementSize / ptrSize << "] = {";
+ aggBuffer.printWords(O);
+ O << "}";
+ }
} else {
O << " .b8 ";
getSymbol(GVar)->print(O, MAI);
}
void NVPTXAsmPrinter::AggBuffer::printBytes(raw_ostream &os) {
- for (unsigned int pos = 0; pos < size; ++pos) {
+ unsigned int ptrSize = AP.MAI->getCodePointerSize();
+ symbolPosInBuffer.push_back(size);
+ unsigned int nSym = 0;
+ unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
+ for (unsigned int pos = 0; pos < size;) {
if (pos)
os << ", ";
- os << (unsigned int)buffer[pos];
+ if (pos != nextSymbolPos) {
+ os << (unsigned int)buffer[pos];
+ ++pos;
+ continue;
+ }
+ // Generate a per-byte mask() operator for the symbol, which looks like:
+ // .global .u8 addr[] = {0xFF(foo), 0xFF00(foo), 0xFF0000(foo), ...};
+ // See https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#initializers
+ std::string symText;
+ llvm::raw_string_ostream oss(symText);
+ printSymbol(nSym, oss);
+ for (unsigned i = 0; i < ptrSize; ++i) {
+ if (i)
+ os << ", ";
+ llvm::write_hex(os, 0xFFULL << i * 8, HexPrintStyle::PrefixUpper);
+ os << "(" << symText << ")";
+ }
+ pos += ptrSize;
+ nextSymbolPos = symbolPosInBuffer[++nSym];
+ assert(nextSymbolPos >= pos);
}
}
assert(nextSymbolPos % ptrSize == 0);
assert(nextSymbolPos >= pos + ptrSize);
} else if (ptrSize == 4)
- os << *(uint32_t *)(&buffer[pos]);
+ os << support::endian::read32le(&buffer[pos]);
else
- os << *(uint64_t *)(&buffer[pos]);
+ os << support::endian::read64le(&buffer[pos]);
}
}
class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
class AggBuffer {
- // Used to buffer the emitted string for initializing global
- // aggregates.
+ // Used to buffer the emitted string for initializing global aggregates.
//
- // Normally an aggregate (array, vector or structure) is emitted
- // as a u8[]. However, if one element/field of the aggregate
- // is a non-NULL address, then the aggregate is emitted as u32[]
- // or u64[].
+ // Normally an aggregate (array, vector, or structure) is emitted as a u8[].
+ // However, if either element/field of the aggregate is a non-NULL address,
+ // and all such addresses are properly aligned, then the aggregate is
+ // emitted as u32[] or u64[]. In the case of unaligned addresses, the
+ // aggregate is emitted as u8[], and the mask() operator is used for all
+ // pointers.
//
- // We first layout the aggregate in 'buffer' in bytes, except for
- // those symbol addresses. For the i-th symbol address in the
- //aggregate, its corresponding 4-byte or 8-byte elements in 'buffer'
- // are filled with 0s. symbolPosInBuffer[i-1] records its position
- // in 'buffer', and Symbols[i-1] records the Value*.
+ // We first layout the aggregate in 'buffer' in bytes, except for those
+ // symbol addresses. For the i-th symbol address in the aggregate, its
+ // corresponding 4-byte or 8-byte elements in 'buffer' are filled with 0s.
+ // symbolPosInBuffer[i-1] records its position in 'buffer', and Symbols[i-1]
+ // records the Value*.
//
- // Once we have this AggBuffer setup, we can choose how to print
- // it out.
+ // Once we have this AggBuffer setup, we can choose how to print it out.
public:
// number of symbol addresses
unsigned numSymbols() const { return Symbols.size(); }
+ bool allSymbolsAligned(unsigned ptrSize) const {
+ return llvm::all_of(symbolPosInBuffer,
+ [=](unsigned pos) { return pos % ptrSize == 0; });
+ }
+
private:
const unsigned size; // size of the buffer in bytes
std::vector<unsigned char> buffer; // the buffer
--- /dev/null
+; RUN: not --crash llc < %s -march=nvptx -mcpu=sm_20 -mattr=+ptx70 2>&1 | \
+; RUN: FileCheck %s --check-prefix=ERROR
+
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -mattr=+ptx71 | \
+; RUN: FileCheck %s --check-prefixes=CHECK,CHECK32
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=+ptx71 | \
+; RUN: FileCheck %s --check-prefixes=CHECK,CHECK64
+; RUN: %if ptxas-11.1 %{ llc < %s -march=nvptx -mcpu=sm_20 -mattr=+ptx71 | %ptxas-verify %}
+; RUN: %if ptxas-11.1 %{ llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=+ptx71 | %ptxas-verify %}
+
+;; Test that packed structs with symbol references are represented using the
+;; mask() operator.
+
+declare void @func()
+@p = addrspace(1) global i8 0
+; CHECK: .extern .func func
+; CHECK: .u8 p;
+
+%t1 = type <{ i16, i8*, i8, void ()*, i8*, i32 }>
+@s1 = addrspace(1) global %t1 <{
+; ERROR: initialized packed aggregate with pointers 's1' requires at least PTX ISA version 7.1
+; CHECK32: .global .align 1 .u8 s1[19] = {
+; CHECK64: .global .align 1 .u8 s1[31] = {
+ i16 12,
+; CHECK-SAME: 12, 0,
+ i8* addrspacecast (i8 addrspace(1)* @p to i8*),
+; CHECK-SAME: 0xFF(generic(p)), 0xFF00(generic(p)), 0xFF0000(generic(p)), 0xFF000000(generic(p)),
+; CHECK64-SAME: 0xFF00000000(generic(p)), 0xFF0000000000(generic(p)), 0xFF000000000000(generic(p)), 0xFF00000000000000(generic(p)),
+ i8 34,
+; CHECK-SAME: 34
+ void ()* @func,
+; CHECK-SAME: 0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func),
+; CHECK64-SAME: 0xFF00000000(func), 0xFF0000000000(func), 0xFF000000000000(func), 0xFF00000000000000(func),
+ i8* addrspacecast (i8 addrspace(1)* getelementptr (i8, i8 addrspace(1)* @p, i32 3) to i8*),
+; CHECK-SAME: 0xFF(generic(p)+3), 0xFF00(generic(p)+3), 0xFF0000(generic(p)+3), 0xFF000000(generic(p)+3),
+; CHECK64-SAME: 0xFF00000000(generic(p)+3), 0xFF0000000000(generic(p)+3), 0xFF000000000000(generic(p)+3), 0xFF00000000000000(generic(p)+3),
+ i32 56 }>, align 1
+; CHECK-SAME: 56, 0, 0, 0};
+
+;; Test a case than an unaligned pointer is in a nested struct.
+
+%t2i = type <{ void ()* }>
+%t2o = type { i8, %t2i, i32 }
+@s2 = addrspace(1) global %t2o {
+; CHECK32: .global .align 8 .u8 s2[12] = {
+; CHECK64: .global .align 8 .u8 s2[16] = {
+ i8 12,
+; CHECK-SAME: 12,
+ %t2i <{ void()* @func }>,
+; CHECK-SAME: 0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func),
+; CHECK64-SAME: 0xFF00000000(func), 0xFF0000000000(func), 0xFF000000000000(func), 0xFF00000000000000(func),
+ i32 34}
+; CHECK-SAME: 0, 0, 0,
+; CHECK-SAME: 34, 0, 0, 0};
+
+;; Test that a packed struct which size is not multiple of the pointer size
+;; is printed in bytes and uses the mask() operator for pointers even though
+;; the pointers are aligned.
+
+%t3 = type <{ void ()*, i8 }>
+@s3 = addrspace(1) global %t3 <{
+; CHECK32: .global .align 1 .u8 s3[5] = {
+; CHECK64: .global .align 1 .u8 s3[9] = {
+ void ()* @func,
+; CHECK-SAME: 0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func),
+; CHECK64-SAME: 0xFF00000000(func), 0xFF0000000000(func), 0xFF000000000000(func), 0xFF00000000000000(func),
+ i8 56 }>, align 1
+; CHECK-SAME: 56};
+
+;; Test that a packed struct with aligned pointers is printed in words.
+
+%t4 = type <{ void ()*, i64 }>
+@s4 = addrspace(1) global %t4 <{
+; CHECK32: .global .align 1 .u32 s4[3] = {
+; CHECK64: .global .align 1 .u64 s4[2] = {
+ void()* @func,
+; CHECK-SAME: func,
+ i64 15}>, align 1
+; CHECK32-SAME: 15, 0};
+; CHECK64-SAME: 15};
+
+;; Test that a packed struct with unaligned pointers inside an array is handled.
+
+%t5 = type <{ void ()*, i16 }>
+@a5 = addrspace(1) global [2 x %t5] [%t5 <{ void()* @func, i16 5 }>, %t5 <{ void()* @func, i16 9 }> ]
+; CHECK32: .global .align 8 .u8 a5[12] = {
+; CHECK32-SAME: 0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func), 5, 0,
+; CHECK32-SAME: 0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func), 9, 0};
+; CHECK64: .global .align 8 .u8 a5[20] = {
+; CHECK64-SAME: 0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func),
+; CHECK64-SAME: 0xFF00000000(func), 0xFF0000000000(func), 0xFF000000000000(func), 0xFF00000000000000(func),
+; CHECK64-SAME: 5, 0,
+; CHECK64-SAME: 0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func),
+; CHECK64-SAME: 0xFF00000000(func), 0xFF0000000000(func), 0xFF000000000000(func), 0xFF00000000000000(func),
+; CHECK64-SAME: 9, 0};