class Opcode(object):
def __init__(self, name, dests, srcs, imms, is_float, can_eliminate,
- can_reorder, encoding_16, encoding_32):
+ can_reorder, schedule_class, encoding_16, encoding_32):
self.name = name
self.dests = dests if dests != VARIABLE else 0
self.srcs = srcs if srcs != VARIABLE else 0
self.is_float = is_float
self.can_eliminate = can_eliminate
self.can_reorder = can_reorder
+ self.schedule_class = schedule_class
self.encoding_16 = encoding_16
self.encoding_32 = encoding_32
assert(length_long == length_short + (4 if length_short > 8 else 2))
def op(name, encoding_32, dests = 1, srcs = 0, imms = [], is_float = False,
- can_eliminate = True, can_reorder = True, encoding_16 = None):
+ can_eliminate = True, can_reorder = True, encoding_16 = None,
+ schedule_class = "none"):
encoding_16 = Encoding(encoding_16) if encoding_16 is not None else None
encoding_32 = Encoding(encoding_32) if encoding_32 is not None else None
- opcodes[name] = Opcode(name, dests, srcs, imms, is_float, can_eliminate, can_reorder, encoding_16, encoding_32)
+ opcodes[name] = Opcode(name, dests, srcs, imms, is_float, can_eliminate,
+ can_reorder, schedule_class, encoding_16, encoding_32)
def immediate(name, ctype = "uint32_t"):
imm = Immediate(name, ctype)
FUNOP = lambda x: (x << 28)
FUNOP_MASK = FUNOP((1 << 14) - 1)
-def funop(name, opcode):
+def funop(name, opcode, schedule_class = "none"):
op(name, (0x0A | L | (opcode << 28),
0x3F | L | (((1 << 14) - 1) << 28), 6, _),
- srcs = 1, is_float = True)
+ srcs = 1, is_float = True, schedule_class = schedule_class)
def iunop(name, opcode):
assert(opcode < 4)
# Listing of opcodes
funop("floor", 0b000000)
funop("srsqrt", 0b000001)
-funop("dfdx", 0b000100)
-funop("dfdy", 0b000110)
+funop("dfdx", 0b000100, schedule_class = "coverage")
+funop("dfdy", 0b000110, schedule_class = "coverage")
funop("rcp", 0b001000)
funop("rsqrt", 0b001001)
funop("sin_pt_1", 0b001010)
for memory, can_reorder in [("texture", True), ("image", False)]:
op(f"{memory}_load", encoding_32 = (0x71, 0x7F, 8, 10), # XXX WRONG SIZE
srcs = 6, imms = [DIM, LOD_MODE, MASK, SCOREBOARD, OFFSET],
- can_reorder = can_reorder)
+ can_reorder = can_reorder,
+ schedule_class = "none" if can_reorder else "load")
# sources are base, index
op("device_load",
encoding_32 = (0x05, 0x7F, 6, 8),
- srcs = 2, imms = [FORMAT, MASK, SHIFT, SCOREBOARD], can_reorder = False)
+ srcs = 2, imms = [FORMAT, MASK, SHIFT, SCOREBOARD], can_reorder = False,
+ schedule_class = "load")
# sources are base (relative to workgroup memory), index
op("local_load",
encoding_32 = (0b1101001, 0, 6, 8),
- srcs = 2, imms = [FORMAT, MASK])
+ srcs = 2, imms = [FORMAT, MASK], can_reorder = False,
+ schedule_class = "load")
# sources are value, base, index
# TODO: Consider permitting the short form
op("device_store",
encoding_32 = (0x45 | (1 << 47), 0, 8, _),
- dests = 0, srcs = 3, imms = [FORMAT, MASK, SHIFT, SCOREBOARD], can_eliminate = False)
+ dests = 0, srcs = 3, imms = [FORMAT, MASK, SHIFT, SCOREBOARD], can_eliminate = False,
+ schedule_class = "store")
# sources are value, base, index
op("local_store",
encoding_32 = (0b0101001, 0, 6, 8),
dests = 0, srcs = 3, imms = [FORMAT, MASK],
- can_eliminate=False)
+ can_eliminate=False, schedule_class = "store")
# sources are value, index
# TODO: Consider permitting the short form
# sources are value, base, index
op("atomic",
encoding_32 = (0x15 | (1 << 26) | (1 << 31) | (5 << 44), 0x3F | (1 << 26) | (1 << 31) | (5 << 44), 8, _),
- dests = 1, srcs = 3, imms = [ATOMIC_OPC, SCOREBOARD], can_eliminate = False)
+ dests = 1, srcs = 3, imms = [ATOMIC_OPC, SCOREBOARD],
+ can_eliminate = False, schedule_class = "atomic")
# XXX: stop hardcoding the long form
op("local_atomic",
encoding_32 = (0x19 | (1 << 15) | (1 << 36) | (1 << 47), 0x3F | (1 << 36) | (1 << 47), 10, _),
- dests = 1, srcs = 3, imms = [ATOMIC_OPC], can_eliminate = False)
+ dests = 1, srcs = 3, imms = [ATOMIC_OPC], schedule_class = "atomic",
+ can_eliminate = False)
op("wait", (0x38, 0xFF, 2, _), dests = 0,
- can_eliminate = False, imms = [SCOREBOARD])
+ can_eliminate = False, imms = [SCOREBOARD], schedule_class = "invalid")
op("get_sr", (0x72, 0x7F | L, 4, _), dests = 1, imms = [SR])
-op("sample_mask", (0x7fc1, 0xffff, 6, _), dests = 0, srcs = 2, can_eliminate = False)
+op("sample_mask", (0x7fc1, 0xffff, 6, _), dests = 0, srcs = 2,
+ can_eliminate = False, schedule_class = "coverage")
# Sources: sample mask, combined depth/stencil
op("zs_emit", (0x41, 0xFF | L, 4, _), dests = 0, srcs = 2,
- can_eliminate = False, imms = [ZS])
+ can_eliminate = False, imms = [ZS], schedule_class = "coverage")
# Essentially same encoding. Last source is the sample mask
op("ld_tile", (0x49, 0x7F, 8, _), dests = 1, srcs = 1,
- imms = [FORMAT, MASK, PIXEL_OFFSET], can_reorder = False)
+ imms = [FORMAT, MASK, PIXEL_OFFSET], can_reorder = False,
+ schedule_class = "coverage")
op("st_tile", (0x09, 0x7F, 8, _), dests = 0, srcs = 2,
- can_eliminate = False, imms = [FORMAT, MASK, PIXEL_OFFSET])
+ can_eliminate = False, imms = [FORMAT, MASK, PIXEL_OFFSET],
+ schedule_class = "coverage")
for (name, exact) in [("any", 0xC000), ("none", 0xC020)]:
op("jmp_exec_" + name, (exact, (1 << 16) - 1, 6, _), dests = 0, srcs = 0,
- can_eliminate = False, imms = [TARGET])
+ can_eliminate = False, schedule_class = "invalid", imms = [TARGET])
# TODO: model implicit r0l destinations
op("pop_exec", (0x52 | (0x3 << 9), ((1 << 48) - 1) ^ (0x3 << 7) ^ (0x3 << 11), 6, _),
- dests = 0, srcs = 0, can_eliminate = False, imms = [NEST])
+ dests = 0, srcs = 0, can_eliminate = False, schedule_class = "invalid",
+ imms = [NEST])
for is_float in [False, True]:
mod_mask = 0 if is_float else (0x3 << 26) | (0x3 << 38)
imms = [NEST, FCOND if is_float else ICOND, INVERT_COND]
op(name, (exact, mask, 6, _), dests = 0, srcs = 2, can_eliminate = False,
- imms = imms, is_float = is_float)
+ imms = imms, is_float = is_float,
+ schedule_class = "preload" if cf == "else" else "invalid")
op("bitop", (0x7E, 0x7F, 6, _), srcs = 2, imms = [TRUTH_TABLE])
op("intl", (0x3E, 0x7F, 6, _), srcs = 2, imms = [])
op("ldcf", (0xA1, 0xBF, 8, _), srcs = 1, imms = [CHANNELS])
op("st_vary", None, dests = 0, srcs = 2, can_eliminate = False)
op("no_varyings", (0x80000051, 0xFFFFFFFF, 4, _), dests = 0, can_eliminate = False)
-op("stop", (0x88, 0xFFFF, 2, _), dests = 0, can_eliminate = False)
-op("trap", (0x08, 0xFFFF, 2, _), dests = 0, can_eliminate = False)
-op("wait_pix", (0x48, 0xFF, 4, _), dests = 0, imms = [WRITEOUT], can_eliminate = False)
-op("signal_pix", (0x58, 0xFF, 4, _), dests = 0, imms = [WRITEOUT], can_eliminate = False)
+op("stop", (0x88, 0xFFFF, 2, _), dests = 0, can_eliminate = False,
+ schedule_class = "invalid")
+op("trap", (0x08, 0xFFFF, 2, _), dests = 0, can_eliminate = False,
+ schedule_class = "invalid")
+op("wait_pix", (0x48, 0xFF, 4, _), dests = 0, imms = [WRITEOUT],
+ can_eliminate = False, schedule_class = "coverage")
+op("signal_pix", (0x58, 0xFF, 4, _), dests = 0, imms = [WRITEOUT],
+ can_eliminate = False, schedule_class = "coverage")
# Sources are the data vector, the coordinate vector, the LOD, the bindless
# table if present (zero for texture state registers), and texture index.
op("image_write", (0xF1 | (1 << 23) | (9 << 43), 0xFF, 6, 8), dests = 0, srcs = 5, imms
- = [DIM], can_eliminate = False)
+ = [DIM], can_eliminate = False, schedule_class = "store")
# Sources are the image and the offset within shared memory
# TODO: Do we need the short encoding?
op("block_image_store", (0xB1, 0xFF, 10, _), dests = 0, srcs = 2,
- imms = [FORMAT, DIM], can_eliminate = False)
+ imms = [FORMAT, DIM], can_eliminate = False, schedule_class = "store")
# Barriers
op("threadgroup_barrier", (0x0068, 0xFFFF, 2, _), dests = 0, srcs = 0,
- can_eliminate = False)
+ can_eliminate = False, schedule_class = "barrier")
def memory_barrier(name, a, b, c):
op(name, (0xF5 | (a << 10) | (b << 8) | (c << 12), 0xFFFF, 2, _), dests = 0, srcs = 0,
- can_eliminate = False)
+ can_eliminate = False, schedule_class = "barrier")
memory_barrier("memory_barrier", 1, 2, 9)
op("or", _, srcs = 2)
# Indicates the logical end of the block, before final branches/control flow
-op("logical_end", _, dests = 0, srcs = 0, can_eliminate = False)
+op("logical_end", _, dests = 0, srcs = 0, can_eliminate = False,
+ schedule_class = "invalid")
op("collect", _, srcs = VARIABLE)
op("split", _, srcs = 1, dests = VARIABLE)
-op("phi", _, srcs = VARIABLE)
+op("phi", _, srcs = VARIABLE, schedule_class = "preload")
op("unit_test", _, dests = 0, srcs = 1, can_eliminate = False)
# Like mov, but takes a register and can only appear at the start. Guaranteed
# to be coalesced during RA, rather than lowered to a real move.
-op("preload", _, srcs = 1)
+op("preload", _, srcs = 1, schedule_class = "preload")
# Set the nesting counter. Lowers to mov_imm r0l, #nest after RA.
-op("nest", _, dests = 0, imms = [IMM], can_eliminate = False)
+op("nest", _, dests = 0, imms = [IMM], can_eliminate = False,
+ schedule_class = "barrier")