From f6df092925c2f06efe5e10280bb16676fa49b73c Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 24 Aug 2023 19:05:47 -0400 Subject: [PATCH] agx: Annotate opcodes with a scheduling class Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/compiler/agx_opcodes.py | 88 ++++++++++++++++++++++++--------------- 1 file changed, 54 insertions(+), 34 deletions(-) diff --git a/src/asahi/compiler/agx_opcodes.py b/src/asahi/compiler/agx_opcodes.py index ca1e211..2585809 100644 --- a/src/asahi/compiler/agx_opcodes.py +++ b/src/asahi/compiler/agx_opcodes.py @@ -11,7 +11,7 @@ VARIABLE = ~0 class Opcode(object): def __init__(self, name, dests, srcs, imms, is_float, can_eliminate, - can_reorder, encoding_16, encoding_32): + can_reorder, schedule_class, encoding_16, encoding_32): self.name = name self.dests = dests if dests != VARIABLE else 0 self.srcs = srcs if srcs != VARIABLE else 0 @@ -21,6 +21,7 @@ class Opcode(object): self.is_float = is_float self.can_eliminate = can_eliminate self.can_reorder = can_reorder + self.schedule_class = schedule_class self.encoding_16 = encoding_16 self.encoding_32 = encoding_32 @@ -46,11 +47,13 @@ class Encoding(object): assert(length_long == length_short + (4 if length_short > 8 else 2)) def op(name, encoding_32, dests = 1, srcs = 0, imms = [], is_float = False, - can_eliminate = True, can_reorder = True, encoding_16 = None): + can_eliminate = True, can_reorder = True, encoding_16 = None, + schedule_class = "none"): encoding_16 = Encoding(encoding_16) if encoding_16 is not None else None encoding_32 = Encoding(encoding_32) if encoding_32 is not None else None - opcodes[name] = Opcode(name, dests, srcs, imms, is_float, can_eliminate, can_reorder, encoding_16, encoding_32) + opcodes[name] = Opcode(name, dests, srcs, imms, is_float, can_eliminate, + can_reorder, schedule_class, encoding_16, encoding_32) def immediate(name, ctype = "uint32_t"): imm = Immediate(name, ctype) @@ -165,10 +168,10 @@ INTERPOLATION = enum("interpolation", { FUNOP = lambda x: (x << 28) FUNOP_MASK = FUNOP((1 << 14) - 1) -def funop(name, opcode): +def funop(name, opcode, schedule_class = "none"): op(name, (0x0A | L | (opcode << 28), 0x3F | L | (((1 << 14) - 1) << 28), 6, _), - srcs = 1, is_float = True) + srcs = 1, is_float = True, schedule_class = schedule_class) def iunop(name, opcode): assert(opcode < 4) @@ -180,8 +183,8 @@ def iunop(name, opcode): # Listing of opcodes funop("floor", 0b000000) funop("srsqrt", 0b000001) -funop("dfdx", 0b000100) -funop("dfdy", 0b000110) +funop("dfdx", 0b000100, schedule_class = "coverage") +funop("dfdy", 0b000110, schedule_class = "coverage") funop("rcp", 0b001000) funop("rsqrt", 0b001001) funop("sin_pt_1", 0b001010) @@ -258,29 +261,33 @@ op("texture_sample", for memory, can_reorder in [("texture", True), ("image", False)]: op(f"{memory}_load", encoding_32 = (0x71, 0x7F, 8, 10), # XXX WRONG SIZE srcs = 6, imms = [DIM, LOD_MODE, MASK, SCOREBOARD, OFFSET], - can_reorder = can_reorder) + can_reorder = can_reorder, + schedule_class = "none" if can_reorder else "load") # sources are base, index op("device_load", encoding_32 = (0x05, 0x7F, 6, 8), - srcs = 2, imms = [FORMAT, MASK, SHIFT, SCOREBOARD], can_reorder = False) + srcs = 2, imms = [FORMAT, MASK, SHIFT, SCOREBOARD], can_reorder = False, + schedule_class = "load") # sources are base (relative to workgroup memory), index op("local_load", encoding_32 = (0b1101001, 0, 6, 8), - srcs = 2, imms = [FORMAT, MASK]) + srcs = 2, imms = [FORMAT, MASK], can_reorder = False, + schedule_class = "load") # sources are value, base, index # TODO: Consider permitting the short form op("device_store", encoding_32 = (0x45 | (1 << 47), 0, 8, _), - dests = 0, srcs = 3, imms = [FORMAT, MASK, SHIFT, SCOREBOARD], can_eliminate = False) + dests = 0, srcs = 3, imms = [FORMAT, MASK, SHIFT, SCOREBOARD], can_eliminate = False, + schedule_class = "store") # sources are value, base, index op("local_store", encoding_32 = (0b0101001, 0, 6, 8), dests = 0, srcs = 3, imms = [FORMAT, MASK], - can_eliminate=False) + can_eliminate=False, schedule_class = "store") # sources are value, index # TODO: Consider permitting the short form @@ -291,38 +298,44 @@ op("uniform_store", # sources are value, base, index op("atomic", encoding_32 = (0x15 | (1 << 26) | (1 << 31) | (5 << 44), 0x3F | (1 << 26) | (1 << 31) | (5 << 44), 8, _), - dests = 1, srcs = 3, imms = [ATOMIC_OPC, SCOREBOARD], can_eliminate = False) + dests = 1, srcs = 3, imms = [ATOMIC_OPC, SCOREBOARD], + can_eliminate = False, schedule_class = "atomic") # XXX: stop hardcoding the long form op("local_atomic", encoding_32 = (0x19 | (1 << 15) | (1 << 36) | (1 << 47), 0x3F | (1 << 36) | (1 << 47), 10, _), - dests = 1, srcs = 3, imms = [ATOMIC_OPC], can_eliminate = False) + dests = 1, srcs = 3, imms = [ATOMIC_OPC], schedule_class = "atomic", + can_eliminate = False) op("wait", (0x38, 0xFF, 2, _), dests = 0, - can_eliminate = False, imms = [SCOREBOARD]) + can_eliminate = False, imms = [SCOREBOARD], schedule_class = "invalid") op("get_sr", (0x72, 0x7F | L, 4, _), dests = 1, imms = [SR]) -op("sample_mask", (0x7fc1, 0xffff, 6, _), dests = 0, srcs = 2, can_eliminate = False) +op("sample_mask", (0x7fc1, 0xffff, 6, _), dests = 0, srcs = 2, + can_eliminate = False, schedule_class = "coverage") # Sources: sample mask, combined depth/stencil op("zs_emit", (0x41, 0xFF | L, 4, _), dests = 0, srcs = 2, - can_eliminate = False, imms = [ZS]) + can_eliminate = False, imms = [ZS], schedule_class = "coverage") # Essentially same encoding. Last source is the sample mask op("ld_tile", (0x49, 0x7F, 8, _), dests = 1, srcs = 1, - imms = [FORMAT, MASK, PIXEL_OFFSET], can_reorder = False) + imms = [FORMAT, MASK, PIXEL_OFFSET], can_reorder = False, + schedule_class = "coverage") op("st_tile", (0x09, 0x7F, 8, _), dests = 0, srcs = 2, - can_eliminate = False, imms = [FORMAT, MASK, PIXEL_OFFSET]) + can_eliminate = False, imms = [FORMAT, MASK, PIXEL_OFFSET], + schedule_class = "coverage") for (name, exact) in [("any", 0xC000), ("none", 0xC020)]: op("jmp_exec_" + name, (exact, (1 << 16) - 1, 6, _), dests = 0, srcs = 0, - can_eliminate = False, imms = [TARGET]) + can_eliminate = False, schedule_class = "invalid", imms = [TARGET]) # TODO: model implicit r0l destinations op("pop_exec", (0x52 | (0x3 << 9), ((1 << 48) - 1) ^ (0x3 << 7) ^ (0x3 << 11), 6, _), - dests = 0, srcs = 0, can_eliminate = False, imms = [NEST]) + dests = 0, srcs = 0, can_eliminate = False, schedule_class = "invalid", + imms = [NEST]) for is_float in [False, True]: mod_mask = 0 if is_float else (0x3 << 26) | (0x3 << 38) @@ -334,7 +347,8 @@ for is_float in [False, True]: imms = [NEST, FCOND if is_float else ICOND, INVERT_COND] op(name, (exact, mask, 6, _), dests = 0, srcs = 2, can_eliminate = False, - imms = imms, is_float = is_float) + imms = imms, is_float = is_float, + schedule_class = "preload" if cf == "else" else "invalid") op("bitop", (0x7E, 0x7F, 6, _), srcs = 2, imms = [TRUTH_TABLE]) op("intl", (0x3E, 0x7F, 6, _), srcs = 2, imms = []) @@ -350,28 +364,32 @@ op("iterproj", (0x21, 0xBF, 8, _), srcs = 3, imms = [CHANNELS, INTERPOLATION]) op("ldcf", (0xA1, 0xBF, 8, _), srcs = 1, imms = [CHANNELS]) op("st_vary", None, dests = 0, srcs = 2, can_eliminate = False) op("no_varyings", (0x80000051, 0xFFFFFFFF, 4, _), dests = 0, can_eliminate = False) -op("stop", (0x88, 0xFFFF, 2, _), dests = 0, can_eliminate = False) -op("trap", (0x08, 0xFFFF, 2, _), dests = 0, can_eliminate = False) -op("wait_pix", (0x48, 0xFF, 4, _), dests = 0, imms = [WRITEOUT], can_eliminate = False) -op("signal_pix", (0x58, 0xFF, 4, _), dests = 0, imms = [WRITEOUT], can_eliminate = False) +op("stop", (0x88, 0xFFFF, 2, _), dests = 0, can_eliminate = False, + schedule_class = "invalid") +op("trap", (0x08, 0xFFFF, 2, _), dests = 0, can_eliminate = False, + schedule_class = "invalid") +op("wait_pix", (0x48, 0xFF, 4, _), dests = 0, imms = [WRITEOUT], + can_eliminate = False, schedule_class = "coverage") +op("signal_pix", (0x58, 0xFF, 4, _), dests = 0, imms = [WRITEOUT], + can_eliminate = False, schedule_class = "coverage") # Sources are the data vector, the coordinate vector, the LOD, the bindless # table if present (zero for texture state registers), and texture index. op("image_write", (0xF1 | (1 << 23) | (9 << 43), 0xFF, 6, 8), dests = 0, srcs = 5, imms - = [DIM], can_eliminate = False) + = [DIM], can_eliminate = False, schedule_class = "store") # Sources are the image and the offset within shared memory # TODO: Do we need the short encoding? op("block_image_store", (0xB1, 0xFF, 10, _), dests = 0, srcs = 2, - imms = [FORMAT, DIM], can_eliminate = False) + imms = [FORMAT, DIM], can_eliminate = False, schedule_class = "store") # Barriers op("threadgroup_barrier", (0x0068, 0xFFFF, 2, _), dests = 0, srcs = 0, - can_eliminate = False) + can_eliminate = False, schedule_class = "barrier") def memory_barrier(name, a, b, c): op(name, (0xF5 | (a << 10) | (b << 8) | (c << 12), 0xFFFF, 2, _), dests = 0, srcs = 0, - can_eliminate = False) + can_eliminate = False, schedule_class = "barrier") memory_barrier("memory_barrier", 1, 2, 9) @@ -391,17 +409,19 @@ op("and", _, srcs = 2) op("or", _, srcs = 2) # Indicates the logical end of the block, before final branches/control flow -op("logical_end", _, dests = 0, srcs = 0, can_eliminate = False) +op("logical_end", _, dests = 0, srcs = 0, can_eliminate = False, + schedule_class = "invalid") op("collect", _, srcs = VARIABLE) op("split", _, srcs = 1, dests = VARIABLE) -op("phi", _, srcs = VARIABLE) +op("phi", _, srcs = VARIABLE, schedule_class = "preload") op("unit_test", _, dests = 0, srcs = 1, can_eliminate = False) # Like mov, but takes a register and can only appear at the start. Guaranteed # to be coalesced during RA, rather than lowered to a real move. -op("preload", _, srcs = 1) +op("preload", _, srcs = 1, schedule_class = "preload") # Set the nesting counter. Lowers to mov_imm r0l, #nest after RA. -op("nest", _, dests = 0, imms = [IMM], can_eliminate = False) +op("nest", _, dests = 0, imms = [IMM], can_eliminate = False, + schedule_class = "barrier") -- 2.7.4