HSW_SFID_DATAPORT_DATA_CACHE_1 = 12,
HSW_SFID_CRE = 13,
+ GFX12_SFID_TGM = 13, /* Typed Global Memory */
+ GFX12_SFID_SLM = 14, /* Shared Local Memory */
+ GFX12_SFID_UGM = 15, /* Untyped Global Memory */
+
GEN_RT_SFID_BINDLESS_THREAD_DISPATCH = 7,
GEN_RT_SFID_RAY_TRACE_ACCELERATOR = 8,
};
#define GEN_RT_BTD_SHADER_TYPE_MISS 2
#define GEN_RT_BTD_SHADER_TYPE_INTERSECTION 3
+/* Starting with Xe-HPG, the old dataport was massively reworked. The new
+ * unit, called Load/Store Cache or LSC, has a significantly improved
+ * interface. Instead of bespoke messages for every case, there are basically
+ * one or two messages with different bits to control things like address
+ * size, how much data is read/written, etc. The enums that follow name the
+ * values of those fields.
+ *
+ * lsc_opcode lists the LSC message opcodes. The numbering is sparse: values
+ * 1, 3, 5, 7 and 27-30 have no enumerator here.
+ * NOTE(review): presumably those are reserved or unused encodings -- confirm
+ * against the hardware documentation.
+ */
+enum lsc_opcode {
+ LSC_OP_LOAD = 0,
+ LSC_OP_LOAD_CMASK = 2,
+ LSC_OP_STORE = 4,
+ LSC_OP_STORE_CMASK = 6,
+ LSC_OP_ATOMIC_INC = 8,
+ LSC_OP_ATOMIC_DEC = 9,
+ LSC_OP_ATOMIC_LOAD = 10,
+ LSC_OP_ATOMIC_STORE = 11,
+ LSC_OP_ATOMIC_ADD = 12,
+ LSC_OP_ATOMIC_SUB = 13,
+ LSC_OP_ATOMIC_MIN = 14,
+ LSC_OP_ATOMIC_MAX = 15,
+ LSC_OP_ATOMIC_UMIN = 16,
+ LSC_OP_ATOMIC_UMAX = 17,
+ LSC_OP_ATOMIC_CMPXCHG = 18,
+ LSC_OP_ATOMIC_FADD = 19,
+ LSC_OP_ATOMIC_FSUB = 20,
+ LSC_OP_ATOMIC_FMIN = 21,
+ LSC_OP_ATOMIC_FMAX = 22,
+ LSC_OP_ATOMIC_FCMPXCHG = 23,
+ LSC_OP_ATOMIC_AND = 24,
+ LSC_OP_ATOMIC_OR = 25,
+ LSC_OP_ATOMIC_XOR = 26,
+ LSC_OP_FENCE = 31
+};
+
+/*
+ * Specifies the size of the dataport address payload in registers. Each
+ * enumerator's value equals the register count in its name; only 1, 2, 3, 4,
+ * 6 and 8 are defined.
+ */
+enum PACKED lsc_addr_reg_size {
+ LSC_ADDR_REG_SIZE_1 = 1,
+ LSC_ADDR_REG_SIZE_2 = 2,
+ LSC_ADDR_REG_SIZE_3 = 3,
+ LSC_ADDR_REG_SIZE_4 = 4,
+ LSC_ADDR_REG_SIZE_6 = 6,
+ LSC_ADDR_REG_SIZE_8 = 8,
+};
+
+/*
+ * Specifies the size of the address payload item in a dataport message.
+ * Values start at 1; no enumerator is defined for 0.
+ */
+enum PACKED lsc_addr_size {
+ LSC_ADDR_SIZE_A16 = 1, /* 16-bit address offset */
+ LSC_ADDR_SIZE_A32 = 2, /* 32-bit address offset */
+ LSC_ADDR_SIZE_A64 = 3, /* 64-bit address offset */
+};
+
+/*
+ * Specifies the type of the address payload item in a dataport message. The
+ * address type specifies how the dataport message decodes the Extended
+ * Descriptor for the surface attributes and address calculation. The four
+ * values cover the range 0-3.
+ */
+enum PACKED lsc_addr_surface_type {
+ LSC_ADDR_SURFTYPE_FLAT = 0, /* Flat */
+ LSC_ADDR_SURFTYPE_BSS = 1, /* Bindless surface state */
+ LSC_ADDR_SURFTYPE_SS = 2, /* Surface state */
+ LSC_ADDR_SURFTYPE_BTI = 3, /* Binding table index */
+};
+
+/*
+ * Specifies the dataport message override to the default L1 and L3 memory
+ * cache policies for loads. Dataport L1 cache policies are uncached (UC),
+ * cached (C), cache streaming (S) and invalidate-after-read (IAR). Dataport
+ * L3 cache policies are uncached (UC) and cached (C).
+ *
+ * NOTE(review): unlike lsc_cache_store, this enum is not declared PACKED --
+ * confirm whether that difference is intentional.
+ */
+enum lsc_cache_load {
+ /* No override. Use the non-pipelined state or surface state cache settings
+ * for L1 and L3.
+ */
+ LSC_CACHE_LOAD_L1STATE_L3MOCS = 0,
+ /* Override to L1 uncached and L3 uncached */
+ LSC_CACHE_LOAD_L1UC_L3UC = 1,
+ /* Override to L1 uncached and L3 cached */
+ LSC_CACHE_LOAD_L1UC_L3C = 2,
+ /* Override to L1 cached and L3 uncached */
+ LSC_CACHE_LOAD_L1C_L3UC = 3,
+ /* Override to L1 cached and L3 cached */
+ LSC_CACHE_LOAD_L1C_L3C = 4,
+ /* Override to L1 streaming load and L3 uncached */
+ LSC_CACHE_LOAD_L1S_L3UC = 5,
+ /* Override to L1 streaming load and L3 cached */
+ LSC_CACHE_LOAD_L1S_L3C = 6,
+ /* For load messages, override to L1 invalidate-after-read and L3 cached */
+ LSC_CACHE_LOAD_L1IAR_L3C = 7,
+};
+
+/*
+ * Specifies the dataport message override to the default L1 and L3 memory
+ * cache policies for stores. Dataport L1 cache policies are uncached (UC),
+ * write-through (WT), write-back (WB) and streaming (S). Dataport L3 cache
+ * policies are uncached (UC) and cached (WB).
+ */
+enum PACKED lsc_cache_store {
+ /* No override. Use the non-pipelined state or surface state cache settings
+ * for L1 and L3.
+ */
+ LSC_CACHE_STORE_L1STATE_L3MOCS = 0,
+ /* Override to L1 uncached and L3 uncached */
+ LSC_CACHE_STORE_L1UC_L3UC = 1,
+ /* Override to L1 uncached and L3 cached */
+ LSC_CACHE_STORE_L1UC_L3WB = 2,
+ /* Override to L1 write-through and L3 uncached */
+ LSC_CACHE_STORE_L1WT_L3UC = 3,
+ /* Override to L1 write-through and L3 cached */
+ LSC_CACHE_STORE_L1WT_L3WB = 4,
+ /* Override to L1 streaming and L3 uncached */
+ LSC_CACHE_STORE_L1S_L3UC = 5,
+ /* Override to L1 streaming and L3 cached */
+ LSC_CACHE_STORE_L1S_L3WB = 6,
+ /* Override to L1 write-back and L3 cached */
+ LSC_CACHE_STORE_L1WB_L3WB = 7,
+
+};
+
+/*
+ * Specifies which components of the data payload 4-element vector (X,Y,Z,W)
+ * are packed into the register payload.
+ *
+ * The value is a 4-bit mask: bit 0 is X, bit 1 is Y, bit 2 is Z and bit 3 is
+ * W. Every multi-component enumerator is the bitwise OR of its components.
+ * No enumerator is defined for the empty mask (0).
+ */
+enum PACKED lsc_cmask {
+ LSC_CMASK_X = 0x1,
+ LSC_CMASK_Y = 0x2,
+ LSC_CMASK_XY = 0x3,
+ LSC_CMASK_Z = 0x4,
+ LSC_CMASK_XZ = 0x5,
+ LSC_CMASK_YZ = 0x6,
+ LSC_CMASK_XYZ = 0x7,
+ LSC_CMASK_W = 0x8,
+ LSC_CMASK_XW = 0x9,
+ LSC_CMASK_YW = 0xa,
+ LSC_CMASK_XYW = 0xb,
+ LSC_CMASK_ZW = 0xc,
+ LSC_CMASK_XZW = 0xd,
+ LSC_CMASK_YZW = 0xe,
+ LSC_CMASK_XYZW = 0xf,
+};
+
+/*
+ * Specifies the size of the data payload item in a dataport message. Each
+ * enumerator fixes both the width of the value in memory and the width (and,
+ * for the widening forms, the type) it occupies in the register.
+ */
+enum PACKED lsc_data_size {
+ /* 8-bit scalar data value in memory, packed into an 8-bit data value in
+ * register.
+ */
+ LSC_DATA_SIZE_D8 = 0,
+ /* 16-bit scalar data value in memory, packed into a 16-bit data value in
+ * register.
+ */
+ LSC_DATA_SIZE_D16 = 1,
+ /* 32-bit scalar data value in memory, packed into a 32-bit data value in
+ * register.
+ */
+ LSC_DATA_SIZE_D32 = 2,
+ /* 64-bit scalar data value in memory, packed into a 64-bit data value in
+ * register.
+ */
+ LSC_DATA_SIZE_D64 = 3,
+ /* 8-bit scalar data value in memory, packed into a 32-bit unsigned data
+ * value in register.
+ */
+ LSC_DATA_SIZE_D8U32 = 4,
+ /* 16-bit scalar data value in memory, packed into a 32-bit unsigned data
+ * value in register.
+ */
+ LSC_DATA_SIZE_D16U32 = 5,
+ /* 16-bit scalar BigFloat data value in memory, packed into a 32-bit float
+ * value in register.
+ */
+ LSC_DATA_SIZE_D16BF32 = 6,
+};
+
+/*
+ * Specifies the scope of a fence, i.e. how widely the previous memory
+ * transactions from this thread must be observed before the fence completes.
+ */
+enum PACKED lsc_fence_scope {
+ /* Wait until all previous memory transactions from this thread are observed
+ * within the local thread-group.
+ */
+ LSC_FENCE_THREADGROUP = 0,
+ /* Wait until all previous memory transactions from this thread are observed
+ * within the local sub-slice.
+ */
+ LSC_FENCE_LOCAL = 1,
+ /* Wait until all previous memory transactions from this thread are observed
+ * in the local tile.
+ */
+ LSC_FENCE_TILE = 2,
+ /* Wait until all previous memory transactions from this thread are observed
+ * in the local GPU.
+ */
+ LSC_FENCE_GPU = 3,
+ /* Wait until all previous memory transactions from this thread are observed
+ * across all GPUs in the system.
+ */
+ LSC_FENCE_ALL_GPU = 4,
+ /* Wait until all previous memory transactions from this thread are observed
+ * at the "system" level.
+ */
+ LSC_FENCE_SYSTEM_RELEASE = 5,
+ /* For GPUs that do not follow PCIe Write ordering for downstream writes
+ * targeting device memory, a fence message with scope=System_Acquire will
+ * commit to device memory all downstream and peer writes that have reached
+ * the device.
+ */
+ LSC_FENCE_SYSTEM_ACQUIRE = 6,
+};
+
+/*
+ * Specifies the type of cache flush operation to perform after a fence is
+ * complete. In the comments below, R/W means a read-write cache and RO a
+ * read-only cache.
+ */
+enum PACKED lsc_flush_type {
+ LSC_FLUSH_TYPE_NONE = 0, /* NOTE(review): presumably no flush -- confirm */
+ /*
+ * For a R/W cache, evict dirty lines (M to I state) and invalidate clean
+ * lines. For a RO cache, invalidate clean lines.
+ */
+ LSC_FLUSH_TYPE_EVICT = 1,
+ /*
+ * For both R/W and RO caches, invalidate clean lines in the cache.
+ */
+ LSC_FLUSH_TYPE_INVALIDATE = 2,
+ /*
+ * For a R/W cache, invalidate dirty lines (M to I state), without
+ * write-back to the next level. This opcode does nothing for a RO cache.
+ */
+ LSC_FLUSH_TYPE_DISCARD = 3,
+ /*
+ * For a R/W cache, write back dirty lines to the next level, but keep them
+ * in the cache as "clean" (M to V state). This opcode does nothing for a RO
+ * cache.
+ */
+ LSC_FLUSH_TYPE_CLEAN = 4,
+ /*
+ * Flush the "RW" section of the L3 cache, but leave the L1 and L2 caches
+ * untouched.
+ */
+ LSC_FLUSH_TYPE_L3ONLY = 5,
+};
+
+enum PACKED lsc_backup_fence_routing { /* Selects where a UGM fence is routed. */
+ /* Normal routing: UGM fence is routed to UGM pipeline. No explicit value is
+ * given, so this enumerator is 0.
+ */
+ LSC_NORMAL_ROUTING,
+ /* Route UGM fence to LSC unit. Implicitly 1 (previous enumerator + 1). */
+ LSC_ROUTE_TO_LSC,
+};
+
+/*
+ * Specifies the size of the vector in a dataport message. The enumerator
+ * values are encodings, not lengths: 0-3 select lengths 1-4 and 4-7 select
+ * lengths 8, 16, 32 and 64.
+ */
+enum PACKED lsc_vect_size {
+ LSC_VECT_SIZE_V1 = 0, /* Vector length 1 */
+ LSC_VECT_SIZE_V2 = 1, /* Vector length 2 */
+ LSC_VECT_SIZE_V3 = 2, /* Vector length 3 */
+ LSC_VECT_SIZE_V4 = 3, /* Vector length 4 */
+ LSC_VECT_SIZE_V8 = 4, /* Vector length 8 */
+ LSC_VECT_SIZE_V16 = 5, /* Vector length 16 */
+ LSC_VECT_SIZE_V32 = 6, /* Vector length 32 */
+ LSC_VECT_SIZE_V64 = 7, /* Vector length 64 */
+};
+
+#define LSC_ONE_ADDR_REG 1 /* NOTE(review): presumably an address payload length of one register -- confirm at use sites */
+
#endif /* BRW_EU_DEFINES_H */