From: Danylo Piliaiev Date: Mon, 17 Apr 2023 13:39:09 +0000 (+0200) Subject: freedreno/computerator: Fix remaining issues with A7XX X-Git-Tag: upstream/23.3.3~2505 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=7e10a175c77a908bc9d1bdec445d4365b9dbb365;p=platform%2Fupstream%2Fmesa.git freedreno/computerator: Fix remaining issues with A7XX Signed-off-by: Danylo Piliaiev Part-of: --- diff --git a/src/freedreno/computerator/a6xx.cc b/src/freedreno/computerator/a6xx.cc index d3ff32d..93a3de2 100644 --- a/src/freedreno/computerator/a6xx.cc +++ b/src/freedreno/computerator/a6xx.cc @@ -119,11 +119,13 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel) struct ir3_kernel *ir3_kernel = to_ir3_kernel(kernel); struct a6xx_backend *a6xx_backend = to_a6xx_backend(ir3_kernel->backend); struct ir3_shader_variant *v = ir3_kernel->v; + const unsigned *local_size = kernel->local_size; const struct ir3_info *i = &v->info; enum a6xx_threadsize thrsz = i->double_threadsize ? THREAD128 : THREAD64; - OUT_PKT4(ring, REG_A6XX_SP_MODE_CONTROL, 1); - OUT_RING(ring, A6XX_SP_MODE_CONTROL_CONSTANT_DEMOTION_ENABLE | 4); + OUT_REG(ring, A6XX_SP_MODE_CONTROL(.constant_demotion_enable = true, + .isammode = ISAMMODE_GL, + .shared_consts_enable = false)); OUT_PKT4(ring, REG_A6XX_SP_PERFCTR_ENABLE, 1); OUT_RING(ring, A6XX_SP_PERFCTR_ENABLE_CS); @@ -168,6 +170,14 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel) COND(v->mergedregs, A6XX_SP_CS_CTRL_REG0_MERGEDREGS) | COND(ir3_kernel->info.early_preamble, A6XX_SP_CS_CTRL_REG0_EARLYPREAMBLE) | A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(v))); + if (CHIP == A7XX) { + OUT_REG(ring, HLSQ_FS_CNTL_0(CHIP, .threadsize = THREAD64)); + + OUT_REG(ring, HLSQ_CONTROL_2_REG(CHIP, .dword = 0xfcfcfcfc), + HLSQ_CONTROL_3_REG(CHIP, .dword = 0xfcfcfcfc), + HLSQ_CONTROL_4_REG(CHIP, .dword = 0xfcfcfcfc), + HLSQ_CONTROL_5_REG(CHIP, .dword = 0x0000fc00), ); + } OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1); OUT_RING(ring, A6XX_SP_CS_UNKNOWN_A9B1_SHARED_SIZE(1) | @@ -192,16 +202,28 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel) A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID(local_invocation_id)); OUT_RING(ring, A6XX_HLSQ_CS_CNTL_1_LINEARLOCALIDREGID(regid(63, 0)) | A6XX_HLSQ_CS_CNTL_1_THREADSIZE(thrsz)); + } else { + enum a7xx_cs_yalign yalign = (local_size[1] % 8 == 0) ? CS_YALIGN_8 + : (local_size[1] % 4 == 0) ? CS_YALIGN_4 + : (local_size[1] % 2 == 0) ? CS_YALIGN_2 + : CS_YALIGN_1; + + OUT_REG(ring, A7XX_HLSQ_CS_CNTL_1(.linearlocalidregid = regid(63, 0), + .threadsize = thrsz, + .unk11 = true, + .unk22 = true, + .yalign = yalign, )); } if (CHIP == A7XX || a6xx_backend->info->a6xx.has_lpac) { - OUT_PKT4(ring, REG_A6XX_SP_CS_CNTL_0, 2); + OUT_PKT4(ring, REG_A6XX_SP_CS_CNTL_0, 1); OUT_RING(ring, A6XX_SP_CS_CNTL_0_WGIDCONSTID(work_group_id) | A6XX_SP_CS_CNTL_0_WGSIZECONSTID(regid(63, 0)) | A6XX_SP_CS_CNTL_0_WGOFFSETCONSTID(regid(63, 0)) | A6XX_SP_CS_CNTL_0_LOCALIDREGID(local_invocation_id)); - OUT_RING(ring, A6XX_SP_CS_CNTL_1_LINEARLOCALIDREGID(regid(63, 0)) | - A6XX_SP_CS_CNTL_1_THREADSIZE(thrsz)); + OUT_REG(ring, + SP_CS_CNTL_1(CHIP, .linearlocalidregid = regid(63, 0), + .threadsize = thrsz, )); } OUT_PKT4(ring, REG_A6XX_SP_CS_OBJ_START, 2); @@ -463,6 +485,12 @@ a6xx_emit_grid(struct kernel *kernel, uint32_t grid[3], .localsizey = local_size[1] - 1, .localsizez = local_size[2] - 1, )); + if (CHIP == A7XX) { + OUT_REG(ring, A7XX_HLSQ_CS_LOCAL_SIZE(.localsizex = local_size[0] - 1, + .localsizey = local_size[1] - 1, + .localsizez = local_size[2] - 1, )); + } + OUT_REG(ring, HLSQ_CS_NDRANGE_1(CHIP, .globalsize_x = local_size[0] * num_groups[0], )); diff --git a/src/freedreno/computerator/meson.build b/src/freedreno/computerator/meson.build index dcdfd80..d0d21e5 100644 --- a/src/freedreno/computerator/meson.build +++ b/src/freedreno/computerator/meson.build @@ -29,8 +29,13 @@ computerator_files = [ computerator_cpp_args = cpp.get_supported_arguments([ '-Wno-sign-compare', + '-Wno-array-bounds', ]) +if meson.is_cross_build() + computerator_cpp_args += '-Wno-array-bounds' +endif + computerator = executable( 'computerator', computerator_files, diff --git a/src/freedreno/registers/adreno/a6xx.xml b/src/freedreno/registers/adreno/a6xx.xml index 9fb5728..c01a5b3 100644 --- a/src/freedreno/registers/adreno/a6xx.xml +++ b/src/freedreno/registers/adreno/a6xx.xml @@ -3684,7 +3684,7 @@ to upconvert to 32b float internally? - + + + + + + + + + + - + @@ -4146,13 +4155,22 @@ to upconvert to 32b float internally? - + + + + + + + + + + - - + +