From 66fcfa6894ab61a8cb70955f4a4113729e4a8099 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Mon, 3 Oct 2016 13:33:12 +0200 Subject: [PATCH] i965/vec4: make offset() work in terms of a simd width and scalar components So that it has the same semantics as the scalar backend implementation. The helper will now take a simd width (which is always 8 in vec4 mode) and step as many scalar components as specified by that width, respecting the size of the scalar channels. v2 (Curro): - Remove the assertion in offset(), byte_offset() has the same checks. - Use byte_offset() directly instead of add_byte_offset(). - Make things more clear by explicitly including the vertical stride in the byte offset expression. Reviewed-by: Francisco Jerez --- src/mesa/drivers/dri/i965/brw_ir_vec4.h | 18 ++++++++---------- src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 2 +- src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp | 14 +++++++------- 3 files changed, 16 insertions(+), 18 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h index 1633c4c..5dfdfce 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h @@ -104,12 +104,11 @@ byte_offset(src_reg reg, unsigned bytes) } static inline src_reg -offset(src_reg reg, unsigned delta) +offset(src_reg reg, unsigned width, unsigned delta) { - assert(delta == 0 || - (reg.file != ARF && reg.file != FIXED_GRF && reg.file != IMM)); - reg.offset += delta * (reg.file == UNIFORM ? 16 : REG_SIZE); - return reg; + const unsigned stride = (reg.file == UNIFORM ? 0 : 4); + const unsigned num_components = MAX2(width / 4 * stride, 4); + return byte_offset(reg, num_components * type_sz(reg.type) * delta); } /** @@ -180,12 +179,11 @@ byte_offset(dst_reg reg, unsigned bytes) } static inline dst_reg -offset(dst_reg reg, unsigned delta) +offset(dst_reg reg, unsigned width, unsigned delta) { - assert(delta == 0 || - (reg.file != ARF && reg.file != FIXED_GRF && reg.file != IMM)); - reg.offset += delta * (reg.file == UNIFORM ? 16 : REG_SIZE); - return reg; + const unsigned stride = (reg.file == UNIFORM ? 0 : 4); + const unsigned num_components = MAX2(width / 4 * stride, 4); + return byte_offset(reg, num_components * type_sz(reg.type) * delta); } static inline dst_reg diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index aabf082..fc4eb3a 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -256,7 +256,7 @@ dst_reg_for_nir_reg(vec4_visitor *v, nir_register *nir_reg, dst_reg reg; reg = v->nir_locals[nir_reg->index]; - reg = offset(reg, base_offset); + reg = offset(reg, 8, base_offset); if (indirect) { reg.reladdr = new(v->mem_ctx) src_reg(v->get_nir_src(*indirect, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp index 19c685f..00c94fe 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp @@ -42,9 +42,9 @@ namespace { DIV_ROUND_UP(size * dst_stride, 4)); for (unsigned i = 0; i < size; ++i) - bld.MOV(writemask(offset(dst, i * dst_stride / 4), + bld.MOV(writemask(offset(dst, 8, i * dst_stride / 4), 1 << (i * dst_stride % 4)), - swizzle(offset(src, i * src_stride / 4), + swizzle(offset(src, 8, i * src_stride / 4), brw_swizzle_for_mask(1 << (i * src_stride % 4)))); return src_reg(dst); @@ -124,16 +124,16 @@ namespace brw { unsigned n = 0; if (header_sz) - bld.exec_all().MOV(offset(payload, n++), + bld.exec_all().MOV(offset(payload, 8, n++), retype(header, BRW_REGISTER_TYPE_UD)); for (unsigned i = 0; i < addr_sz; i++) - bld.MOV(offset(payload, n++), - offset(retype(addr, BRW_REGISTER_TYPE_UD), i)); + bld.MOV(offset(payload, 8, n++), + offset(retype(addr, BRW_REGISTER_TYPE_UD), 8, i)); for (unsigned i = 0; i < src_sz; i++) - bld.MOV(offset(payload, n++), - offset(retype(src, BRW_REGISTER_TYPE_UD), i)); + bld.MOV(offset(payload, 8, n++), + offset(retype(src, BRW_REGISTER_TYPE_UD), 8, i)); /* Reduce the dynamically uniform surface index to a single * scalar. -- 2.7.4