From: Jason Ekstrand Date: Mon, 20 Nov 2017 23:03:46 +0000 (+0100) Subject: i965/fs: Enables 16-bit load_ubo with sampler X-Git-Tag: upstream/18.1.0~3408 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3282309f74e72991635bfde08f5e5e58a18604c7;p=platform%2Fupstream%2Fmesa.git i965/fs: Enables 16-bit load_ubo with sampler load_ubo is using 32-bit loads as uniform surfaces have a 32-bit surface format defined. So when reading 16-bit components with the sampler we need to unshuffle two 16-bit components from each 32-bit component. Using the sampler avoids the use of the byte_scattered_read message that needs one message for each component and is supposed to be slower. v2: (Jason Ekstrand) - Simplify component selection and unshuffling for different bitsizes - Remove SKL optimization of reading only two 32-bit components when reading 16-bit types. Reviewed-by: Jose Maria Casanova Crespo --- diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 91399c6..93bb6b4 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -191,14 +191,21 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld, vec4_result, surf_index, vec4_offset); inst->size_written = 4 * vec4_result.component_size(inst->exec_size); - if (type_sz(dst.type) == 8) { - shuffle_32bit_load_result_to_64bit_data( - bld, retype(vec4_result, dst.type), vec4_result, 2); + fs_reg dw = offset(vec4_result, bld, (const_offset & 0xf) / 4); + switch (type_sz(dst.type)) { + case 2: + shuffle_32bit_load_result_to_16bit_data(bld, dst, dw, 1); + bld.MOV(dst, subscript(dw, dst.type, (const_offset / 2) & 1)); + break; + case 4: + bld.MOV(dst, retype(dw, dst.type)); + break; + case 8: + shuffle_32bit_load_result_to_64bit_data(bld, dst, dw, 1); + break; + default: + unreachable("Unsupported bit_size"); } - - vec4_result.type = dst.type; - bld.MOV(dst, offset(vec4_result, bld, - (const_offset & 0xf) / type_sz(vec4_result.type))); } /**