From 3db31c0b06acf2d1505070434764c89bc58a48af Mon Sep 17 00:00:00 2001 From: Jose Maria Casanova Crespo Date: Mon, 20 Nov 2017 23:10:51 +0100 Subject: [PATCH] i965/fs: Helpers for un/shuffle 16-bit pairs in 32-bit components This helpers are used to load/store 16-bit types from/to 32-bit components. The functions shuffle_32bit_load_result_to_16bit_data and shuffle_16bit_data_for_32bit_write are implemented in a similar way than the analogous functions for handling 64-bit types. v1: Explain need of temporary in shuffle operations. (Jason Ekstrand) Reviewed-by: Jason Ekstrand --- src/intel/compiler/brw_fs.h | 11 +++++++ src/intel/compiler/brw_fs_nir.cpp | 60 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 19b897e..3055732 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -497,6 +497,17 @@ void shuffle_32bit_load_result_to_64bit_data(const brw::fs_builder &bld, fs_reg shuffle_64bit_data_for_32bit_write(const brw::fs_builder &bld, const fs_reg &src, uint32_t components); + +void shuffle_32bit_load_result_to_16bit_data(const brw::fs_builder &bld, + const fs_reg &dst, + const fs_reg &src, + uint32_t components); + +void shuffle_16bit_data_for_32bit_write(const brw::fs_builder &bld, + const fs_reg &dst, + const fs_reg &src, + uint32_t components); + fs_reg setup_imm_df(const brw::fs_builder &bld, double v); diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index ae85834..aa4a8c4 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -4844,6 +4844,38 @@ shuffle_32bit_load_result_to_64bit_data(const fs_builder &bld, } } +void +shuffle_32bit_load_result_to_16bit_data(const fs_builder &bld, + const fs_reg &dst, + const fs_reg &src, + uint32_t components) +{ + assert(type_sz(src.type) == 4); + assert(type_sz(dst.type) == 2); + + /* Un-shuffle 16-bit data loaded as pairs in 32-bit components: dst component i is 16-bit half (i % 2) of src component (i / 2). A temporary is used to un-shuffle the 32-bit data 
of each component + * into a valid 16-bit vector. We can't write directly to dst because it + * can be the same register as src and in that case the first MOV in the + * loop below would overwrite the data read in the second MOV. + */ + fs_reg tmp = retype(bld.vgrf(src.type), dst.type); + + for (unsigned i = 0; i < components; i++) { + const fs_reg component_i = + subscript(offset(src, bld, i / 2), dst.type, i % 2); + + bld.MOV(offset(tmp, bld, i % 2), component_i); + + if (i % 2) { + bld.MOV(offset(dst, bld, i -1), offset(tmp, bld, 0)); + bld.MOV(offset(dst, bld, i), offset(tmp, bld, 1)); + } + } + if (components % 2) { + bld.MOV(offset(dst, bld, components - 1), tmp); + } +} + /** * This helper does the inverse operation of * SHUFFLE_32BIT_LOAD_RESULT_TO_64BIT_DATA. @@ -4876,6 +4908,34 @@ shuffle_64bit_data_for_32bit_write(const fs_builder &bld, return dst; } +void +shuffle_16bit_data_for_32bit_write(const fs_builder &bld, + const fs_reg &dst, + const fs_reg &src, + uint32_t components) +{ + assert(type_sz(src.type) == 2); + assert(type_sz(dst.type) == 4); + + /* Shuffle 16-bit data into 32-bit components: src component i becomes 16-bit half (i % 2) of dst component (i / 2); for an odd count the second half of the last dst component is whatever the temporary still held. A temporary is used to shuffle the 16-bit data of each component in the + * 32-bit data vector. We can't write directly to dst because it can be the + * same register as src and in that case the first MOV in the loop below + * would overwrite the data read in the second MOV. + */ + fs_reg tmp = bld.vgrf(dst.type); + + for (unsigned i = 0; i < components; i++) { + const fs_reg component_i = offset(src, bld, i); + bld.MOV(subscript(tmp, src.type, i % 2), component_i); + if (i % 2) { + bld.MOV(offset(dst, bld, i / 2), tmp); + } + } + if (components % 2) { + bld.MOV(offset(dst, bld, components / 2), tmp); + } +} + fs_reg setup_imm_df(const fs_builder &bld, double v) { -- 2.7.4