From 23f42f8dcfe7ca275ff3cbb5586b5a07ce8df778 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 23 Aug 2019 18:23:32 -0700 Subject: [PATCH] intel/compiler: Use new Gen11 headerless RT writes for MRT cases Gen11 adds support for specifying the render target index and src0 alpha present bits in the extended message descriptor. Previously, we had to use a message header for this, requiring extra instructions to write the fields, and two registers of extra payload. Improves performance on my ICL 8x8 frequency locked to 700MHz, on iris: GfxBench5 Manhattan 3.0: 2.13635% +/- 0.159859% (n=5) GfxBench5 Aztec Ruins: 1.57173% +/- 0.128749% (n=5) Synmark2 OglDeferred: 2.86914% +/- 0.191211% (n=10) Reviewed-by: Jason Ekstrand --- src/intel/compiler/brw_fs.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 72393b7..8bf11f9 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -4281,8 +4281,8 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, length = 2; } else if ((devinfo->gen <= 7 && !devinfo->is_haswell && prog_data->uses_kill) || - color1.file != BAD_FILE || - key->nr_color_regions > 1) { + (devinfo->gen < 11 && + (color1.file != BAD_FILE || key->nr_color_regions > 1))) { /* From the Sandy Bridge PRM, volume 4, page 198: * * "Dispatched Pixel Enables. 
One bit per pixel indicating @@ -4356,6 +4356,8 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, length++; } + bool src0_alpha_present = false; + if (src0_alpha.file != BAD_FILE) { for (unsigned i = 0; i < bld.dispatch_width() / 8; i++) { const fs_builder &ubld = bld.exec_all().group(8, i) @@ -4365,12 +4367,14 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, setup_color_payload(ubld, key, &sources[length], tmp, 1); length++; } + src0_alpha_present = true; } else if (prog_data->replicate_alpha && inst->target != 0) { /* Handle the case when fragment shader doesn't write to draw buffer * zero. No need to call setup_color_payload() for src0_alpha because * alpha value will be undefined. */ length += bld.dispatch_width() / 8; + src0_alpha_present = true; } if (sample_mask.file != BAD_FILE) { @@ -4448,6 +4452,13 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, inst->last_rt, false); + if (devinfo->gen >= 11) { + /* Set the "Render Target Index" and "Src0 Alpha Present" fields + * in the extended message descriptor, in lieu of using a header. + */ + ex_desc = inst->target << 12 | src0_alpha_present << 15; + } + inst->opcode = SHADER_OPCODE_SEND; inst->resize_sources(3); inst->sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; -- 2.7.4