nir: add new VARYING_SLOTs and shader info for packed 16-bit varyings

author Marek Olšák <marek.olsak@amd.com>

Tue, 9 Feb 2021 15:58:51 +0000 (10:58 -0500)

committer Marge Bot <eric+marge@anholt.net>

Tue, 13 Apr 2021 05:07:42 +0000 (05:07 +0000)
author Marek Olšák <marek.olsak@amd.com>
Tue, 9 Feb 2021 15:58:51 +0000 (10:58 -0500)
committer Marge Bot <eric+marge@anholt.net>
Tue, 13 Apr 2021 05:07:42 +0000 (05:07 +0000)
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h

index 345bb25..2054fd7 100644 (file)
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1803,7 +1803,8 @@ typedef struct nir_io_semantics {
     unsigned gs_streams:8; /* xxyyzzww: 2-bit stream index for each component */
     unsigned medium_precision:1; /* GLSL mediump qualifier */
     unsigned per_view:1;
-   unsigned _pad:7;
+   unsigned high_16bits:1; /* whether accessing low or high half of the slot */
+   unsigned _pad:6;
  } nir_io_semantics;
  
  #define NIR_INTRINSIC_MAX_INPUTS 11
diff --git a/src/compiler/nir/nir_gather_info.c b/src/compiler/nir/nir_gather_info.c

index 5fbaecb..0b01441 100644 (file)
--- a/src/compiler/nir/nir_gather_info.c
+++ b/src/compiler/nir/nir_gather_info.c
@@ -288,11 +288,13 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader,
                        void *dead_ctx)
  {
     uint64_t slot_mask = 0;
+   uint16_t slot_mask_16bit = 0;
  
     if (nir_intrinsic_infos[instr->intrinsic].index_map[NIR_INTRINSIC_IO_SEMANTICS] > 0) {
        nir_io_semantics semantics = nir_intrinsic_io_semantics(instr);
  
-      if (semantics.location >= VARYING_SLOT_PATCH0) {
+      if (semantics.location >= VARYING_SLOT_PATCH0 &&
+          semantics.location <= VARYING_SLOT_PATCH31) {
           /* Generic per-patch I/O. */
           assert((shader->info.stage == MESA_SHADER_TESS_EVAL &&
                   instr->intrinsic == nir_intrinsic_load_input) ||
@@ -303,8 +305,16 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader,
           semantics.location -= VARYING_SLOT_PATCH0;
        }
  
-      slot_mask = BITFIELD64_RANGE(semantics.location, semantics.num_slots);
-      assert(util_bitcount64(slot_mask) == semantics.num_slots);
+      if (semantics.location >= VARYING_SLOT_VAR0_16BIT &&
+          semantics.location <= VARYING_SLOT_VAR15_16BIT) {
+         /* Convert num_slots from the units of half vectors to full vectors. */
+         unsigned num_slots = (semantics.num_slots + semantics.high_16bits + 1) / 2;
+         slot_mask_16bit =
+            BITFIELD_RANGE(semantics.location - VARYING_SLOT_VAR0_16BIT, num_slots);
+      } else {
+         slot_mask = BITFIELD64_RANGE(semantics.location, semantics.num_slots);
+         assert(util_bitcount64(slot_mask) == semantics.num_slots);
+      }
     }
  
     switch (instr->intrinsic) {
@@ -373,8 +383,11 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader,
              shader->info.patch_inputs_read_indirectly |= slot_mask;
        } else {
           shader->info.inputs_read |= slot_mask;
-         if (!nir_src_is_const(*nir_get_io_offset_src(instr)))
+         shader->info.inputs_read_16bit |= slot_mask_16bit;
+         if (!nir_src_is_const(*nir_get_io_offset_src(instr))) {
              shader->info.inputs_read_indirectly |= slot_mask;
+            shader->info.inputs_read_indirectly_16bit |= slot_mask_16bit;
+         }
        }
  
        if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
@@ -392,8 +405,11 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader,
              shader->info.patch_outputs_accessed_indirectly |= slot_mask;
        } else {
           shader->info.outputs_read |= slot_mask;
-         if (!nir_src_is_const(*nir_get_io_offset_src(instr)))
+         shader->info.outputs_read_16bit |= slot_mask_16bit;
+         if (!nir_src_is_const(*nir_get_io_offset_src(instr))) {
              shader->info.outputs_accessed_indirectly |= slot_mask;
+            shader->info.outputs_accessed_indirectly_16bit |= slot_mask_16bit;
+         }
        }
  
        if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
@@ -415,8 +431,11 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader,
              shader->info.patch_outputs_accessed_indirectly |= slot_mask;
        } else {
           shader->info.outputs_written |= slot_mask;
-         if (!nir_src_is_const(*nir_get_io_offset_src(instr)))
+         shader->info.outputs_written_16bit |= slot_mask_16bit;
+         if (!nir_src_is_const(*nir_get_io_offset_src(instr))) {
              shader->info.outputs_accessed_indirectly |= slot_mask;
+            shader->info.outputs_accessed_indirectly_16bit |= slot_mask_16bit;
+         }
        }
  
        if (shader->info.stage == MESA_SHADER_FRAGMENT &&
@@ -839,6 +858,11 @@ nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint)
     shader->info.inputs_read = 0;
     shader->info.outputs_written = 0;
     shader->info.outputs_read = 0;
+   shader->info.inputs_read_16bit = 0;
+   shader->info.outputs_written_16bit = 0;
+   shader->info.outputs_read_16bit = 0;
+   shader->info.inputs_read_indirectly_16bit = 0;
+   shader->info.outputs_accessed_indirectly_16bit = 0;
     shader->info.patch_outputs_read = 0;
     shader->info.patch_inputs_read = 0;
     shader->info.patch_outputs_written = 0;
diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c

index af740da..4ceac07 100644 (file)
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@@ -985,10 +985,12 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
                 }
                 fprintf(fp, ")");
              }
-            if (state->shader->info.stage == MESA_SHADER_FRAGMENT &&
-                nir_intrinsic_io_semantics(instr).medium_precision) {
+            if (nir_intrinsic_io_semantics(instr).medium_precision) {
                 fprintf(fp, " mediump");
              }
+            if (nir_intrinsic_io_semantics(instr).high_16bits) {
+               fprintf(fp, " high_16bits");
+            }
           }
           break;
  
diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h

index c44f4e3..8883e8d 100644 (file)
--- a/src/compiler/shader_enums.h
+++ b/src/compiler/shader_enums.h
@@ -333,11 +333,74 @@ typedef enum
     VARYING_SLOT_VAR29,
     VARYING_SLOT_VAR30,
     VARYING_SLOT_VAR31,
+   /* Per-patch varyings for tessellation. */
+   VARYING_SLOT_PATCH0,
+   VARYING_SLOT_PATCH1,
+   VARYING_SLOT_PATCH2,
+   VARYING_SLOT_PATCH3,
+   VARYING_SLOT_PATCH4,
+   VARYING_SLOT_PATCH5,
+   VARYING_SLOT_PATCH6,
+   VARYING_SLOT_PATCH7,
+   VARYING_SLOT_PATCH8,
+   VARYING_SLOT_PATCH9,
+   VARYING_SLOT_PATCH10,
+   VARYING_SLOT_PATCH11,
+   VARYING_SLOT_PATCH12,
+   VARYING_SLOT_PATCH13,
+   VARYING_SLOT_PATCH14,
+   VARYING_SLOT_PATCH15,
+   VARYING_SLOT_PATCH16,
+   VARYING_SLOT_PATCH17,
+   VARYING_SLOT_PATCH18,
+   VARYING_SLOT_PATCH19,
+   VARYING_SLOT_PATCH20,
+   VARYING_SLOT_PATCH21,
+   VARYING_SLOT_PATCH22,
+   VARYING_SLOT_PATCH23,
+   VARYING_SLOT_PATCH24,
+   VARYING_SLOT_PATCH25,
+   VARYING_SLOT_PATCH26,
+   VARYING_SLOT_PATCH27,
+   VARYING_SLOT_PATCH28,
+   VARYING_SLOT_PATCH29,
+   VARYING_SLOT_PATCH30,
+   VARYING_SLOT_PATCH31,
+   /* 32 16-bit vec4 slots packed in 16 32-bit vec4 slots for GLES/mediump.
+    * They are really just additional generic slots used for 16-bit data to
+    * prevent conflicts between neighboring mediump and non-mediump varyings
+    * that can't be packed without breaking one or the other, which is
+    * a limitation of separate shaders. This allows linking shaders in 32 bits
+    * and then getting optimally packed 16-bit varyings by remapping the IO
+    * locations to these slots. The remapping can also be undone trivially.
+    *
+    * nir_io_semantics::high_16bits determines which half of the slot is
+    * accessed. The low and high halves share the same IO "base" number.
+    * Drivers can treat these as 32-bit slots everywhere except for FP16
+    * interpolation.
+    */
+   VARYING_SLOT_VAR0_16BIT,
+   VARYING_SLOT_VAR1_16BIT,
+   VARYING_SLOT_VAR2_16BIT,
+   VARYING_SLOT_VAR3_16BIT,
+   VARYING_SLOT_VAR4_16BIT,
+   VARYING_SLOT_VAR5_16BIT,
+   VARYING_SLOT_VAR6_16BIT,
+   VARYING_SLOT_VAR7_16BIT,
+   VARYING_SLOT_VAR8_16BIT,
+   VARYING_SLOT_VAR9_16BIT,
+   VARYING_SLOT_VAR10_16BIT,
+   VARYING_SLOT_VAR11_16BIT,
+   VARYING_SLOT_VAR12_16BIT,
+   VARYING_SLOT_VAR13_16BIT,
+   VARYING_SLOT_VAR14_16BIT,
+   VARYING_SLOT_VAR15_16BIT,
+
+   NUM_TOTAL_VARYING_SLOTS,
  } gl_varying_slot;
  
  
  #define VARYING_SLOT_MAX       (VARYING_SLOT_VAR0 + MAX_VARYING)
-#define VARYING_SLOT_PATCH0    (VARYING_SLOT_MAX)
  #define VARYING_SLOT_TESS_MAX  (VARYING_SLOT_PATCH0 + MAX_VARYING)
  #define MAX_VARYINGS_INCL_PATCH (VARYING_SLOT_TESS_MAX - VARYING_SLOT_VAR0)
  
diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h

index ebc2c43..d0c0dd2 100644 (file)
--- a/src/compiler/shader_info.h
+++ b/src/compiler/shader_info.h
@@ -152,6 +152,15 @@ typedef struct shader_info {
     /* Which system values are actually read */
     BITSET_DECLARE(system_values_read, SYSTEM_VALUE_MAX);
  
+   /* Which 16-bit inputs and outputs are used corresponding to
+    * VARYING_SLOT_VARn_16BIT.
+    */
+   uint16_t inputs_read_16bit;
+   uint16_t outputs_written_16bit;
+   uint16_t outputs_read_16bit;
+   uint16_t inputs_read_indirectly_16bit;
+   uint16_t outputs_accessed_indirectly_16bit;
+
     /* Which patch inputs are actually read */
     uint32_t patch_inputs_read;
     /* Which patch outputs are actually written */
author	Marek Olšák <marek.olsak@amd.com>
	Tue, 9 Feb 2021 15:58:51 +0000 (10:58 -0500)
committer	Marge Bot <eric+marge@anholt.net>
	Tue, 13 Apr 2021 05:07:42 +0000 (05:07 +0000)
src/compiler/nir/nir.h		patch \| blob \| history
src/compiler/nir/nir_gather_info.c		patch \| blob \| history
src/compiler/nir/nir_print.c		patch \| blob \| history
src/compiler/shader_enums.h		patch \| blob \| history
src/compiler/shader_info.h		patch \| blob \| history