aco: allocate a temp VGPR for some 8-bit/16-bit reduction ops on GFX10
author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 18 May 2020 15:03:21 +0000 (17:03 +0200)
committer: Marge Bot <eric+marge@anholt.net>
Fri, 29 May 2020 11:20:58 +0000 (11:20 +0000)
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5148>

src/amd/compiler/aco_reduce_assign.cpp

index 3dd396e..708f401 100644 (file)
@@ -125,10 +125,13 @@ void setup_reduce_temp(Program* program)
                           op == fmin64 || op == fmax64 || op == umin64 ||
                           op == umax64 || op == imin64 || op == imax64 ||
                           op == imul64;
+         bool gfx10_need_vtmp = op == imul8 || op == imax8 || op == imin8 || op == umin8 ||
+                                op == imul16 || op == imax16 || op == imin16 || op == umin16 ||
+                                op == iadd64;
 
          if (program->chip_class >= GFX10 && cluster_size == 64)
             need_vtmp = true;
-         if (program->chip_class >= GFX10 && op == iadd64)
+         if (program->chip_class >= GFX10 && gfx10_need_vtmp)
             need_vtmp = true;
          if (program->chip_class <= GFX7)
             need_vtmp = true;