From 618ff11457f477ce1d30ecfbcee469287760838b Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 29 Nov 2014 02:13:07 -0500
Subject: [PATCH] freedreno/a3xx: don't use half precision shaders for
 int/float32

Integer outputs end up getting mangled due to cov.f32f16, and float32
loses precision. Use full precision shaders in both of those cases.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/freedreno/a3xx/fd3_draw.c   |  5 ++++-
 src/gallium/drivers/freedreno/a3xx/fd3_format.h | 18 ++++++++++++++++++
 src/gallium/drivers/freedreno/a3xx/fd3_gmem.c   | 22 +++++++++++++---------
 3 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
index ff6db58..2ae4cfb 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
@@ -220,6 +220,8 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
 		const union pipe_color_union *color, double depth, unsigned stencil)
 {
 	struct fd3_context *fd3_ctx = fd3_context(ctx);
+	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+	enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
 	struct fd_ringbuffer *ring = ctx->ring;
 	unsigned dirty = ctx->dirty;
 	unsigned ce, i;
@@ -227,8 +229,9 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
 		.vtx  = &fd3_ctx->solid_vbuf_state,
 		.prog = &ctx->solid_prog,
 		.key = {
-			.half_precision = true,
+			.half_precision = fd3_half_precision(format),
 		},
+		.format = format,
 	};
 
 	dirty &= FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.h b/src/gallium/drivers/freedreno/a3xx/fd3_format.h
index ebd6795..043454e 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_format.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.h
@@ -25,6 +25,7 @@
 #ifndef FD3_FORMAT_H_
 #define FD3_FORMAT_H_
 
+#include "util/u_format.h"
 #include "freedreno_util.h"
 
 #include "a3xx.xml.h"
@@ -39,4 +40,21 @@ enum a3xx_color_swap fd3_pipe2swap(enum pipe_format format);
 uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r,
 		unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a);
 
+static INLINE bool
+fd3_half_precision(enum pipe_format format)
+{
+	/* Decide whether a half precision shader may be used for 'format'.
+	 * Not for pure integer formats: colors are provided in consts, which
+	 * go through cov.f32f16, and that conversion mangles integer values. */
+	if (util_format_is_pure_integer(format))
+		return false;
+
+	/* avoid losing precision on 32-bit float formats (only the width of component 0 is checked) */
+	if (util_format_is_float(format) &&
+		util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0) == 32)
+		return false;
+
+	return true;
+}
+
 #endif /* FD3_FORMAT_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
index dae0b11..8edfb8b 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
@@ -43,12 +43,6 @@
 #include "fd3_format.h"
 #include "fd3_zsa.h"
 
-static const struct ir3_shader_key key = {
-		// XXX should set this based on render target format!  We don't
-		// want half_precision if float32 render target!!!
-		.half_precision = true,
-};
-
 static void
 emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
 		struct pipe_surface **bufs, uint32_t *bases, uint32_t bin_w)
@@ -161,7 +155,9 @@ emit_binning_workaround(struct fd_context *ctx)
 	struct fd3_emit emit = {
 			.vtx = &fd3_ctx->solid_vbuf_state,
 			.prog = &ctx->solid_prog,
-			.key = key,
+			.key = {
+				.half_precision = true,
+			},
 	};
 
 	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);
@@ -336,10 +332,14 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
 	struct fd3_context *fd3_ctx = fd3_context(ctx);
 	struct fd_ringbuffer *ring = ctx->ring;
 	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+	enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
 	struct fd3_emit emit = {
 			.vtx = &fd3_ctx->solid_vbuf_state,
 			.prog = &ctx->solid_prog,
-			.key = key,
+			.key = {
+				.half_precision = fd3_half_precision(format),
+			},
+			.format = format,
 	};
 
 	OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
@@ -458,10 +458,14 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
 	struct fd_gmem_stateobj *gmem = &ctx->gmem;
 	struct fd_ringbuffer *ring = ctx->ring;
 	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+	enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
 	struct fd3_emit emit = {
 			.vtx = &fd3_ctx->blit_vbuf_state,
 			.prog = &ctx->blit_prog,
-			.key = key,
+			.key = {
+				.half_precision = fd3_half_precision(format),
+			},
+			.format = format,
 	};
 	float x0, y0, x1, y1;
 	unsigned bin_w = tile->bin_w;
-- 
2.7.4