From 217d6594dec934b4b34f5c7e0a0cd978339a5ba0 Mon Sep 17 00:00:00 2001
From: Douglas Anderson
Date: Tue, 9 Mar 2021 11:10:43 -0800
Subject: [PATCH] gallium/indices: Use "__restrict" to help the compiler

In a perf trace translate_quads_uint2uint_last2last_prdisable() was
showing up as a huge hot spot. Digging through the assembly on arm64
showed that the compiler wasn't doing any read caching. Specifically,
the generated code looked roughly like this:

  out[j+0] = in[i+0];
  out[j+1] = in[i+1];
  out[j+2] = in[i+3];
  out[j+3] = in[i+1];
  out[j+4] = in[i+2];
  out[j+5] = in[i+3];

...and the compiler was loading in[i+1] and in[i+3] from memory twice
for no reason (instead of caching them).

If we sprinkle generous amounts of the `__restrict` keyword (spelled
`restrict` in the diff below) then the compiler is able to be much
smarter. Not only does it avoid double-loading but it also generates
better instructions. It uses two LDRD instructions instead of 6 LDR
instructions and uses some STRD too.

In one example test this increased FPS from ~25.7 to ~34.5.

Change-Id: I88bf8bd9ac421fe48a7d6961e224425c3ae7beee Reported-by: Rob Clark Signed-off-by: Douglas Anderson Reviewed-by: Eric Anholt Part-of: --- src/gallium/auxiliary/indices/u_indices_gen.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gallium/auxiliary/indices/u_indices_gen.py b/src/gallium/auxiliary/indices/u_indices_gen.py index 3c64bb2..9a6fc06 100644 --- a/src/gallium/auxiliary/indices/u_indices_gen.py +++ b/src/gallium/auxiliary/indices/u_indices_gen.py @@ -190,18 +190,18 @@ def name(intype, outtype, inpv, outpv, pr, prim): def preamble(intype, outtype, inpv, outpv, pr, prim): print('static void ' + name( intype, outtype, inpv, outpv, pr, prim ) + '(') if intype != GENERATE: - print(' const void * _in,') + print(' const void * restrict _in,') print(' unsigned start,') if intype != GENERATE: print(' unsigned in_nr,') print(' unsigned out_nr,') if intype != GENERATE: print(' unsigned restart_index,') - print(' void *_out )') + print(' void * restrict _out )') print('{') if intype != GENERATE: - print(' const ' + intype + '*in = (const ' + intype + '*)_in;') - print(' ' + outtype + ' *out = (' + outtype + '*)_out;') + print(' const ' + intype + '* restrict in = (const ' + intype + '* restrict)_in;') + print(' ' + outtype + ' * restrict out = (' + outtype + '* restrict)_out;') print(' unsigned i, j;') print(' (void)j;') -- 2.7.4