From 4df1e18864dc6b7830bb3c7998889883fe8dae2b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 5 Dec 2012 16:17:58 -0800 Subject: [PATCH] i965/fs: Fix the clock increment in scheduling. I've tested this to be true with various ALU ops on gen7 (with the exception of MADs, which go at either 3 or 4 cycles per dispatch). Acked-by: Kenneth Graunke --- .../drivers/dri/i965/brw_fs_schedule_instructions.cpp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp index 28e1ebb..458854c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp @@ -553,10 +553,22 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header) next_block_header->insert_before(chosen->inst); instructions_to_schedule--; - /* Bump the clock. If we expected a delay for scheduling, then - * bump the clock to reflect that. + /* Bump the clock. Instructions in gen hardware are handled one simd4 + * vector at a time, with 1 cycle per vector dispatched. Thus 8-wide + * pixel shaders take 2 cycles to dispatch and 16-wide (compressed) + * instructions take 4. */ - time = MAX2(time + 1, chosen_time); + if (is_compressed(chosen->inst)) + time += 4; + else + time += 2; + + /* If we expected a delay for scheduling, then bump the clock to reflect + * that as well. In reality, the hardware will switch to another + * hyperthread and may not return to dispatching our thread for a while + * even after we're unblocked. + */ + time = MAX2(time, chosen_time); if (debug) { printf("clock %4d, scheduled: ", time); -- 2.7.4