We have competent lowering in NIR already available.
Drivers exposing CAP_DOUBLES but not SHADER_CAP_DROUND:
- d3d12 (NIR lowers ~0 if the underlying impl doesn't do doubles)
- svga (Now sets the NIR lowering options)
- softpipe (Doesn't do GL4 so you can't use doubles anyway)
- llvmpipe (Lowers dround_even in NIR and passes the rest through
successfully)
- zink (NIR lowers ~0 if the underlying impl doesn't do doubles,
otherwise passes things through successfully, except needed
dround_even lowering to avoid lavapipe regression with
native doubles)
- r600 (sets NIR rounding lowering flags, and lowers all fsign)
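All of these drivers use the same mechanism: they set bits in
nir_shader_compiler_options::lower_doubles_options so the common NIR
double-lowering pass rewrites the rounding ALUs for them. As a minimal
sketch (the struct instance, its name and the include path below are
illustrative only; the flag names are the ones used in this series), a
driver with no native double rounding would declare something like:

    #include "compiler/nir/nir.h"   /* nir_shader_compiler_options */

    /* Illustrative only: ask NIR to lower all double rounding ops
     * (plus dsign) instead of advertising the old DROUND cap.
     */
    static const struct nir_shader_compiler_options example_options = {
       .lower_doubles_options = nir_lower_dround_even |
                                nir_lower_dtrunc |
                                nir_lower_dceil |
                                nir_lower_dfloor |
                                nir_lower_dsign,
    };

nir_lower_doubles() then consumes these flags during shader
compilation, which is what replaces the GLSL IR DOPS_TO_DFRAC pass
deleted below.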
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Acked-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25777>
samplers.
* ``PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS``: The maximum number of texture
sampler views. Must not be lower than PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS.
-* ``PIPE_SHADER_CAP_DROUND_SUPPORTED``: Whether double precision rounding
- is supported. If it is, DTRUNC/DCEIL/DFLR/DROUND opcodes may be used.
* ``PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE``: Whether the driver doesn't
ignore tgsi_declaration_range::Last for shader inputs and outputs.
* ``PIPE_SHADER_CAP_MAX_SHADER_BUFFERS``: Maximum number of memory buffers
bool do_vec_index_to_cond_assign(exec_list *instructions);
void lower_discard_flow(exec_list *instructions);
bool lower_instructions(exec_list *instructions,
- bool have_dround,
bool have_gpu_shader5);
bool lower_clip_cull_distance(struct gl_shader_program *prog,
gl_linked_shader *shader);
* must replace them with some other expression tree. This pass lowers some
* of the most common cases, allowing the lowering code to be implemented once
* rather than in each driver backend.
- *
- * Currently supported transformations:
- * - DOPS_TO_DFRAC
- *
- * DOPS_TO_DFRAC:
- * --------------
- * Converts double trunc, ceil, floor, round to fract
*/
#include "program/prog_instruction.h" /* for swizzle */
#include <math.h>
/* Operations for lower_instructions() */
-#define DOPS_TO_DFRAC 0x800
#define FIND_LSB_TO_FLOAT_CAST 0x20000
#define FIND_MSB_TO_FLOAT_CAST 0x40000
#define IMUL_HIGH_TO_MUL 0x80000
void double_dot_to_fma(ir_expression *);
void double_lrp(ir_expression *);
- void dceil_to_dfrac(ir_expression *);
- void dfloor_to_dfrac(ir_expression *);
- void dround_even_to_dfrac(ir_expression *);
- void dtrunc_to_dfrac(ir_expression *);
- void dsign_to_csel(ir_expression *);
void find_lsb_to_float_cast(ir_expression *ir);
void find_msb_to_float_cast(ir_expression *ir);
void imul_high_to_mul(ir_expression *ir);
#define lowering(x) (this->lower & x)
bool
-lower_instructions(exec_list *instructions,
- bool have_dround, bool have_gpu_shader5)
+lower_instructions(exec_list *instructions, bool have_gpu_shader5)
{
unsigned what_to_lower =
- (have_dround ? 0 : DOPS_TO_DFRAC) |
/* Assume that if ARB_gpu_shader5 is not supported then all of the
* extended integer functions need lowering. It may be necessary to add
* some caps for individual instructions.
}
void
-lower_instructions_visitor::dceil_to_dfrac(ir_expression *ir)
-{
- /*
- * frtemp = frac(x);
- * temp = sub(x, frtemp);
- * result = temp + ((frtemp != 0.0) ? 1.0 : 0.0);
- */
- ir_instruction &i = *base_ir;
- ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements);
- ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements);
- ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp",
- ir_var_temporary);
-
- i.insert_before(frtemp);
- i.insert_before(assign(frtemp, fract(ir->operands[0])));
-
- ir->operation = ir_binop_add;
- ir->init_num_operands();
- ir->operands[0] = sub(ir->operands[0]->clone(ir, NULL), frtemp);
- ir->operands[1] = csel(nequal(frtemp, zero), one, zero->clone(ir, NULL));
-
- this->progress = true;
-}
-
-void
-lower_instructions_visitor::dfloor_to_dfrac(ir_expression *ir)
-{
- /*
- * frtemp = frac(x);
- * result = sub(x, frtemp);
- */
- ir->operation = ir_binop_sub;
- ir->init_num_operands();
- ir->operands[1] = fract(ir->operands[0]->clone(ir, NULL));
-
- this->progress = true;
-}
-void
-lower_instructions_visitor::dround_even_to_dfrac(ir_expression *ir)
-{
- /*
- * insane but works
- * temp = x + 0.5;
- * frtemp = frac(temp);
- * t2 = sub(temp, frtemp);
- * if (frac(x) == 0.5)
- * result = frac(t2 * 0.5) == 0 ? t2 : t2 - 1;
- * else
- * result = t2;
-
- */
- ir_instruction &i = *base_ir;
- ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp",
- ir_var_temporary);
- ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp",
- ir_var_temporary);
- ir_variable *t2 = new(ir) ir_variable(ir->operands[0]->type, "t2",
- ir_var_temporary);
- ir_constant *p5 = new(ir) ir_constant(0.5, ir->operands[0]->type->vector_elements);
- ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements);
- ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements);
-
- i.insert_before(temp);
- i.insert_before(assign(temp, add(ir->operands[0], p5)));
-
- i.insert_before(frtemp);
- i.insert_before(assign(frtemp, fract(temp)));
-
- i.insert_before(t2);
- i.insert_before(assign(t2, sub(temp, frtemp)));
-
- ir->operation = ir_triop_csel;
- ir->init_num_operands();
- ir->operands[0] = equal(fract(ir->operands[0]->clone(ir, NULL)),
- p5->clone(ir, NULL));
- ir->operands[1] = csel(equal(fract(mul(t2, p5->clone(ir, NULL))),
- zero),
- t2,
- sub(t2, one));
- ir->operands[2] = new(ir) ir_dereference_variable(t2);
-
- this->progress = true;
-}
-
-void
-lower_instructions_visitor::dtrunc_to_dfrac(ir_expression *ir)
-{
- /*
- * frtemp = frac(x);
- * temp = sub(x, frtemp);
- * result = x >= 0 ? temp : temp + (frtemp == 0.0) ? 0 : 1;
- */
- ir_rvalue *arg = ir->operands[0];
- ir_instruction &i = *base_ir;
-
- ir_constant *zero = new(ir) ir_constant(0.0, arg->type->vector_elements);
- ir_constant *one = new(ir) ir_constant(1.0, arg->type->vector_elements);
- ir_variable *frtemp = new(ir) ir_variable(arg->type, "frtemp",
- ir_var_temporary);
- ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp",
- ir_var_temporary);
-
- i.insert_before(frtemp);
- i.insert_before(assign(frtemp, fract(arg)));
- i.insert_before(temp);
- i.insert_before(assign(temp, sub(arg->clone(ir, NULL), frtemp)));
-
- ir->operation = ir_triop_csel;
- ir->init_num_operands();
- ir->operands[0] = gequal(arg->clone(ir, NULL), zero);
- ir->operands[1] = new (ir) ir_dereference_variable(temp);
- ir->operands[2] = add(temp,
- csel(equal(frtemp, zero->clone(ir, NULL)),
- zero->clone(ir, NULL),
- one));
-
- this->progress = true;
-}
-
-void
-lower_instructions_visitor::dsign_to_csel(ir_expression *ir)
-{
- /*
- * temp = x > 0.0 ? 1.0 : 0.0;
- * result = x < 0.0 ? -1.0 : temp;
- */
- ir_rvalue *arg = ir->operands[0];
- ir_constant *zero = new(ir) ir_constant(0.0, arg->type->vector_elements);
- ir_constant *one = new(ir) ir_constant(1.0, arg->type->vector_elements);
- ir_constant *neg_one = new(ir) ir_constant(-1.0, arg->type->vector_elements);
-
- ir->operation = ir_triop_csel;
- ir->init_num_operands();
- ir->operands[0] = less(arg->clone(ir, NULL),
- zero->clone(ir, NULL));
- ir->operands[1] = neg_one;
- ir->operands[2] = csel(greater(arg, zero),
- one,
- zero->clone(ir, NULL));
-
- this->progress = true;
-}
-
-void
lower_instructions_visitor::find_lsb_to_float_cast(ir_expression *ir)
{
/* For more details, see:
double_lrp(ir);
break;
- case ir_unop_trunc:
- if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
- dtrunc_to_dfrac(ir);
- break;
-
- case ir_unop_ceil:
- if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
- dceil_to_dfrac(ir);
- break;
-
- case ir_unop_floor:
- if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
- dfloor_to_dfrac(ir);
- break;
-
- case ir_unop_round_even:
- if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
- dround_even_to_dfrac(ir);
- break;
-
- case ir_unop_sign:
- if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
- dsign_to_csel(ir);
- break;
-
case ir_unop_find_lsb:
if (lowering(FIND_LSB_TO_FLOAT_CAST))
find_lsb_to_float_cast(ir);
return do_vec_index_to_cond_assign(ir);
} else if (sscanf(optimization, "lower_instructions ( %d ) ",
&int_0) == 1) {
- return lower_instructions(ir, false, false);
+ return lower_instructions(ir, false);
} else {
printf("Unrecognized optimization %s\n", optimization);
exit(EXIT_FAILURE);
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 1;
- case PIPE_SHADER_CAP_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
return 0;
return 1;
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 1;
- case PIPE_SHADER_CAP_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
return 0;
return false;
case PIPE_SHADER_CAP_INT64_ATOMICS:
- case PIPE_SHADER_CAP_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0;
return 0;
case PIPE_SHADER_CAP_SUPPORTED_IRS:
return 1 << PIPE_SHADER_IR_NIR;
- case PIPE_SHADER_CAP_DROUND_SUPPORTED:
- return 1;
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
case PIPE_SHADER_CAP_FP16_DERIVATIVES:
*/
return PIPE_MAX_SAMPLERS;
- case PIPE_SHADER_CAP_DROUND_SUPPORTED:
- return 0; /* not implemented */
-
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0; /* no idea */
return shader == PIPE_SHADER_FRAGMENT
? screen->specs.max_ps_uniforms * sizeof(float[4])
: screen->specs.max_vs_uniforms * sizeof(float[4]);
- case PIPE_SHADER_CAP_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return false;
case PIPE_SHADER_CAP_SUPPORTED_IRS:
/* a2xx compiler doesn't handle indirect: */
return is_ir3(screen) ? 1 : 0;
case PIPE_SHADER_CAP_SUBROUTINES:
- case PIPE_SHADER_CAP_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
return I915_TEX_UNITS;
- case PIPE_SHADER_CAP_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
irs |= 1 << PIPE_SHADER_IR_NIR_SERIALIZED;
return irs;
}
- case PIPE_SHADER_CAP_DROUND_SUPPORTED:
- return 1;
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
return 0;
case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
case PIPE_SHADER_CAP_INT16:
case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
- case PIPE_SHADER_CAP_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
case PIPE_SHADER_CAP_INT16:
case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
- case PIPE_SHADER_CAP_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return shader == PIPE_SHADER_COMPUTE ? NV50_MAX_GLOBALS - 1 : 0;
case PIPE_SHADER_CAP_SUPPORTED_IRS:
return 1 << PIPE_SHADER_IR_NIR;
- case PIPE_SHADER_CAP_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
return 1;
case PIPE_SHADER_CAP_INTEGERS:
return 1;
- case PIPE_SHADER_CAP_DROUND_SUPPORTED:
- return 1;
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_INT64_ATOMICS:
case PIPE_SHADER_CAP_FP16:
return false;
case PIPE_SHADER_CAP_INT64_ATOMICS:
- case PIPE_SHADER_CAP_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0;
case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
case PIPE_SHADER_CAP_INT16:
case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
- case PIPE_SHADER_CAP_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
case PIPE_SHADER_CAP_INT64_ATOMICS:
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
- case PIPE_SHADER_CAP_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
ir |= 1 << PIPE_SHADER_IR_NIR;
return ir;
}
- case PIPE_SHADER_CAP_DROUND_SUPPORTED:
- return 0;
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
if (rscreen->b.family >= CHIP_CEDAR &&
case PIPE_SHADER_CAP_INTEGERS:
case PIPE_SHADER_CAP_INT64_ATOMICS:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
- case PIPE_SHADER_CAP_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: /* lowered in finalize_nir */
case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: /* lowered in finalize_nir */
return 1;
return 16;
case PIPE_SHADER_CAP_SUPPORTED_IRS:
return (1 << PIPE_SHADER_IR_TGSI) | (1 << PIPE_SHADER_IR_NIR);
- case PIPE_SHADER_CAP_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
case PIPE_SHADER_CAP_SUPPORTED_IRS:
return (1 << PIPE_SHADER_IR_TGSI) | (1 << PIPE_SHADER_IR_NIR);
- case PIPE_SHADER_CAP_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return (1 << PIPE_SHADER_IR_TGSI) | (1 << PIPE_SHADER_IR_NIR);
else
return 0;
- case PIPE_SHADER_CAP_DROUND_SUPPORTED:
- /* For the above cases, we rely on the GLSL compiler to translate/lower
- * the TGIS instruction into other instructions we do support.
- */
- return 0;
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return sws->have_gl43 ? SVGA_MAX_IMAGES : 0;
.use_interpolated_input_intrinsics = true
#define VGPU10_OPTIONS \
- .lower_doubles_options = nir_lower_dfloor | nir_lower_dsign, \
+   .lower_doubles_options = nir_lower_dfloor | nir_lower_dsign | \
+                            nir_lower_dceil | nir_lower_dtrunc | \
+                            nir_lower_dround_even, \
.lower_fmod = true, \
.lower_fpow = true
case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
case PIPE_SHADER_CAP_INT16:
case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
- case PIPE_SHADER_CAP_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
case PIPE_SHADER_CAP_INT16:
case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
- case PIPE_SHADER_CAP_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
.lower_usub_sat = true,
.lower_vector_cmp = true,
.lower_int64_options = 0,
- .lower_doubles_options = 0,
+ .lower_doubles_options = nir_lower_dround_even,
.lower_uniforms_to_ubo = true,
.has_fsub = true,
.has_isub = true,
screen->info.props.limits.maxPerStageDescriptorSampledImages),
PIPE_MAX_SAMPLERS);
- case PIPE_SHADER_CAP_DROUND_SUPPORTED:
- return 0; /* not implemented */
-
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0; /* no idea */
PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS,
PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED,
PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS,
- PIPE_SHADER_CAP_DROUND_SUPPORTED, /* all rounding modes */
PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE,
PIPE_SHADER_CAP_MAX_SHADER_BUFFERS,
PIPE_SHADER_CAP_SUPPORTED_IRS,
struct gl_shader_program *shader_program)
{
struct st_context *st = st_context(ctx);
- struct pipe_screen *pscreen = st->screen;
struct gl_linked_shader *linked_shader[MESA_SHADER_STAGES];
unsigned num_shaders = 0;
struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
exec_list *ir = shader->ir;
- gl_shader_stage stage = shader->Stage;
-
- enum pipe_shader_type ptarget = pipe_shader_type_from_mesa(stage);
- bool have_dround = pscreen->get_shader_param(pscreen, ptarget,
- PIPE_SHADER_CAP_DROUND_SUPPORTED);
lower_packing_builtins(ir, ctx->Extensions.ARB_shading_language_packing,
ctx->Extensions.ARB_gpu_shader5,
ctx->st->has_half_float_packing);
do_mat_op_to_vec(ir);
- lower_instructions(ir, have_dround,
- ctx->Extensions.ARB_gpu_shader5);
+ lower_instructions(ir, ctx->Extensions.ARB_gpu_shader5);
do_vec_index_to_cond_assign(ir);