* - denormals break v_mad_f32
* - GFX6 & GFX7 would be very slow
*/
- conf->float_mode &= ~V_00B028_FP_ALL_DENORMS;
- conf->float_mode |= V_00B028_FP_64_DENORMS;
+ conf->float_mode &= ~V_00B028_FP_32_DENORMS;
+ conf->float_mode |= V_00B028_FP_16_64_DENORMS;
}
},
"FLOAT_MODE": {
"entries": [
+ {"name": "FP_32_ROUND_TOWARDS_ZERO", "value": 3},
+ {"name": "FP_16_64_ROUND_TOWARDS_ZERO", "value": 12},
{"name": "FP_32_DENORMS", "value": 48},
- {"name": "FP_64_DENORMS", "value": 192},
- {"name": "FP_ALL_DENORMS", "value": 240}
+ {"name": "FP_16_64_DENORMS", "value": 192}
]
},
"ForceControl": {
},
"FLOAT_MODE": {
"entries": [
+ {"name": "FP_32_ROUND_TOWARDS_ZERO", "value": 3},
+ {"name": "FP_16_64_ROUND_TOWARDS_ZERO", "value": 12},
{"name": "FP_32_DENORMS", "value": 48},
- {"name": "FP_64_DENORMS", "value": 192},
- {"name": "FP_ALL_DENORMS", "value": 240}
+ {"name": "FP_16_64_DENORMS", "value": 192}
]
},
"ForceControl": {
},
"FLOAT_MODE": {
"entries": [
+ {"name": "FP_32_ROUND_TOWARDS_ZERO", "value": 3},
+ {"name": "FP_16_64_ROUND_TOWARDS_ZERO", "value": 12},
{"name": "FP_32_DENORMS", "value": 48},
- {"name": "FP_64_DENORMS", "value": 192},
- {"name": "FP_ALL_DENORMS", "value": 240}
+ {"name": "FP_16_64_DENORMS", "value": 192}
]
},
"ForceControl": {
},
"FLOAT_MODE": {
"entries": [
+ {"name": "FP_32_ROUND_TOWARDS_ZERO", "value": 3},
+ {"name": "FP_16_64_ROUND_TOWARDS_ZERO", "value": 12},
{"name": "FP_32_DENORMS", "value": 48},
- {"name": "FP_64_DENORMS", "value": 192},
- {"name": "FP_ALL_DENORMS", "value": 240}
+ {"name": "FP_16_64_DENORMS", "value": 192}
]
},
"ForceControl": {
},
"FLOAT_MODE": {
"entries": [
+ {"name": "FP_32_ROUND_TOWARDS_ZERO", "value": 3},
+ {"name": "FP_16_64_ROUND_TOWARDS_ZERO", "value": 12},
{"name": "FP_32_DENORMS", "value": 48},
- {"name": "FP_64_DENORMS", "value": 192},
- {"name": "FP_ALL_DENORMS", "value": 240}
+ {"name": "FP_16_64_DENORMS", "value": 192}
]
},
"ForceControl": {
},
"FLOAT_MODE": {
"entries": [
+ {"name": "FP_32_ROUND_TOWARDS_ZERO", "value": 3},
+ {"name": "FP_16_64_ROUND_TOWARDS_ZERO", "value": 12},
{"name": "FP_32_DENORMS", "value": 48},
- {"name": "FP_64_DENORMS", "value": 192},
- {"name": "FP_ALL_DENORMS", "value": 240}
+ {"name": "FP_16_64_DENORMS", "value": 192}
]
},
"ForceControl": {
},
"FLOAT_MODE": {
"entries": [
+ {"name": "FP_32_ROUND_TOWARDS_ZERO", "value": 3},
+ {"name": "FP_16_64_ROUND_TOWARDS_ZERO", "value": 12},
{"name": "FP_32_DENORMS", "value": 48},
- {"name": "FP_64_DENORMS", "value": 192},
- {"name": "FP_ALL_DENORMS", "value": 240}
+ {"name": "FP_16_64_DENORMS", "value": 192}
]
},
"ForceControl": {
},
"FLOAT_MODE": {
"entries": [
+ {"name": "FP_32_ROUND_TOWARDS_ZERO", "value": 3},
+ {"name": "FP_16_64_ROUND_TOWARDS_ZERO", "value": 12},
{"name": "FP_32_DENORMS", "value": 48},
- {"name": "FP_64_DENORMS", "value": 192},
- {"name": "FP_ALL_DENORMS", "value": 240}
+ {"name": "FP_16_64_DENORMS", "value": 192}
]
},
"ForceControl": {
missing_enums_all = {
'FLOAT_MODE': {
"entries": [
+ {"name": "FP_32_ROUND_TOWARDS_ZERO", "value": 3},
+ {"name": "FP_16_64_ROUND_TOWARDS_ZERO", "value": 12},
{"name": "FP_32_DENORMS", "value": 48},
- {"name": "FP_64_DENORMS", "value": 192},
- {"name": "FP_ALL_DENORMS", "value": 240}
+ {"name": "FP_16_64_DENORMS", "value": 192},
]
},
'QUANT_MODE': {
shader->info.uses_instanceid = sel->info.uses_instanceid;
shader->info.private_mem_vgprs = DIV_ROUND_UP(nir->scratch_size, 4);
+ /* Set the FP ALU behavior. */
+ /* By default, we disable denormals for FP32 and enable them for FP16 and FP64
+ * for performance and correctness reasons. FP32 denormals can't be enabled because
+ * they break output modifiers and v_mad_f32 and are very slow on GFX6-7.
+ *
+ * float_controls_execution_mode defines the set of valid behaviors. Contradicting flags
+ * can be set simultaneously, in which case either choice is allowed, but some choices cause
+ * GLCTS failures, so the RTE and DENORM_PRESERVE flags take precedence below.
+ */
+ unsigned float_mode = V_00B028_FP_16_64_DENORMS;
+
+ if (!(nir->info.float_controls_execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) &&
+ nir->info.float_controls_execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32)
+ float_mode |= V_00B028_FP_32_ROUND_TOWARDS_ZERO;
+
+ if (!(nir->info.float_controls_execution_mode & (FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16 |
+ FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64)) &&
+ nir->info.float_controls_execution_mode & (FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16 |
+ FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64))
+ float_mode |= V_00B028_FP_16_64_ROUND_TOWARDS_ZERO;
+
+ if (!(nir->info.float_controls_execution_mode & (FLOAT_CONTROLS_DENORM_PRESERVE_FP16 |
+ FLOAT_CONTROLS_DENORM_PRESERVE_FP64)) &&
+ nir->info.float_controls_execution_mode & (FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16 |
+ FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64))
+ float_mode &= ~V_00B028_FP_16_64_DENORMS;
+
/* TODO: ACO could compile non-monolithic shaders here (starting
* with PS and NGG VS), but monolithic shaders should be compiled
* by LLVM due to more complicated compilation.
if (!si_llvm_compile_shader(sscreen, compiler, shader, &so, debug, nir, free_nir))
return false;
+ shader->config.float_mode = float_mode;
+
/* The GS copy shader is compiled next. */
if (sel->stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) {
shader->gs_copy_shader = si_generate_gs_copy_shader(sscreen, compiler, sel, &so, debug);