From 91702374d5d70296c967c324fff1844cc2933dd4 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Thu, 21 Mar 2019 01:29:52 +0100 Subject: [PATCH] radv: Add ycbcr lowering pass. Reviewed-by: Samuel Pitoiset --- src/amd/vulkan/Makefile.sources | 1 + src/amd/vulkan/meson.build | 1 + src/amd/vulkan/radv_nir_lower_ycbcr_textures.c | 371 +++++++++++++++++++++++++ src/amd/vulkan/radv_shader.h | 3 + 4 files changed, 376 insertions(+) create mode 100644 src/amd/vulkan/radv_nir_lower_ycbcr_textures.c diff --git a/src/amd/vulkan/Makefile.sources b/src/amd/vulkan/Makefile.sources index f632756..df90c11 100644 --- a/src/amd/vulkan/Makefile.sources +++ b/src/amd/vulkan/Makefile.sources @@ -54,6 +54,7 @@ VULKAN_FILES := \ radv_meta_resolve.c \ radv_meta_resolve_cs.c \ radv_meta_resolve_fs.c \ + radv_nir_lower_ycbcr_textures.c \ radv_nir_to_llvm.c \ radv_llvm_helper.cpp \ radv_pass.c \ diff --git a/src/amd/vulkan/meson.build b/src/amd/vulkan/meson.build index 3d7ad9f..5322673 100644 --- a/src/amd/vulkan/meson.build +++ b/src/amd/vulkan/meson.build @@ -82,6 +82,7 @@ libradv_files = files( 'radv_meta_resolve.c', 'radv_meta_resolve_cs.c', 'radv_meta_resolve_fs.c', + 'radv_nir_lower_ycbcr_textures.c', 'radv_nir_to_llvm.c', 'radv_pass.c', 'radv_pipeline.c', diff --git a/src/amd/vulkan/radv_nir_lower_ycbcr_textures.c b/src/amd/vulkan/radv_nir_lower_ycbcr_textures.c new file mode 100644 index 0000000..e37cc10 --- /dev/null +++ b/src/amd/vulkan/radv_nir_lower_ycbcr_textures.c @@ -0,0 +1,371 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "radv_private.h" +#include "radv_shader.h" +#include "vk_format.h" +#include "nir/nir.h" +#include "nir/nir_builder.h" + +struct ycbcr_state { + nir_builder *builder; + nir_ssa_def *image_size; + nir_tex_instr *origin_tex; + nir_deref_instr *tex_deref; + const struct radv_sampler_ycbcr_conversion *conversion; +}; + +static nir_ssa_def * +y_range(nir_builder *b, + nir_ssa_def *y_channel, + int bpc, + VkSamplerYcbcrRange range) +{ + switch (range) { + case VK_SAMPLER_YCBCR_RANGE_ITU_FULL: + return y_channel; + case VK_SAMPLER_YCBCR_RANGE_ITU_NARROW: + return nir_fmul(b, + nir_fadd(b, + nir_fmul(b, y_channel, + nir_imm_float(b, pow(2, bpc) - 1)), + nir_imm_float(b, -16.0f * pow(2, bpc - 8))), + nir_frcp(b, nir_imm_float(b, 219.0f * pow(2, bpc - 8)))); + default: + unreachable("missing Ycbcr range"); + return NULL; + } +} + +static nir_ssa_def * +chroma_range(nir_builder *b, + nir_ssa_def *chroma_channel, + int bpc, + VkSamplerYcbcrRange range) +{ + switch (range) { + case VK_SAMPLER_YCBCR_RANGE_ITU_FULL: + return nir_fadd(b, chroma_channel, + nir_imm_float(b, -pow(2, bpc - 1) / (pow(2, bpc) - 1.0f))); + case VK_SAMPLER_YCBCR_RANGE_ITU_NARROW: + return nir_fmul(b, + nir_fadd(b, + nir_fmul(b, chroma_channel, + nir_imm_float(b, pow(2, bpc) - 1)), + nir_imm_float(b, -128.0f * pow(2, bpc - 8))), + nir_frcp(b, nir_imm_float(b, 224.0f * pow(2, bpc - 8)))); + default: + unreachable("missing Ycbcr range"); + return NULL; + } +} + +typedef struct nir_const_value_3_4 { + nir_const_value v[3][4]; +} nir_const_value_3_4; + +static const nir_const_value_3_4 * +ycbcr_model_to_rgb_matrix(VkSamplerYcbcrModelConversion model) +{ + switch (model) { + case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601: { + static const nir_const_value_3_4 bt601 = { { + { { .f32 = 1.402f }, { .f32 = 1.0f }, { .f32 = 0.0f }, { .f32 = 0.0f } }, + { { .f32 = -0.714136286201022f }, { .f32 = 1.0f }, { .f32 = -0.344136286201022f }, { .f32 = 0.0f } }, + { { .f32 = 0.0f }, { .f32 = 1.0f }, { .f32 = 1.772f }, { .f32 = 0.0f } }, + } }; + + return &bt601; + } + case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709: { + static const nir_const_value_3_4 bt709 = { { + { { .f32 = 1.5748031496063f }, { .f32 = 1.0f }, { .f32 = 0.0f }, { .f32 = 0.0f } }, + { { .f32 = -0.468125209181067f }, { .f32 = 1.0f }, { .f32 = -0.187327487470334f }, { .f32 = 0.0f } }, + { { .f32 = 0.0f }, { .f32 = 1.0f }, { .f32 = 1.85563184264242f }, { .f32 = 0.0f } }, + } }; + + return &bt709; + } + case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020: { + static const nir_const_value_3_4 bt2020 = { { + { { .f32 = 1.4746f }, { .f32 = 1.0f }, { .f32 = 0.0f }, { .f32 = 0.0f } }, + { { .f32 = -0.571353126843658f }, { .f32 = 1.0f }, { .f32 = -0.164553126843658f }, { .f32 = 0.0f } }, + { { .f32 = 0.0f }, { .f32 = 1.0f }, { .f32 = 1.8814f }, { .f32 = 0.0f } }, + } }; + + return &bt2020; + } + default: + unreachable("missing Ycbcr model"); + return NULL; + } +} + +static nir_ssa_def * +convert_ycbcr(struct ycbcr_state *state, + nir_ssa_def *raw_channels, + uint8_t bits) +{ + nir_builder *b = state->builder; + const struct radv_sampler_ycbcr_conversion *conversion = state->conversion; + + nir_ssa_def *expanded_channels = + nir_vec4(b, + chroma_range(b, nir_channel(b, raw_channels, 0), + bits, conversion->ycbcr_range), + y_range(b, nir_channel(b, raw_channels, 1), + bits, conversion->ycbcr_range), + chroma_range(b, nir_channel(b, raw_channels, 2), + bits, conversion->ycbcr_range), + nir_imm_float(b, 1.0f)); + + if (conversion->ycbcr_model == VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY) + return expanded_channels; + + const nir_const_value_3_4 *conversion_matrix = + ycbcr_model_to_rgb_matrix(conversion->ycbcr_model); + + nir_ssa_def *converted_channels[] = { + nir_fdot4(b, expanded_channels, nir_build_imm(b, 4, 32, conversion_matrix->v[0])), + nir_fdot4(b, expanded_channels, nir_build_imm(b, 4, 32, conversion_matrix->v[1])), + nir_fdot4(b, expanded_channels, nir_build_imm(b, 4, 32, conversion_matrix->v[2])) + }; + + return nir_vec4(b, + converted_channels[0], converted_channels[1], + converted_channels[2], nir_imm_float(b, 1.0f)); +} + +static nir_ssa_def * +create_plane_tex_instr_implicit(struct ycbcr_state *state, + uint32_t plane) +{ + nir_builder *b = state->builder; + nir_tex_instr *old_tex = state->origin_tex; + nir_tex_instr *tex = nir_tex_instr_create(b->shader, old_tex->num_srcs+ 1); + + for (uint32_t i = 0; i < old_tex->num_srcs; i++) { + tex->src[i].src_type = old_tex->src[i].src_type; + nir_src_copy(&tex->src[i].src, &old_tex->src[i].src, tex); + } + + tex->src[tex->num_srcs - 1].src = nir_src_for_ssa(nir_imm_int(b, plane)); + tex->src[tex->num_srcs - 1].src_type = nir_tex_src_plane; + + tex->sampler_dim = old_tex->sampler_dim; + tex->dest_type = old_tex->dest_type; + + tex->op = old_tex->op; + tex->coord_components = old_tex->coord_components; + tex->is_new_style_shadow = old_tex->is_new_style_shadow; + tex->component = old_tex->component; + + tex->texture_index = old_tex->texture_index; + tex->texture_array_size = old_tex->texture_array_size; + tex->sampler_index = old_tex->sampler_index; + + nir_ssa_dest_init(&tex->instr, &tex->dest, + old_tex->dest.ssa.num_components, + nir_dest_bit_size(old_tex->dest), NULL); + nir_builder_instr_insert(b, &tex->instr); + + return &tex->dest.ssa; +} + +struct swizzle_info { + unsigned plane[4]; + unsigned swizzle[4]; +}; + +static struct swizzle_info +get_plane_swizzles(VkFormat format) +{ + int planes = vk_format_get_plane_count(format); + switch (planes) { + case 3: + return (struct swizzle_info) { + {2, 0, 1, 0}, + {0, 0, 0, 3} + }; + case 2: + return (struct swizzle_info) { + {1, 0, 1, 0}, + {1, 0, 0, 3} + }; + case 1: + return (struct swizzle_info) { + {0, 0, 0, 0}, + {0, 1, 2, 3} + }; + default: + unreachable("unhandled plane count for ycbcr swizzling"); + } +} + + +static nir_ssa_def * +build_swizzled_components(nir_builder *builder, + VkFormat format, + VkComponentMapping mapping, + nir_ssa_def **plane_values) +{ + struct swizzle_info plane_swizzle = get_plane_swizzles(format); + enum vk_swizzle swizzles[4]; + nir_ssa_def *values[4]; + + vk_format_compose_swizzles(&mapping, (const unsigned char[4]){0,1,2,3}, swizzles); + + nir_ssa_def *zero = nir_imm_float(builder, 0.0f); + nir_ssa_def *one = nir_imm_float(builder, 1.0f); + + for (unsigned i = 0; i < 4; ++i) { + switch(swizzles[i]) { + case VK_SWIZZLE_X: + case VK_SWIZZLE_Y: + case VK_SWIZZLE_Z: + case VK_SWIZZLE_W: { + unsigned channel = swizzles[i] - VK_SWIZZLE_X; + values[i] = nir_channel(builder, + plane_values[plane_swizzle.plane[channel]], + plane_swizzle.swizzle[channel]); + break; + } + case VK_SWIZZLE_0: + values[i] = zero; + break; + case VK_SWIZZLE_1: + values[i] = one; + break; + default: + unreachable("unhandled swizzle"); + } + } + return nir_vec(builder, values, 4); +} + +static bool +try_lower_tex_ycbcr(const struct radv_pipeline_layout *layout, + nir_builder *builder, + nir_tex_instr *tex) +{ + int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref); + assert(deref_src_idx >= 0); + nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src); + + nir_variable *var = nir_deref_instr_get_variable(deref); + const struct radv_descriptor_set_layout *set_layout = + layout->set[var->data.descriptor_set].layout; + const struct radv_descriptor_set_binding_layout *binding = + &set_layout->binding[var->data.binding]; + const struct radv_sampler_ycbcr_conversion *ycbcr_samplers = + radv_immutable_ycbcr_samplers(set_layout, var->data.binding); + + if (!ycbcr_samplers) + return false; + + /* For the following instructions, we don't apply any change and let the + * instruction apply to the first plane. + */ + if (tex->op == nir_texop_txs || + tex->op == nir_texop_query_levels || + tex->op == nir_texop_lod) + return false; + + assert(tex->texture_index == 0); + unsigned array_index = 0; + if (deref->deref_type != nir_deref_type_var) { + assert(deref->deref_type == nir_deref_type_array); + if (!nir_src_is_const(deref->arr.index)) + return false; + array_index = nir_src_as_uint(deref->arr.index); + array_index = MIN2(array_index, binding->array_size - 1); + } + const struct radv_sampler_ycbcr_conversion *ycbcr_sampler = ycbcr_samplers + array_index; + + if (ycbcr_sampler->format == VK_FORMAT_UNDEFINED) + return false; + + struct ycbcr_state state = { + .builder = builder, + .origin_tex = tex, + .tex_deref = deref, + .conversion = ycbcr_sampler, + }; + + builder->cursor = nir_before_instr(&tex->instr); + + VkFormat format = state.conversion->format; + const int plane_count = vk_format_get_plane_count(format); + nir_ssa_def *plane_values[3]; + + for (int p = 0; p < plane_count; ++p) { + plane_values[p] = create_plane_tex_instr_implicit(&state, p); + } + + nir_ssa_def *result = build_swizzled_components(builder, format, ycbcr_sampler->components, plane_values); + if (state.conversion->ycbcr_model != VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY) { + VkFormat first_format = vk_format_get_plane_format(format, 0); + result = convert_ycbcr(&state, result, vk_format_get_component_bits(first_format, VK_FORMAT_COLORSPACE_RGB, VK_SWIZZLE_X)); + } + + nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(result)); + nir_instr_remove(&tex->instr); + + return true; +} + +bool +radv_nir_lower_ycbcr_textures(nir_shader *shader, + const struct radv_pipeline_layout *layout) +{ + bool progress = false; + + nir_foreach_function(function, shader) { + if (!function->impl) + continue; + + bool function_progress = false; + nir_builder builder; + nir_builder_init(&builder, function->impl); + + nir_foreach_block(block, function->impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_tex) + continue; + + nir_tex_instr *tex = nir_instr_as_tex(instr); + function_progress |= try_lower_tex_ycbcr(layout, &builder, tex); + } + } + + if (function_progress) { + nir_metadata_preserve(function->impl, + nir_metadata_block_index | + nir_metadata_dominance); + } + + progress |= function_progress; + } + + return progress; +} diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index fe2f286..ac367bf 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -337,6 +337,9 @@ struct radv_shader_slab { void radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively, bool allow_copies); +bool +radv_nir_lower_ycbcr_textures(nir_shader *shader, + const struct radv_pipeline_layout *layout); nir_shader * radv_shader_compile_to_nir(struct radv_device *device, -- 2.7.4