rusticl/kernel: implement image_format and image_order
author: Karol Herbst <kherbst@redhat.com>
Thu, 14 Apr 2022 14:33:21 +0000 (16:33 +0200)
committer: Marge Bot <emma+marge@anholt.net>
Mon, 12 Sep 2022 05:58:13 +0000 (05:58 +0000)
Signed-off-by: Karol Herbst <kherbst@redhat.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15439>

src/gallium/frontends/rusticl/api/util.rs
src/gallium/frontends/rusticl/core/kernel.rs
src/gallium/frontends/rusticl/mesa/compiler/nir.rs
src/gallium/frontends/rusticl/meson.build
src/gallium/frontends/rusticl/rusticl_mesa_inline_bindings_wrapper.c
src/gallium/frontends/rusticl/rusticl_mesa_inline_bindings_wrapper.h
src/gallium/frontends/rusticl/rusticl_nir.c
src/gallium/frontends/rusticl/rusticl_nir.h

index 02e6e06..4643327 100644 (file)
@@ -119,6 +119,7 @@ macro_rules! cl_prop_for_struct {
 }
 
 cl_prop_for_type!(cl_char);
+cl_prop_for_type!(cl_ushort);
 cl_prop_for_type!(cl_int);
 cl_prop_for_type!(cl_uint);
 cl_prop_for_type!(cl_ulong);
index 261129a..6504ec7 100644 (file)
@@ -52,6 +52,8 @@ pub enum InternalKernelArgType {
     GlobalWorkOffsets,
     PrintfBuffer,
     InlineSampler((cl_addressing_mode, cl_filter_mode, bool)),
+    FormatArray,
+    OrderArray,
 }
 
 #[derive(Clone)]
@@ -385,6 +387,35 @@ fn lower_and_optimize_nir_late(
         );
     }
 
+    nir.pass1(nir_shader_gather_info, nir.entrypoint());
+    if nir.num_images() > 0 {
+        res.push(InternalKernelArg {
+            kind: InternalKernelArgType::FormatArray,
+            offset: 0,
+            size: 2 * nir.num_images() as usize,
+        });
+
+        res.push(InternalKernelArg {
+            kind: InternalKernelArgType::OrderArray,
+            offset: 0,
+            size: 2 * nir.num_images() as usize,
+        });
+
+        lower_state.format_arr = nir.add_var(
+            nir_variable_mode::nir_var_uniform,
+            unsafe { glsl_array_type(glsl_int16_t_type(), nir.num_images() as u32, 2) },
+            args + res.len() - 2,
+            "image_formats",
+        );
+
+        lower_state.order_arr = nir.add_var(
+            nir_variable_mode::nir_var_uniform,
+            unsafe { glsl_array_type(glsl_int16_t_type(), nir.num_images() as u32, 2) },
+            args + res.len() - 1,
+            "image_orders",
+        );
+    }
+
     nir.pass2(
         nir_lower_vars_to_explicit_types,
         nir_variable_mode::nir_var_mem_shared
@@ -402,6 +433,7 @@ fn lower_and_optimize_nir_late(
     let mut compute_options = nir_lower_compute_system_values_options::default();
     compute_options.set_has_base_global_invocation_id(true);
     nir.pass1(nir_lower_compute_system_values, &compute_options);
+
     nir.pass1(rusticl_lower_intrinsics, &mut lower_state);
     nir.pass2(
         nir_lower_explicit_io,
@@ -493,6 +525,10 @@ impl Kernel {
         let mut samplers = Vec::new();
         let mut iviews = Vec::new();
         let mut sviews = Vec::new();
+        let mut tex_formats: Vec<u16> = Vec::new();
+        let mut tex_orders: Vec<u16> = Vec::new();
+        let mut img_formats: Vec<u16> = Vec::new();
+        let mut img_orders: Vec<u16> = Vec::new();
 
         for i in 0..3 {
             if block[i] == 0 {
@@ -520,10 +556,22 @@ impl Kernel {
                     if mem.is_buffer() {
                         input.extend_from_slice(&mem.offset.to_ne_bytes());
                         resource_info.push((Some(res.clone()), arg.offset));
-                    } else if arg.kind == KernelArgType::Image {
-                        iviews.push(res.pipe_image_view())
                     } else {
-                        sviews.push(res.clone())
+                        let (formats, orders) = if arg.kind == KernelArgType::Image {
+                            iviews.push(res.pipe_image_view());
+                            (&mut img_formats, &mut img_orders)
+                        } else {
+                            sviews.push(res.clone());
+                            (&mut tex_formats, &mut tex_orders)
+                        };
+
+                        assert!(arg.offset >= formats.len());
+
+                        formats.resize(arg.offset, 0);
+                        orders.resize(arg.offset, 0);
+
+                        formats.push(mem.image_format.image_channel_data_type as u16);
+                        orders.push(mem.image_format.image_channel_order as u16);
                     }
                 }
                 KernelArgValue::LocalMem(size) => {
@@ -582,6 +630,14 @@ impl Kernel {
                 InternalKernelArgType::InlineSampler(cl) => {
                     samplers.push(Sampler::cl_to_pipe(cl));
                 }
+                InternalKernelArgType::FormatArray => {
+                    input.extend_from_slice(&cl_prop::<&Vec<u16>>(&tex_formats));
+                    input.extend_from_slice(&cl_prop::<&Vec<u16>>(&img_formats));
+                }
+                InternalKernelArgType::OrderArray => {
+                    input.extend_from_slice(&cl_prop::<&Vec<u16>>(&tex_orders));
+                    input.extend_from_slice(&cl_prop::<&Vec<u16>>(&img_orders));
+                }
             }
         }
 
index ec11e9f..1f3f0ee 100644 (file)
@@ -116,6 +116,10 @@ impl NirShader {
         unsafe { pass(self.nir.as_ptr(), a, b, c) }
     }
 
+    pub fn entrypoint(&self) -> *mut nir_function_impl {
+        unsafe { nir_shader_get_entrypoint(self.nir.as_ptr()) }
+    }
+
     pub fn structurize(&mut self) {
         self.pass0(nir_lower_goto_ifs);
         self.pass0(nir_opt_dead_cf);
@@ -142,6 +146,10 @@ impl NirShader {
         )
     }
 
+    pub fn num_images(&self) -> u8 {
+        unsafe { (*self.nir.as_ptr()).info.num_images }
+    }
+
     pub fn reset_scratch_size(&self) {
         unsafe {
             (*self.nir.as_ptr()).scratch_size = 0;
index ad6a782..9a8f77e 100644 (file)
@@ -105,6 +105,16 @@ rusticl_bindgen_c_args = [
   '-fno-builtin-malloc',
 ]
 
+cl_c_args = [
+  '-DCL_USE_DEPRECATED_OPENCL_1_0_APIS',
+  '-DCL_USE_DEPRECATED_OPENCL_1_1_APIS',
+  '-DCL_USE_DEPRECATED_OPENCL_1_2_APIS',
+  '-DCL_USE_DEPRECATED_OPENCL_2_0_APIS',
+  '-DCL_USE_DEPRECATED_OPENCL_2_1_APIS',
+  '-DCL_USE_DEPRECATED_OPENCL_2_2_APIS',
+  '-DCL_TARGET_OPENCL_VERSION=300',
+]
+
 rusticl_opencl_bindings_rs = rust.bindgen(
   input : [
     'rusticl_opencl_bindings.h',
@@ -116,13 +126,7 @@ rusticl_opencl_bindings_rs = rust.bindgen(
   ],
   c_args : [
     rusticl_bindgen_c_args,
-    '-DCL_USE_DEPRECATED_OPENCL_1_0_APIS',
-    '-DCL_USE_DEPRECATED_OPENCL_1_1_APIS',
-    '-DCL_USE_DEPRECATED_OPENCL_1_2_APIS',
-    '-DCL_USE_DEPRECATED_OPENCL_2_0_APIS',
-    '-DCL_USE_DEPRECATED_OPENCL_2_1_APIS',
-    '-DCL_USE_DEPRECATED_OPENCL_2_2_APIS',
-    '-DCL_TARGET_OPENCL_VERSION=300',
+    cl_c_args,
   ],
   args : [
     rusticl_bindgen_args,
@@ -168,7 +172,10 @@ rusticl_mesa_bindings_inline_wrapper = static_library(
     inc_nir,
     inc_src,
   ],
-  c_args : pre_args,
+  c_args : [
+    pre_args,
+    cl_c_args,
+  ],
   dependencies: [
     idep_nir_headers,
   ],
index 2696595..e80f99d 100644 (file)
@@ -1,5 +1,11 @@
 #include "rusticl_mesa_inline_bindings_wrapper.h"
 
+nir_function_impl *
+nir_shader_get_entrypoint(const nir_shader *shader)
+{
+   return __nir_shader_get_entrypoint_wraped(shader);
+}
+
 void
 pipe_resource_reference(struct pipe_resource **dst, struct pipe_resource *src)
 {
index 9a7ab2c..b851c6e 100644 (file)
@@ -1,9 +1,13 @@
+#define nir_shader_get_entrypoint __nir_shader_get_entrypoint_wraped
 #define pipe_resource_reference __pipe_resource_reference_wraped
 #define util_format_pack_rgba __util_format_pack_rgba
+#include "nir.h"
 #include "util/u_inlines.h"
 #include "util/format/u_format.h"
+#undef nir_shader_get_entrypoint
 #undef pipe_resource_reference
 #undef util_format_pack_rgba
 
+nir_function_impl *nir_shader_get_entrypoint(const nir_shader *shader);
 void pipe_resource_reference(struct pipe_resource **dst, struct pipe_resource *src);
 void util_format_pack_rgba(enum pipe_format format, void *dst, const void *src, unsigned w);
index 3485f8c..542f735 100644 (file)
@@ -1,3 +1,5 @@
+#include "CL/cl.h"
+
 #include "nir.h"
 #include "nir_builder.h"
 
@@ -15,10 +17,42 @@ rusticl_lower_intrinsics_instr(
     nir_instr *instr,
     void* _state
 ) {
-    nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);
+    nir_intrinsic_instr *intrins = nir_instr_as_intrinsic(instr);
     struct rusticl_lower_state *state = _state;
 
-    switch (intrinsic->intrinsic) {
+    switch (intrins->intrinsic) {
+    case nir_intrinsic_image_deref_format:
+    case nir_intrinsic_image_deref_order: {
+        assert(intrins->src[0].is_ssa);
+
+        int32_t offset;
+        nir_deref_instr *deref;
+        nir_ssa_def *val;
+        nir_variable *var;
+
+        if (intrins->intrinsic == nir_intrinsic_image_deref_format) {
+            offset = CL_SNORM_INT8;
+            var = state->format_arr;
+        } else {
+            offset = CL_R;
+            var = state->order_arr;
+        }
+
+        val = intrins->src[0].ssa;
+        // we put write images after read images
+        if (nir_intrinsic_access(intrins) & ACCESS_NON_WRITEABLE) {
+            val = nir_iadd_imm(b, val, b->shader->info.num_textures);
+        }
+
+        deref = nir_build_deref_var(b, var);
+        deref = nir_build_deref_array(b, deref, val);
+        val = nir_u2u(b, nir_load_deref(b, deref), 32);
+
+        // we have to fix up the value base
+        val = nir_iadd_imm(b, val, -offset);
+
+        return val;
+    }
     case nir_intrinsic_load_base_global_invocation_id:
         return nir_load_var(b, state->base_global_invoc_id);
     case nir_intrinsic_load_constant_base_ptr:
index 78ea550..fefd0b3 100644 (file)
@@ -2,6 +2,8 @@ struct rusticl_lower_state {
     nir_variable *base_global_invoc_id;
     nir_variable *const_buf;
     nir_variable *printf_buf;
+    nir_variable *format_arr;
+    nir_variable *order_arr;
 };
 
 bool rusticl_lower_intrinsics(nir_shader *nir, struct rusticl_lower_state *state);