From 79b3c820ccc8a861143e0b26762f66b1c133a922 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Sun, 17 Apr 2022 19:00:57 +0200 Subject: [PATCH] rusticl/kernel: cache the nir as well Signed-off-by: Karol Herbst Acked-by: Alyssa Rosenzweig Part-of: --- src/gallium/frontends/rusticl/core/kernel.rs | 183 +++++++++++++++++++-- src/gallium/frontends/rusticl/core/program.rs | 15 ++ .../frontends/rusticl/mesa/compiler/clc/spirv.rs | 45 +++++ src/gallium/frontends/rusticl/mesa/compiler/nir.rs | 25 +++ src/gallium/frontends/rusticl/meson.build | 1 + .../frontends/rusticl/rusticl_mesa_bindings.h | 2 + src/gallium/frontends/rusticl/util/lib.rs | 1 + src/gallium/frontends/rusticl/util/serialize.rs | 26 +++ 8 files changed, 285 insertions(+), 13 deletions(-) create mode 100644 src/gallium/frontends/rusticl/util/serialize.rs diff --git a/src/gallium/frontends/rusticl/core/kernel.rs b/src/gallium/frontends/rusticl/core/kernel.rs index cecc0fd..867d562 100644 --- a/src/gallium/frontends/rusticl/core/kernel.rs +++ b/src/gallium/frontends/rusticl/core/kernel.rs @@ -11,6 +11,7 @@ use crate::impl_cl_type_trait; use mesa_rust::compiler::clc::*; use mesa_rust::compiler::nir::*; use mesa_rust_gen::*; +use mesa_rust_util::serialize::*; use rusticl_opencl_gen::*; use std::cell::RefCell; @@ -32,15 +33,15 @@ pub enum KernelArgValue { LocalMem(usize), } -#[derive(Hash, PartialEq, Eq, Clone)] +#[derive(Hash, PartialEq, Eq, Clone, Copy)] pub enum KernelArgType { - Constant, // for anything passed by value - Image, - Sampler, - Texture, - MemGlobal, - MemConstant, - MemLocal, + Constant = 0, // for anything passed by value + Image = 1, + Sampler = 2, + Texture = 3, + MemGlobal = 4, + MemConstant = 5, + MemLocal = 6, } #[derive(Hash, PartialEq, Eq, Clone)] @@ -139,6 +140,95 @@ impl KernelArg { } } } + + fn serialize(&self) -> Vec<u8> { + let mut bin = Vec::new(); + + bin.append(&mut self.spirv.serialize()); + bin.extend_from_slice(&self.size.to_ne_bytes()); + 
bin.extend_from_slice(&self.offset.to_ne_bytes()); + bin.extend_from_slice(&(self.dead as u8).to_ne_bytes()); + bin.extend_from_slice(&(self.kind as u8).to_ne_bytes()); + + bin + } + + fn deserialize(bin: &mut &[u8]) -> Option<Self> { + let spirv = spirv::SPIRVKernelArg::deserialize(bin)?; + let size = read_ne_usize(bin); + let offset = read_ne_usize(bin); + let dead = read_ne_u8(bin) == 1; + + let kind = match read_ne_u8(bin) { + 0 => KernelArgType::Constant, + 1 => KernelArgType::Image, + 2 => KernelArgType::Sampler, + 3 => KernelArgType::Texture, + 4 => KernelArgType::MemGlobal, + 5 => KernelArgType::MemConstant, + 6 => KernelArgType::MemLocal, + _ => return None, + }; + + Some(Self { + spirv: spirv, + kind: kind, + size: size, + offset: offset, + dead: dead, + }) + } +} + +impl InternalKernelArg { + fn serialize(&self) -> Vec<u8> { + let mut bin = Vec::new(); + + bin.extend_from_slice(&self.size.to_ne_bytes()); + bin.extend_from_slice(&self.offset.to_ne_bytes()); + + match self.kind { + InternalKernelArgType::ConstantBuffer => bin.push(0), + InternalKernelArgType::GlobalWorkOffsets => bin.push(1), + InternalKernelArgType::PrintfBuffer => bin.push(2), + InternalKernelArgType::InlineSampler((addr_mode, filter_mode, norm)) => { + bin.push(3); + bin.extend_from_slice(&addr_mode.to_ne_bytes()); + bin.extend_from_slice(&filter_mode.to_ne_bytes()); + bin.push(norm as u8); + } + InternalKernelArgType::FormatArray => bin.push(4), + InternalKernelArgType::OrderArray => bin.push(5), + } + + bin + } + + fn deserialize(bin: &mut &[u8]) -> Option<Self> { + let size = read_ne_usize(bin); + let offset = read_ne_usize(bin); + + let kind = match read_ne_u8(bin) { + 0 => InternalKernelArgType::ConstantBuffer, + 1 => InternalKernelArgType::GlobalWorkOffsets, + 2 => InternalKernelArgType::PrintfBuffer, + 3 => { + let addr_mode = read_ne_u32(bin); + let filter_mode = read_ne_u32(bin); + let norm = read_ne_u8(bin) == 1; + InternalKernelArgType::InlineSampler((addr_mode, filter_mode, norm)) + } + 4 
=> InternalKernelArgType::FormatArray, + 5 => InternalKernelArgType::OrderArray, + _ => return None, + }; + + Some(Self { + kind: kind, + size: size, + offset: offset, + }) + } } #[repr(C)] @@ -454,6 +544,36 @@ fn lower_and_optimize_nir_late( res } +fn deserialize_nir( + bin: &mut &[u8], + d: &Device, +) -> Option<(NirShader, Vec<KernelArg>, Vec<InternalKernelArg>)> { + let nir_len = read_ne_usize(bin); + + let nir = NirShader::deserialize( + bin, + nir_len, + d.screen() + .nir_shader_compiler_options(pipe_shader_type::PIPE_SHADER_COMPUTE), + )?; + + let arg_len = read_ne_usize(bin); + let mut args = Vec::with_capacity(arg_len); + for _ in 0..arg_len { + args.push(KernelArg::deserialize(bin)?); + } + + let arg_len = read_ne_usize(bin); + let mut internal_args = Vec::with_capacity(arg_len); + for _ in 0..arg_len { + internal_args.push(InternalKernelArg::deserialize(bin)?); + } + + assert!(bin.is_empty()); + + Some((nir, args, internal_args)) +} + fn convert_spirv_to_nir( p: &Program, name: &str, @@ -469,13 +589,50 @@ fn convert_spirv_to_nir( // TODO: we could run this in parallel? 
for d in p.devs_with_build() { - let mut nir = p.to_nir(name, d); + let cache = d.screen().shader_cache(); + let key = p.hash_key(d, name); + + let res = if let Some(cache) = &cache { + cache.get(&mut key.unwrap()).and_then(|entry| { + let mut bin: &[u8] = &entry; + deserialize_nir(&mut bin, d) + }) + } else { + None + }; + + let (nir, args, internal_args) = if let Some(res) = res { + res + } else { + let mut nir = p.to_nir(name, d); + + lower_and_optimize_nir_pre_inputs(d, &mut nir, &d.lib_clc); + let mut args = KernelArg::from_spirv_nir(&args, &mut nir); + let mut internal_args = lower_and_optimize_nir_late(d, &mut nir, args.len()); + KernelArg::assign_locations(&mut args, &mut internal_args, &mut nir); + + if let Some(cache) = cache { + let mut bin = Vec::new(); + let mut nir = nir.serialize(); - lower_and_optimize_nir_pre_inputs(d, &mut nir, &d.lib_clc); + bin.extend_from_slice(&nir.len().to_ne_bytes()); + bin.append(&mut nir); + + bin.extend_from_slice(&args.len().to_ne_bytes()); + for arg in &args { + bin.append(&mut arg.serialize()); + } + + bin.extend_from_slice(&internal_args.len().to_ne_bytes()); + for arg in &internal_args { + bin.append(&mut arg.serialize()); + } + + cache.put(&bin, &mut key.unwrap()); + } - let mut args = KernelArg::from_spirv_nir(&args, &mut nir); - let mut internal_args = lower_and_optimize_nir_late(d, &mut nir, args.len()); - KernelArg::assign_locations(&mut args, &mut internal_args, &mut nir); + (nir, args, internal_args) + }; args_set.insert(args); internal_args_set.insert(internal_args); diff --git a/src/gallium/frontends/rusticl/core/program.rs b/src/gallium/frontends/rusticl/core/program.rs index 6c36b75..23e7f65 100644 --- a/src/gallium/frontends/rusticl/core/program.rs +++ b/src/gallium/frontends/rusticl/core/program.rs @@ -436,6 +436,21 @@ impl Program { }) } + pub(super) fn hash_key(&self, dev: &Arc<Device>, name: &str) -> Option { + if let Some(cache) = dev.screen().shader_cache() { + let mut lock = self.build_info(); + let info = 
Self::dev_build_info(&mut lock, dev); + assert_eq!(info.status, CL_BUILD_SUCCESS as cl_build_status); + + let spirv = info.spirv.as_ref().unwrap(); + let mut bin = spirv.to_bin().to_vec(); + bin.extend_from_slice(name.as_bytes()); + Some(cache.gen_key(&bin)) + } else { + None + } + } + pub fn devs_with_build(&self) -> Vec<&Arc<Device>> { let mut lock = self.build_info(); self.devs diff --git a/src/gallium/frontends/rusticl/mesa/compiler/clc/spirv.rs b/src/gallium/frontends/rusticl/mesa/compiler/clc/spirv.rs index be16d92..01415c6 100644 --- a/src/gallium/frontends/rusticl/mesa/compiler/clc/spirv.rs +++ b/src/gallium/frontends/rusticl/mesa/compiler/clc/spirv.rs @@ -3,6 +3,7 @@ use crate::pipe::screen::*; use crate::util::disk_cache::*; use mesa_rust_gen::*; +use mesa_rust_util::serialize::*; use mesa_rust_util::string::*; use std::ffi::CString; @@ -316,3 +317,47 @@ impl Drop for SPIRVBin { } } } + +impl SPIRVKernelArg { + pub fn serialize(&self) -> Vec<u8> { + let mut res = Vec::new(); + + let name_arr = self.name.as_bytes(); + let type_name_arr = self.type_name.as_bytes(); + + res.extend_from_slice(&name_arr.len().to_ne_bytes()); + res.extend_from_slice(name_arr); + res.extend_from_slice(&type_name_arr.len().to_ne_bytes()); + res.extend_from_slice(type_name_arr); + res.extend_from_slice(&u32::to_ne_bytes(self.access_qualifier.0)); + res.extend_from_slice(&u32::to_ne_bytes(self.type_qualifier.0)); + res.push(self.address_qualifier as u8); + + res + } + + pub fn deserialize(bin: &mut &[u8]) -> Option<Self> { + let name_len = read_ne_usize(bin); + let name = read_string(bin, name_len)?; + let type_len = read_ne_usize(bin); + let type_name = read_string(bin, type_len)?; + let access_qualifier = read_ne_u32(bin); + let type_qualifier = read_ne_u32(bin); + + let address_qualifier = match read_ne_u8(bin) { + 0 => clc_kernel_arg_address_qualifier::CLC_KERNEL_ARG_ADDRESS_PRIVATE, + 1 => clc_kernel_arg_address_qualifier::CLC_KERNEL_ARG_ADDRESS_CONSTANT, + 2 => 
clc_kernel_arg_address_qualifier::CLC_KERNEL_ARG_ADDRESS_LOCAL, + 3 => clc_kernel_arg_address_qualifier::CLC_KERNEL_ARG_ADDRESS_GLOBAL, + _ => return None, + }; + + Some(Self { + name: name, + type_name: type_name, + access_qualifier: clc_kernel_arg_access_qualifier(access_qualifier), + address_qualifier: address_qualifier, + type_qualifier: clc_kernel_arg_type_qualifier(type_qualifier), + }) + } +} diff --git a/src/gallium/frontends/rusticl/mesa/compiler/nir.rs b/src/gallium/frontends/rusticl/mesa/compiler/nir.rs index ead0093..a0bd690 100644 --- a/src/gallium/frontends/rusticl/mesa/compiler/nir.rs +++ b/src/gallium/frontends/rusticl/mesa/compiler/nir.rs @@ -67,6 +67,31 @@ impl NirShader { NonNull::new(nir).map(|nir| Self { nir: nir }) } + pub fn deserialize( + input: &mut &[u8], + len: usize, + options: *const nir_shader_compiler_options, + ) -> Option<Self> { + let mut reader = blob_reader::default(); + + let (bin, rest) = input.split_at(len); + *input = rest; + + unsafe { + blob_reader_init(&mut reader, bin.as_ptr().cast(), len); + Self::new(nir_deserialize(ptr::null_mut(), options, &mut reader)) + } + } + + pub fn serialize(&self) -> Vec<u8> { + let mut blob = blob::default(); + unsafe { + blob_init(&mut blob); + nir_serialize(&mut blob, self.nir.as_ptr(), false); + slice::from_raw_parts(blob.data, blob.size).to_vec() + } + } + pub fn print(&self) { unsafe { nir_print_shader(self.nir.as_ptr(), stderr) }; } diff --git a/src/gallium/frontends/rusticl/meson.build b/src/gallium/frontends/rusticl/meson.build index ec6d94d..2977009 100644 --- a/src/gallium/frontends/rusticl/meson.build +++ b/src/gallium/frontends/rusticl/meson.build @@ -198,6 +198,7 @@ rusticl_mesa_bindings_rs = rust.bindgen( ], args : [ rusticl_bindgen_args, + '--whitelist-function', 'blob_.*', '--whitelist-function', 'clc_.*', '--whitelist-function', 'disk_cache_.*', '--whitelist-function', 'free', diff --git a/src/gallium/frontends/rusticl/rusticl_mesa_bindings.h 
b/src/gallium/frontends/rusticl/rusticl_mesa_bindings.h index fdfaa20..5ed42ba 100644 --- a/src/gallium/frontends/rusticl/rusticl_mesa_bindings.h +++ b/src/gallium/frontends/rusticl/rusticl_mesa_bindings.h @@ -3,6 +3,7 @@ #include "compiler/clc/clc.h" #include "compiler/clc/clc_helpers.h" #include "compiler/shader_enums.h" +#include "nir_serialize.h" #include "nir_types.h" #include "spirv/nir_spirv.h" @@ -12,6 +13,7 @@ #include "pipe/p_state.h" #include "pipe-loader/pipe_loader.h" +#include "util/blob.h" #include "util/disk_cache.h" #include "util/u_printf.h" #include "util/u_sampler.h" diff --git a/src/gallium/frontends/rusticl/util/lib.rs b/src/gallium/frontends/rusticl/util/lib.rs index 23389ac..d735982 100644 --- a/src/gallium/frontends/rusticl/util/lib.rs +++ b/src/gallium/frontends/rusticl/util/lib.rs @@ -1,4 +1,5 @@ pub mod assert; pub mod properties; pub mod ptr; +pub mod serialize; pub mod string; diff --git a/src/gallium/frontends/rusticl/util/serialize.rs b/src/gallium/frontends/rusticl/util/serialize.rs new file mode 100644 index 0000000..1c0afc0 --- /dev/null +++ b/src/gallium/frontends/rusticl/util/serialize.rs @@ -0,0 +1,26 @@ +use std::convert::TryInto; +use std::mem::size_of; + +pub fn read_ne_u8(input: &mut &[u8]) -> u8 { + let (int_bytes, rest) = input.split_at(size_of::<u8>()); + *input = rest; + u8::from_ne_bytes(int_bytes.try_into().unwrap()) +} + +pub fn read_ne_u32(input: &mut &[u8]) -> u32 { + let (int_bytes, rest) = input.split_at(size_of::<u32>()); + *input = rest; + u32::from_ne_bytes(int_bytes.try_into().unwrap()) +} + +pub fn read_ne_usize(input: &mut &[u8]) -> usize { + let (int_bytes, rest) = input.split_at(size_of::<usize>()); + *input = rest; + usize::from_ne_bytes(int_bytes.try_into().unwrap()) +} + +pub fn read_string(input: &mut &[u8], len: usize) -> Option<String> { + let (string_bytes, rest) = input.split_at(len); + *input = rest; + String::from_utf8(string_bytes.to_vec()).ok() +} -- 2.7.4