rusticl/device: make it &'static
author: Karol Herbst <git@karolherbst.de>
Sat, 8 Jul 2023 16:41:32 +0000 (18:41 +0200)
committer: Marge Bot <emma+marge@anholt.net>
Wed, 12 Jul 2023 15:18:22 +0000 (15:18 +0000)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24061>

13 files changed:
src/gallium/frontends/rusticl/api/context.rs
src/gallium/frontends/rusticl/api/device.rs
src/gallium/frontends/rusticl/api/icd.rs
src/gallium/frontends/rusticl/api/kernel.rs
src/gallium/frontends/rusticl/api/memory.rs
src/gallium/frontends/rusticl/api/program.rs
src/gallium/frontends/rusticl/api/queue.rs
src/gallium/frontends/rusticl/core/context.rs
src/gallium/frontends/rusticl/core/device.rs
src/gallium/frontends/rusticl/core/kernel.rs
src/gallium/frontends/rusticl/core/memory.rs
src/gallium/frontends/rusticl/core/program.rs
src/gallium/frontends/rusticl/core/queue.rs

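diff --git a/src/gallium/frontends/rusticl/api/context.rs b/src/gallium/frontends/rusticl/api/context.rs
index f58668b..1aff561 100644
--- a/src/gallium/frontends/rusticl/api/context.rs
+++ b/src/gallium/frontends/rusticl/api/context.rs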
@@ -1,9 +1,9 @@
-use crate::api::device::get_devs_for_type;
 use crate::api::icd::*;
 use crate::api::types::*;
 use crate::api::util::*;
 use crate::cl_closure;
 use crate::core::context::*;
+use crate::core::device::get_devs_for_type;
 use crate::core::platform::*;
 
 use mesa_rust_util::properties::Properties;
@@ -15,24 +15,18 @@ use std::collections::HashSet;
 use std::iter::FromIterator;
 use std::mem::MaybeUninit;
 use std::slice;
-use std::sync::Arc;
 
 #[cl_info_entrypoint(cl_get_context_info)]
 impl CLInfo<cl_context_info> for cl_context {
     fn query(&self, q: cl_context_info, _: &[u8]) -> CLResult<Vec<MaybeUninit<u8>>> {
         let ctx = self.get_ref()?;
         Ok(match q {
-            CL_CONTEXT_DEVICES => {
-                cl_prop::<&Vec<cl_device_id>>(
-                    &ctx.devs
-                        .iter()
-                        .map(|d| {
-                            // Note we use as_ptr here which doesn't increase the reference count.
-                            cl_device_id::from_ptr(Arc::as_ptr(d))
-                        })
-                        .collect(),
-                )
-            }
+            CL_CONTEXT_DEVICES => cl_prop::<Vec<cl_device_id>>(
+                ctx.devs
+                    .iter()
+                    .map(|&d| cl_device_id::from_ptr(d))
+                    .collect(),
+            ),
             CL_CONTEXT_NUM_DEVICES => cl_prop::<cl_uint>(ctx.devs.len() as u32),
             CL_CONTEXT_PROPERTIES => cl_prop::<&Properties<cl_context_properties>>(&ctx.properties),
             CL_CONTEXT_REFERENCE_COUNT => cl_prop::<cl_uint>(self.refcnt()?),
@@ -81,7 +75,7 @@ fn create_context(
     // Duplicate devices specified in devices are ignored.
     let set: HashSet<_> =
         HashSet::from_iter(unsafe { slice::from_raw_parts(devices, num_devices as usize) }.iter());
-    let devs: Result<_, _> = set.into_iter().map(cl_device_id::get_arc).collect();
+    let devs: Result<_, _> = set.into_iter().map(cl_device_id::get_ref).collect();
 
     Ok(cl_context::from_arc(Context::new(devs?, props)))
 }
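diff --git a/src/gallium/frontends/rusticl/api/device.rs b/src/gallium/frontends/rusticl/api/device.rs
index bea08a4..549eb3c 100644
--- a/src/gallium/frontends/rusticl/api/device.rs
+++ b/src/gallium/frontends/rusticl/api/device.rs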
@@ -15,7 +15,6 @@ use std::cmp::min;
 use std::ffi::CStr;
 use std::mem::{size_of, MaybeUninit};
 use std::ptr;
-use std::sync::Arc;
 
 const SPIRV_SUPPORT_STRING: &str = "SPIR-V_1.0 SPIR-V_1.1 SPIR-V_1.2 SPIR-V_1.3 SPIR-V_1.4";
 const SPIRV_SUPPORT: [cl_name_version; 5] = [
@@ -316,18 +315,6 @@ impl CLInfo<cl_device_info> for cl_device_id {
     }
 }
 
-fn devs() -> &'static Vec<Arc<Device>> {
-    &Platform::get().devs
-}
-
-pub fn get_devs_for_type(device_type: cl_device_type) -> Vec<&'static Device> {
-    devs()
-        .iter()
-        .filter(|d| device_type & d.device_type(true) != 0)
-        .map(Arc::as_ref)
-        .collect()
-}
-
 #[cl_entrypoint]
 fn get_device_ids(
     platform: cl_platform_id,
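diff --git a/src/gallium/frontends/rusticl/api/icd.rs b/src/gallium/frontends/rusticl/api/icd.rs
index 84b942b..a72a9fc 100644
--- a/src/gallium/frontends/rusticl/api/icd.rs
+++ b/src/gallium/frontends/rusticl/api/icd.rs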
@@ -272,6 +272,28 @@ pub trait ReferenceCountedAPIPointer<T, const ERR: i32> {
         Ok(res)
     }
 
+    fn get_ref_vec_from_arr(objs: *const Self, count: u32) -> CLResult<Vec<&'static T>>
+    where
+        Self: Sized,
+    {
+        // CL spec requires validation for obj arrays, both values have to make sense
+        if objs.is_null() && count > 0 || !objs.is_null() && count == 0 {
+            return Err(CL_INVALID_VALUE);
+        }
+
+        let mut res = Vec::new();
+        if objs.is_null() || count == 0 {
+            return Ok(res);
+        }
+
+        for i in 0..count as usize {
+            unsafe {
+                res.push((*objs.add(i)).get_ref()?);
+            }
+        }
+        Ok(res)
+    }
+
     fn retain(&self) -> CLResult<()> {
         unsafe {
             Arc::increment_strong_count(self.get_ptr()?);
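diff --git a/src/gallium/frontends/rusticl/api/kernel.rs b/src/gallium/frontends/rusticl/api/kernel.rs
index 06439a7..c77903b 100644
--- a/src/gallium/frontends/rusticl/api/kernel.rs
+++ b/src/gallium/frontends/rusticl/api/kernel.rs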
@@ -82,10 +82,10 @@ impl CLInfoObj<cl_kernel_work_group_info, cl_device_id> for cl_kernel {
             if kernel.prog.devs.len() > 1 {
                 return Err(CL_INVALID_DEVICE);
             } else {
-                kernel.prog.devs[0].clone()
+                kernel.prog.devs[0]
             }
         } else {
-            dev.get_arc()?
+            dev.get_ref()?
         };
 
         // CL_INVALID_DEVICE if device is not in the list of devices associated with kernel
@@ -95,12 +95,12 @@ impl CLInfoObj<cl_kernel_work_group_info, cl_device_id> for cl_kernel {
 
         Ok(match *q {
             CL_KERNEL_COMPILE_WORK_GROUP_SIZE => cl_prop::<[usize; 3]>(kernel.work_group_size),
-            CL_KERNEL_LOCAL_MEM_SIZE => cl_prop::<cl_ulong>(kernel.local_mem_size(&dev)),
+            CL_KERNEL_LOCAL_MEM_SIZE => cl_prop::<cl_ulong>(kernel.local_mem_size(dev)),
             CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE => {
-                cl_prop::<usize>(kernel.preferred_simd_size(&dev))
+                cl_prop::<usize>(kernel.preferred_simd_size(dev))
             }
-            CL_KERNEL_PRIVATE_MEM_SIZE => cl_prop::<cl_ulong>(kernel.priv_mem_size(&dev)),
-            CL_KERNEL_WORK_GROUP_SIZE => cl_prop::<usize>(kernel.max_threads_per_block(&dev)),
+            CL_KERNEL_PRIVATE_MEM_SIZE => cl_prop::<cl_ulong>(kernel.priv_mem_size(dev)),
+            CL_KERNEL_WORK_GROUP_SIZE => cl_prop::<usize>(kernel.max_threads_per_block(dev)),
             // CL_INVALID_VALUE if param_name is not one of the supported values
             _ => return Err(CL_INVALID_VALUE),
         })
@@ -128,10 +128,10 @@ impl CLInfoObj<cl_kernel_sub_group_info, (cl_device_id, usize, *const c_void, us
             if kernel.prog.devs.len() > 1 {
                 return Err(CL_INVALID_DEVICE);
             } else {
-                kernel.prog.devs[0].clone()
+                kernel.prog.devs[0]
             }
         } else {
-            dev.get_arc()?
+            dev.get_ref()?
         };
 
         // CL_INVALID_DEVICE if device is not in the list of devices associated with kernel
@@ -172,16 +172,16 @@ impl CLInfoObj<cl_kernel_sub_group_info, (cl_device_id, usize, *const c_void, us
 
         Ok(match q {
             CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE => {
-                cl_prop::<usize>(kernel.subgroups_for_block(&dev, input))
+                cl_prop::<usize>(kernel.subgroups_for_block(dev, input))
             }
             CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE => {
-                cl_prop::<usize>(kernel.subgroup_size_for_block(&dev, input))
+                cl_prop::<usize>(kernel.subgroup_size_for_block(dev, input))
             }
             CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT => {
                 let subgroups = input[0];
                 let mut res = vec![0; 3];
 
-                for subgroup_size in kernel.subgroup_sizes(&dev) {
+                for subgroup_size in kernel.subgroup_sizes(dev) {
                     let threads = subgroups * subgroup_size;
 
                     if threads > dev.max_threads_per_block() {
@@ -189,7 +189,7 @@ impl CLInfoObj<cl_kernel_sub_group_info, (cl_device_id, usize, *const c_void, us
                     }
 
                     let block = [threads, 1, 1];
-                    let real_subgroups = kernel.subgroups_for_block(&dev, &block);
+                    let real_subgroups = kernel.subgroups_for_block(dev, &block);
 
                     if real_subgroups == subgroups {
                         res = block.to_vec();
@@ -201,11 +201,11 @@ impl CLInfoObj<cl_kernel_sub_group_info, (cl_device_id, usize, *const c_void, us
                 cl_prop::<Vec<usize>>(res)
             }
             CL_KERNEL_MAX_NUM_SUB_GROUPS => {
-                let threads = kernel.max_threads_per_block(&dev);
+                let threads = kernel.max_threads_per_block(dev);
                 let max_groups = dev.max_subgroups();
 
                 let mut result = 0;
-                for sgs in kernel.subgroup_sizes(&dev) {
+                for sgs in kernel.subgroup_sizes(dev) {
                     result = cmp::max(result, threads / sgs);
                     result = cmp::min(result, max_groups as usize);
                 }
@@ -512,7 +512,7 @@ fn enqueue_ndrange_kernel(
 
     // CL_INVALID_PROGRAM_EXECUTABLE if there is no successfully built program executable available
     // for device associated with command_queue.
-    if k.prog.status(&q.device) != CL_BUILD_SUCCESS as cl_build_status {
+    if k.prog.status(q.device) != CL_BUILD_SUCCESS as cl_build_status {
         return Err(CL_INVALID_PROGRAM_EXECUTABLE);
     }
 
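diff --git a/src/gallium/frontends/rusticl/api/memory.rs b/src/gallium/frontends/rusticl/api/memory.rs
index eab661c..0f867c4 100644
--- a/src/gallium/frontends/rusticl/api/memory.rs
+++ b/src/gallium/frontends/rusticl/api/memory.rs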
@@ -395,7 +395,7 @@ fn validate_image_desc(
     image_desc: *const cl_image_desc,
     host_ptr: *mut ::std::os::raw::c_void,
     elem_size: usize,
-    devs: &[Arc<Device>],
+    devs: &[&Device],
 ) -> CLResult<(cl_image_desc, Option<Arc<Mem>>)> {
     // CL_INVALID_IMAGE_DESCRIPTOR if values specified in image_desc are not valid
     const err: cl_int = CL_INVALID_IMAGE_DESCRIPTOR;
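diff --git a/src/gallium/frontends/rusticl/api/program.rs b/src/gallium/frontends/rusticl/api/program.rs
index 72b4f67..78ba1ed 100644
--- a/src/gallium/frontends/rusticl/api/program.rs
+++ b/src/gallium/frontends/rusticl/api/program.rs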
@@ -33,18 +33,12 @@ impl CLInfo<cl_program_info> for cl_program {
                 let ptr = Arc::as_ptr(&prog.context);
                 cl_prop::<cl_context>(cl_context::from_ptr(ptr))
             }
-            CL_PROGRAM_DEVICES => {
-                cl_prop::<&Vec<cl_device_id>>(
-                    &prog
-                        .devs
-                        .iter()
-                        .map(|d| {
-                            // Note we use as_ptr here which doesn't increase the reference count.
-                            cl_device_id::from_ptr(Arc::as_ptr(d))
-                        })
-                        .collect(),
-                )
-            }
+            CL_PROGRAM_DEVICES => cl_prop::<Vec<cl_device_id>>(
+                prog.devs
+                    .iter()
+                    .map(|&d| cl_device_id::from_ptr(d))
+                    .collect(),
+            ),
             CL_PROGRAM_IL => match &prog.src {
                 ProgramSourceType::Il(il) => to_maybeuninit_vec(il.to_bin().to_vec()),
                 _ => Vec::new(),
@@ -82,12 +76,12 @@ impl CLInfoObj<cl_program_build_info, cl_device_id> for cl_program {
     }
 }
 
-fn validate_devices(
+fn validate_devices<'a>(
     device_list: *const cl_device_id,
     num_devices: cl_uint,
-    default: &[Arc<Device>],
-) -> CLResult<Vec<Arc<Device>>> {
-    let mut devs = cl_device_id::get_arc_vec_from_arr(device_list, num_devices)?;
+    default: &[&'a Device],
+) -> CLResult<Vec<&'a Device>> {
+    let mut devs = cl_device_id::get_ref_vec_from_arr(device_list, num_devices)?;
 
     // If device_list is a NULL value, the compile is performed for all devices associated with
     // program.
@@ -197,7 +191,7 @@ fn create_program_with_binary(
     binary_status: *mut cl_int,
 ) -> CLResult<cl_program> {
     let c = context.get_arc()?;
-    let devs = cl_device_id::get_arc_vec_from_arr(device_list, num_devices)?;
+    let devs = cl_device_id::get_ref_vec_from_arr(device_list, num_devices)?;
 
     // CL_INVALID_VALUE if device_list is NULL or num_devices is zero.
     if devs.is_empty() {
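diff --git a/src/gallium/frontends/rusticl/api/queue.rs b/src/gallium/frontends/rusticl/api/queue.rs
index 96ccc35..2402178 100644
--- a/src/gallium/frontends/rusticl/api/queue.rs
+++ b/src/gallium/frontends/rusticl/api/queue.rs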
@@ -23,11 +23,7 @@ impl CLInfo<cl_command_queue_info> for cl_command_queue {
                 let ptr = Arc::as_ptr(&queue.context);
                 cl_prop::<cl_context>(cl_context::from_ptr(ptr))
             }
-            CL_QUEUE_DEVICE => {
-                // Note we use as_ptr here which doesn't increase the reference count.
-                let ptr = Arc::as_ptr(&queue.device);
-                cl_prop::<cl_device_id>(cl_device_id::from_ptr(ptr))
-            }
+            CL_QUEUE_DEVICE => cl_prop::<cl_device_id>(cl_device_id::from_ptr(queue.device)),
             CL_QUEUE_DEVICE_DEFAULT => cl_prop::<cl_command_queue>(ptr::null_mut()),
             CL_QUEUE_PROPERTIES => cl_prop::<cl_command_queue_properties>(queue.props),
             CL_QUEUE_PROPERTIES_ARRAY => {
@@ -61,7 +57,7 @@ pub fn create_command_queue_impl(
     properties_v2: Option<Properties<cl_queue_properties>>,
 ) -> CLResult<cl_command_queue> {
     let c = context.get_arc()?;
-    let d = device.get_arc()?;
+    let d = device.get_ref()?.to_static().ok_or(CL_INVALID_DEVICE)?;
 
     // CL_INVALID_DEVICE if device [...] is not associated with context.
     if !c.devs.contains(&d) {
@@ -102,7 +98,7 @@ fn create_command_queue_with_properties(
     properties: *const cl_queue_properties,
 ) -> CLResult<cl_command_queue> {
     let c = context.get_arc()?;
-    let d = device.get_arc()?;
+    let d = device.get_ref()?.to_static().ok_or(CL_INVALID_DEVICE)?;
 
     let mut queue_properties = cl_command_queue_properties::default();
     let properties = if properties.is_null() {
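diff --git a/src/gallium/frontends/rusticl/core/context.rs b/src/gallium/frontends/rusticl/core/context.rs
index 7b7d5cd..858ec2d 100644
--- a/src/gallium/frontends/rusticl/core/context.rs
+++ b/src/gallium/frontends/rusticl/core/context.rs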
@@ -20,7 +20,7 @@ use std::sync::Mutex;
 
 pub struct Context {
     pub base: CLObjectBase<CL_INVALID_CONTEXT>,
-    pub devs: Vec<Arc<Device>>,
+    pub devs: Vec<&'static Device>,
     pub properties: Properties<cl_context_properties>,
     pub dtors: Mutex<Vec<Box<dyn Fn(cl_context)>>>,
     pub svm_ptrs: Mutex<BTreeMap<*const c_void, Layout>>,
@@ -30,7 +30,7 @@ impl_cl_type_trait!(cl_context, Context, CL_INVALID_CONTEXT);
 
 impl Context {
     pub fn new(
-        devs: Vec<Arc<Device>>,
+        devs: Vec<&'static Device>,
         properties: Properties<cl_context_properties>,
     ) -> Arc<Context> {
         Arc::new(Self {
@@ -48,10 +48,10 @@ impl Context {
         user_ptr: *mut c_void,
         copy: bool,
         res_type: ResourceType,
-    ) -> CLResult<HashMap<Arc<Device>, Arc<PipeResource>>> {
+    ) -> CLResult<HashMap<&'static Device, Arc<PipeResource>>> {
         let adj_size: u32 = size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
         let mut res = HashMap::new();
-        for dev in &self.devs {
+        for &dev in &self.devs {
             let mut resource = None;
 
             if !user_ptr.is_null() && !copy {
@@ -65,7 +65,7 @@ impl Context {
             }
 
             let resource = resource.ok_or(CL_OUT_OF_RESOURCES);
-            res.insert(Arc::clone(dev), Arc::new(resource?));
+            res.insert(dev, Arc::new(resource?));
         }
 
         if !user_ptr.is_null() {
@@ -88,7 +88,7 @@ impl Context {
         user_ptr: *mut c_void,
         copy: bool,
         res_type: ResourceType,
-    ) -> CLResult<HashMap<Arc<Device>, Arc<PipeResource>>> {
+    ) -> CLResult<HashMap<&'static Device, Arc<PipeResource>>> {
         let width = desc
             .image_width
             .try_into()
@@ -108,7 +108,7 @@ impl Context {
         let target = cl_mem_type_to_texture_target(desc.image_type);
 
         let mut res = HashMap::new();
-        for dev in &self.devs {
+        for &dev in &self.devs {
             let mut resource = None;
 
             // we can't specify custom pitches/slices, so this won't work for non 1D images
@@ -125,7 +125,7 @@ impl Context {
             }
 
             let resource = resource.ok_or(CL_OUT_OF_RESOURCES);
-            res.insert(Arc::clone(dev), Arc::new(resource?));
+            res.insert(dev, Arc::new(resource?));
         }
 
         if !user_ptr.is_null() {
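diff --git a/src/gallium/frontends/rusticl/core/device.rs b/src/gallium/frontends/rusticl/core/device.rs
index 64b72b4..e0350c1 100644
--- a/src/gallium/frontends/rusticl/core/device.rs
+++ b/src/gallium/frontends/rusticl/core/device.rs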
@@ -235,6 +235,19 @@ impl Device {
         Some(Arc::new(d))
     }
 
+    /// Converts a temporary reference to a static if and only if this device lives inside static
+    /// memory.
+    pub fn to_static(&self) -> Option<&'static Self> {
+        for dev in devs() {
+            let dev = dev.as_ref();
+            if self == dev {
+                return Some(dev);
+            }
+        }
+
+        None
+    }
+
     fn fill_format_tables(&mut self) {
         for f in FORMATS {
             let mut fs = HashMap::new();
@@ -937,3 +950,15 @@ impl Device {
         }
     }
 }
+
+fn devs() -> &'static Vec<Arc<Device>> {
+    &Platform::get().devs
+}
+
+pub fn get_devs_for_type(device_type: cl_device_type) -> Vec<&'static Device> {
+    devs()
+        .iter()
+        .filter(|d| device_type & d.device_type(true) != 0)
+        .map(Arc::as_ref)
+        .collect()
+}
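diff --git a/src/gallium/frontends/rusticl/core/kernel.rs b/src/gallium/frontends/rusticl/core/kernel.rs
index 215161f..9a7abdc 100644
--- a/src/gallium/frontends/rusticl/core/kernel.rs
+++ b/src/gallium/frontends/rusticl/core/kernel.rs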
@@ -258,7 +258,7 @@ struct KernelDevStateInner {
 }
 
 struct KernelDevState {
-    states: HashMap<Arc<Device>, KernelDevStateInner>,
+    states: HashMap<&'static Device, KernelDevStateInner>,
 }
 
 impl Drop for KernelDevState {
@@ -272,10 +272,10 @@ impl Drop for KernelDevState {
 }
 
 impl KernelDevState {
-    fn new(nirs: &HashMap<Arc<Device>, Arc<NirShader>>) -> Arc<Self> {
+    fn new(nirs: &HashMap<&'static Device, Arc<NirShader>>) -> Arc<Self> {
         let states = nirs
             .iter()
-            .map(|(dev, nir)| {
+            .map(|(&dev, nir)| {
                 let mut cso = dev
                     .helper_ctx()
                     .create_compute_state(nir, nir.shared_size());
@@ -289,7 +289,7 @@ impl KernelDevState {
                 };
 
                 (
-                    dev.clone(),
+                    dev,
                     KernelDevStateInner {
                         nir: nir.clone(),
                         constant_buffer: cb,
@@ -871,7 +871,7 @@ impl Kernel {
         grid: &[usize],
         offsets: &[usize],
     ) -> CLResult<EventSig> {
-        let dev_state = self.dev_state.get(&q.device);
+        let dev_state = self.dev_state.get(q.device);
         let mut block = create_kernel_arr::<u32>(block, 1);
         let mut grid = create_kernel_arr::<u32>(grid, 1);
         let offsets = create_kernel_arr::<u64>(offsets, 0);
@@ -894,7 +894,7 @@ impl Kernel {
             &[0; 4]
         };
 
-        self.optimize_local_size(&q.device, &mut grid, &mut block);
+        self.optimize_local_size(q.device, &mut grid, &mut block);
 
         for (arg, val) in self.build.args.iter().zip(&self.values) {
             if arg.dead {
@@ -911,7 +911,7 @@ impl Kernel {
             match val.borrow().as_ref().unwrap() {
                 KernelArgValue::Constant(c) => input.extend_from_slice(c),
                 KernelArgValue::MemObject(mem) => {
-                    let res = mem.get_res_of_dev(&q.device)?;
+                    let res = mem.get_res_of_dev(q.device)?;
                     // If resource is a buffer and mem a 2D image, the 2d image was created from a
                     // buffer. Use strides and dimensions of 2d image
                     let app_img_info =
@@ -1038,7 +1038,7 @@ impl Kernel {
 
         let k = Arc::clone(self);
         Ok(Box::new(move |q, ctx| {
-            let dev_state = k.dev_state.get(&q.device);
+            let dev_state = k.dev_state.get(q.device);
             let mut input = input.clone();
             let mut resources = Vec::with_capacity(resource_info.len());
             let mut globals: Vec<*mut u32> = Vec::new();
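diff --git a/src/gallium/frontends/rusticl/core/memory.rs b/src/gallium/frontends/rusticl/core/memory.rs
index 343d450..5f4c0c1 100644
--- a/src/gallium/frontends/rusticl/core/memory.rs
+++ b/src/gallium/frontends/rusticl/core/memory.rs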
@@ -18,6 +18,7 @@ use mesa_rust_util::properties::Properties;
 use rusticl_opencl_gen::*;
 
 use std::cmp;
+use std::collections::hash_map::Entry;
 use std::collections::HashMap;
 use std::convert::TryInto;
 use std::mem::size_of;
@@ -45,7 +46,7 @@ impl MappingTransfer {
 }
 
 struct Mappings {
-    tx: HashMap<Arc<Device>, MappingTransfer>,
+    tx: HashMap<&'static Device, MappingTransfer>,
     maps: HashMap<*mut c_void, u32>,
 }
 
@@ -92,7 +93,7 @@ impl Mappings {
 
     fn clean_up_tx(&mut self, dev: &Device, ctx: &PipeContext) {
         if self.maps.is_empty() {
-            if let Some(tx) = self.tx.get(dev) {
+            if let Some(tx) = self.tx.get(&dev) {
                 if tx.pending == 0 {
                     self.tx.remove(dev).unwrap().tx.with_ctx(ctx);
                 }
@@ -116,7 +117,7 @@ pub struct Mem {
     pub image_elem_size: u8,
     pub props: Vec<cl_mem_properties>,
     pub cbs: Mutex<Vec<Box<dyn Fn(cl_mem)>>>,
-    res: Option<HashMap<Arc<Device>, Arc<PipeResource>>>,
+    res: Option<HashMap<&'static Device, Arc<PipeResource>>>,
     maps: Mutex<Mappings>,
 }
 
@@ -457,7 +458,7 @@ impl Mem {
 
         assert!(self.is_buffer());
 
-        let tx = if can_map_directly(&q.device, r) {
+        let tx = if can_map_directly(q.device, r) {
             ctx.buffer_map_directly(
                 r,
                 offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
@@ -513,10 +514,10 @@ impl Mem {
     ) -> CLResult<(PipeTransfer, Option<PipeResource>)> {
         assert!(!self.is_buffer());
 
-        let r = self.get_res()?.get(&q.device).unwrap();
+        let r = self.get_res()?.get(q.device).unwrap();
         let ctx = q.device.helper_ctx();
 
-        let tx = if can_map_directly(&q.device, r) {
+        let tx = if can_map_directly(q.device, r) {
             ctx.texture_map_directly(r, bx, rw)
         } else {
             None
@@ -573,7 +574,7 @@ impl Mem {
             && bit_check(mem.flags, CL_MEM_USE_HOST_PTR)
     }
 
-    fn get_res(&self) -> CLResult<&HashMap<Arc<Device>, Arc<PipeResource>>> {
+    fn get_res(&self) -> CLResult<&HashMap<&'static Device, Arc<PipeResource>>> {
         self.get_parent().res.as_ref().ok_or(CL_OUT_OF_HOST_MEMORY)
     }
 
@@ -992,17 +993,17 @@ impl Mem {
         ptr: *mut c_void,
     ) -> CLResult<()> {
         let mut lock = self.maps.lock().unwrap();
-        if !lock.increase_ref(&q.device, ptr) {
+        if !lock.increase_ref(q.device, ptr) {
             return Ok(());
         }
 
-        if self.has_user_shadow_buffer(&q.device)? {
+        if self.has_user_shadow_buffer(q.device)? {
             self.read_to_user(q, ctx, 0, self.host_ptr, self.size)
         } else {
             if let Some(shadow) = lock.tx.get(&q.device).and_then(|tx| tx.shadow.as_ref()) {
                 let mut offset = 0;
                 let b = self.to_parent(&mut offset);
-                let res = b.get_res_of_dev(&q.device)?;
+                let res = b.get_res_of_dev(q.device)?;
                 let bx = create_pipe_box(
                     [offset, 0, 0].into(),
                     [self.size, 1, 1].into(),
@@ -1022,11 +1023,11 @@ impl Mem {
         ptr: *mut c_void,
     ) -> CLResult<()> {
         let mut lock = self.maps.lock().unwrap();
-        if !lock.increase_ref(&q.device, ptr) {
+        if !lock.increase_ref(q.device, ptr) {
             return Ok(());
         }
 
-        if self.has_user_shadow_buffer(&q.device)? {
+        if self.has_user_shadow_buffer(q.device)? {
             self.read_to_user_rect(
                 self.host_ptr,
                 q,
@@ -1040,8 +1041,8 @@ impl Mem {
                 self.image_desc.image_slice_pitch,
             )
         } else {
-            if let Some(shadow) = lock.tx.get(&q.device).and_then(|tx| tx.shadow.as_ref()) {
-                let res = self.get_res_of_dev(&q.device)?;
+            if let Some(shadow) = lock.tx.get(q.device).and_then(|tx| tx.shadow.as_ref()) {
+                let res = self.get_res_of_dev(q.device)?;
                 let bx = self.image_desc.bx()?;
                 ctx.resource_copy_region(res, shadow, &[0, 0, 0], &bx);
             }
@@ -1080,7 +1081,7 @@ impl Mem {
         lock: &'a mut MutexGuard<Mappings>,
         rw: RWFlags,
     ) -> CLResult<&'a PipeTransfer> {
-        if !lock.tx.contains_key(&q.device) {
+        if let Entry::Vacant(e) = lock.tx.entry(q.device) {
             let (tx, res) = if self.is_buffer() {
                 self.tx_raw_async(q, rw)?
             } else {
@@ -1088,10 +1089,9 @@ impl Mem {
                 self.tx_image_raw_async(q, &bx, rw)?
             };
 
-            lock.tx
-                .insert(q.device.clone(), MappingTransfer::new(tx, res));
+            e.insert(MappingTransfer::new(tx, res));
         } else {
-            lock.mark_pending(&q.device);
+            lock.mark_pending(q.device);
         }
 
         Ok(&lock.tx.get_mut(&q.device).unwrap().tx)
@@ -1101,7 +1101,7 @@ impl Mem {
         assert!(self.is_buffer());
 
         let mut lock = self.maps.lock().unwrap();
-        let ptr = if self.has_user_shadow_buffer(&q.device)? {
+        let ptr = if self.has_user_shadow_buffer(q.device)? {
             self.host_ptr
         } else {
             let tx = self.map(q, &mut lock, RWFlags::RW)?;
@@ -1125,7 +1125,7 @@ impl Mem {
         let mut lock = self.maps.lock().unwrap();
 
         // we might have a host_ptr shadow buffer or image created from buffer
-        let ptr = if self.has_user_shadow_buffer(&q.device)? || self.is_parent_buffer() {
+        let ptr = if self.has_user_shadow_buffer(q.device)? || self.is_parent_buffer() {
             *row_pitch = self.image_desc.image_row_pitch;
             *slice_pitch = self.image_desc.image_slice_pitch;
 
@@ -1173,12 +1173,12 @@ impl Mem {
             return Ok(());
         }
 
-        let (needs_sync, shadow) = lock.decrease_ref(ptr, &q.device);
+        let (needs_sync, shadow) = lock.decrease_ref(ptr, q.device);
         if needs_sync {
             if let Some(shadow) = shadow {
                 let mut offset = 0;
                 let b = self.to_parent(&mut offset);
-                let res = b.get_res_of_dev(&q.device)?;
+                let res = b.get_res_of_dev(q.device)?;
 
                 let bx = if b.is_buffer() {
                     create_pipe_box(
@@ -1191,7 +1191,7 @@ impl Mem {
                 };
 
                 ctx.resource_copy_region(shadow, res, &[offset as u32, 0, 0], &bx);
-            } else if self.has_user_shadow_buffer(&q.device)? {
+            } else if self.has_user_shadow_buffer(q.device)? {
                 if self.is_buffer() {
                     self.write_from_user(q, ctx, 0, self.host_ptr, self.size)?;
                 } else {
@@ -1211,7 +1211,7 @@ impl Mem {
             }
         }
 
-        lock.clean_up_tx(&q.device, ctx);
+        lock.clean_up_tx(q.device, ctx);
 
         Ok(())
     }
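diff --git a/src/gallium/frontends/rusticl/core/program.rs b/src/gallium/frontends/rusticl/core/program.rs
index 3bfe33a..bd6b143 100644
--- a/src/gallium/frontends/rusticl/core/program.rs
+++ b/src/gallium/frontends/rusticl/core/program.rs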
@@ -60,7 +60,7 @@ pub enum ProgramSourceType {
 pub struct Program {
     pub base: CLObjectBase<CL_INVALID_PROGRAM>,
     pub context: Arc<Context>,
-    pub devs: Vec<Arc<Device>>,
+    pub devs: Vec<&'static Device>,
     pub src: ProgramSourceType,
     build: Mutex<ProgramBuild>,
 }
@@ -69,14 +69,14 @@ impl_cl_type_trait!(cl_program, Program, CL_INVALID_PROGRAM);
 
 #[derive(Clone)]
 pub struct NirKernelBuild {
-    pub nirs: HashMap<Arc<Device>, Arc<NirShader>>,
+    pub nirs: HashMap<&'static Device, Arc<NirShader>>,
     pub args: Vec<KernelArg>,
     pub internal_args: Vec<InternalKernelArg>,
     pub attributes_string: String,
 }
 
 pub(super) struct ProgramBuild {
-    builds: HashMap<Arc<Device>, ProgramDevBuild>,
+    builds: HashMap<&'static Device, ProgramDevBuild>,
     spec_constants: HashMap<u32, nir_const_value>,
     kernels: Vec<String>,
     kernel_builds: HashMap<String, Arc<NirKernelBuild>>,
@@ -122,7 +122,7 @@ impl ProgramBuild {
             for d in self.devs_with_build() {
                 let (nir, args, internal_args) = convert_spirv_to_nir(self, kernel_name, &args, d);
                 let attributes_string = self.attribute_str(kernel_name, d);
-                nirs.insert(d.clone(), Arc::new(nir));
+                nirs.insert(d, Arc::new(nir));
                 args_set.insert(args);
                 internal_args_set.insert(internal_args);
                 attributes_string_set.insert(attributes_string);
@@ -163,11 +163,11 @@ impl ProgramBuild {
         self.builds.get_mut(dev).unwrap()
     }
 
-    fn devs_with_build(&self) -> Vec<&Arc<Device>> {
+    fn devs_with_build(&self) -> Vec<&'static Device> {
         self.builds
             .iter()
             .filter(|(_, build)| build.status == CL_BUILD_SUCCESS as cl_build_status)
-            .map(|(d, _)| d)
+            .map(|(&d, _)| d)
             .collect()
     }
 
@@ -285,11 +285,13 @@ fn prepare_options(options: &str, dev: &Device) -> Vec<CString> {
 }
 
 impl Program {
-    fn create_default_builds(devs: &[Arc<Device>]) -> HashMap<Arc<Device>, ProgramDevBuild> {
+    fn create_default_builds(
+        devs: &[&'static Device],
+    ) -> HashMap<&'static Device, ProgramDevBuild> {
         devs.iter()
-            .map(|d| {
+            .map(|&d| {
                 (
-                    d.clone(),
+                    d,
                     ProgramDevBuild {
                         spirv: None,
                         status: CL_BUILD_NONE,
@@ -302,7 +304,7 @@ impl Program {
             .collect()
     }
 
-    pub fn new(context: &Arc<Context>, devs: &[Arc<Device>], src: CString) -> Arc<Program> {
+    pub fn new(context: &Arc<Context>, devs: &[&'static Device], src: CString) -> Arc<Program> {
         Arc::new(Self {
             base: CLObjectBase::new(),
             context: context.clone(),
@@ -319,13 +321,13 @@ impl Program {
 
     pub fn from_bins(
         context: Arc<Context>,
-        devs: Vec<Arc<Device>>,
+        devs: Vec<&'static Device>,
         bins: &[&[u8]],
     ) -> Arc<Program> {
         let mut builds = HashMap::new();
         let mut kernels = HashSet::new();
 
-        for (d, b) in devs.iter().zip(bins) {
+        for (&d, b) in devs.iter().zip(bins) {
             let mut ptr = b.as_ptr();
             let bin_type;
             let spirv;
@@ -364,7 +366,7 @@ impl Program {
             }
 
             builds.insert(
-                d.clone(),
+                d,
                 ProgramDevBuild {
                     spirv: spirv,
                     status: CL_BUILD_SUCCESS as cl_build_status,
@@ -625,17 +627,16 @@ impl Program {
 
     pub fn link(
         context: Arc<Context>,
-        devs: &[Arc<Device>],
+        devs: &[&'static Device],
         progs: &[Arc<Program>],
         options: String,
     ) -> Arc<Program> {
-        let devs: Vec<Arc<Device>> = devs.iter().map(|d| (*d).clone()).collect();
         let mut builds = HashMap::new();
         let mut kernels = HashSet::new();
         let mut locks: Vec<_> = progs.iter().map(|p| p.build_info()).collect();
         let lib = options.contains("-create-library");
 
-        for d in &devs {
+        for &d in devs {
             let bins: Vec<_> = locks
                 .iter_mut()
                 .map(|l| l.dev_build(d).spirv.as_ref().unwrap())
@@ -661,7 +662,7 @@ impl Program {
             };
 
             builds.insert(
-                d.clone(),
+                d,
                 ProgramDevBuild {
                     spirv: spirv,
                     status: status,
@@ -685,7 +686,7 @@ impl Program {
         Arc::new(Self {
             base: CLObjectBase::new(),
             context: context,
-            devs: devs,
+            devs: devs.to_owned(),
             src: ProgramSourceType::Linked,
             build: Mutex::new(build),
         })
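diff --git a/src/gallium/frontends/rusticl/core/queue.rs b/src/gallium/frontends/rusticl/core/queue.rs
index 1777cde..f0d7334 100644
--- a/src/gallium/frontends/rusticl/core/queue.rs
+++ b/src/gallium/frontends/rusticl/core/queue.rs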
@@ -23,7 +23,7 @@ struct QueueState {
 pub struct Queue {
     pub base: CLObjectBase<CL_INVALID_COMMAND_QUEUE>,
     pub context: Arc<Context>,
-    pub device: Arc<Device>,
+    pub device: &'static Device,
     pub props: cl_command_queue_properties,
     pub props_v2: Option<Properties<cl_queue_properties>>,
     state: Mutex<QueueState>,
@@ -43,7 +43,7 @@ fn flush_events(evs: &mut Vec<Arc<Event>>, pipe: &PipeContext) {
 impl Queue {
     pub fn new(
         context: Arc<Context>,
-        device: Arc<Device>,
+        device: &'static Device,
         props: cl_command_queue_properties,
         props_v2: Option<Properties<cl_queue_properties>>,
     ) -> CLResult<Arc<Queue>> {