From 71a9af49107289439f281ab59b5f67f59064f0aa Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Tue, 22 Mar 2022 18:33:57 +0100 Subject: [PATCH] rusticl/mem: support read/write/copy ops for images Signed-off-by: Karol Herbst Acked-by: Alyssa Rosenzweig Part-of: --- src/gallium/frontends/rusticl/api/icd.rs | 105 +++--- src/gallium/frontends/rusticl/api/kernel.rs | 2 +- src/gallium/frontends/rusticl/api/memory.rs | 254 +++++++++++--- src/gallium/frontends/rusticl/core/context.rs | 75 +++++ src/gallium/frontends/rusticl/core/format.rs | 74 +++++ src/gallium/frontends/rusticl/core/kernel.rs | 8 +- src/gallium/frontends/rusticl/core/memory.rs | 365 ++++++++++++++++----- src/gallium/frontends/rusticl/mesa/pipe/context.rs | 23 ++ src/gallium/frontends/rusticl/mesa/pipe/screen.rs | 65 +++- .../frontends/rusticl/mesa/pipe/transfer.rs | 8 + 10 files changed, 800 insertions(+), 179 deletions(-) diff --git a/src/gallium/frontends/rusticl/api/icd.rs b/src/gallium/frontends/rusticl/api/icd.rs index 6387bba..4f6f135 100644 --- a/src/gallium/frontends/rusticl/api/icd.rs +++ b/src/gallium/frontends/rusticl/api/icd.rs @@ -973,52 +973,83 @@ extern "C" fn cl_enqueue_copy_buffer( } extern "C" fn cl_enqueue_read_image( - _command_queue: cl_command_queue, - _image: cl_mem, - _blocking_read: cl_bool, - _origin: *const usize, - _region: *const usize, - _row_pitch: usize, - _slice_pitch: usize, - _ptr: *mut ::std::os::raw::c_void, - _num_events_in_wait_list: cl_uint, - _event_wait_list: *const cl_event, - _event: *mut cl_event, + command_queue: cl_command_queue, + image: cl_mem, + blocking_read: cl_bool, + origin: *const usize, + region: *const usize, + row_pitch: usize, + slice_pitch: usize, + ptr: *mut ::std::os::raw::c_void, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, ) -> cl_int { - println!("cl_enqueue_read_image not implemented"); - CL_OUT_OF_HOST_MEMORY + match_err!(enqueue_read_image( + command_queue, + image, + blocking_read, + origin, + region, + row_pitch, + slice_pitch, + ptr, + num_events_in_wait_list, + event_wait_list, + event, + )) } extern "C" fn cl_enqueue_write_image( - _command_queue: cl_command_queue, - _image: cl_mem, - _blocking_write: cl_bool, - _origin: *const usize, - _region: *const usize, - _input_row_pitch: usize, - _input_slice_pitch: usize, - _ptr: *const ::std::os::raw::c_void, - _num_events_in_wait_list: cl_uint, - _event_wait_list: *const cl_event, - _event: *mut cl_event, + command_queue: cl_command_queue, + image: cl_mem, + blocking_write: cl_bool, + origin: *const usize, + region: *const usize, + input_row_pitch: usize, + input_slice_pitch: usize, + ptr: *const ::std::os::raw::c_void, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, ) -> cl_int { - println!("cl_enqueue_write_image not implemented"); - CL_OUT_OF_HOST_MEMORY + match_err!(enqueue_write_image( + command_queue, + image, + blocking_write, + origin, + region, + input_row_pitch, + input_slice_pitch, + ptr, + num_events_in_wait_list, + event_wait_list, + event, + )) } extern "C" fn cl_enqueue_copy_image( - _command_queue: cl_command_queue, - _src_image: cl_mem, - _dst_image: cl_mem, - _src_origin: *const usize, - _dst_origin: *const usize, - _region: *const usize, - _num_events_in_wait_list: cl_uint, - _event_wait_list: *const cl_event, - _event: *mut cl_event, + command_queue: cl_command_queue, + src_image: cl_mem, + dst_image: cl_mem, + src_origin: *const usize, + dst_origin: *const usize, + region: *const usize, + 
num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, ) -> cl_int { - println!("cl_enqueue_copy_image not implemented"); - CL_OUT_OF_HOST_MEMORY + match_err!(enqueue_copy_image( + command_queue, + src_image, + dst_image, + src_origin, + dst_origin, + region, + num_events_in_wait_list, + event_wait_list, + event, + )) } extern "C" fn cl_enqueue_copy_image_to_buffer( diff --git a/src/gallium/frontends/rusticl/api/kernel.rs b/src/gallium/frontends/rusticl/api/kernel.rs index 0bb43d1..2a68f1b 100644 --- a/src/gallium/frontends/rusticl/api/kernel.rs +++ b/src/gallium/frontends/rusticl/api/kernel.rs @@ -410,7 +410,7 @@ pub fn enqueue_ndrange_kernel( local_work_size, global_work_size, global_work_offset, - ) + )? }; create_and_queue(q, CL_COMMAND_NDRANGE_KERNEL, evs, event, false, cb) diff --git a/src/gallium/frontends/rusticl/api/memory.rs b/src/gallium/frontends/rusticl/api/memory.rs index b52c1bd..6b9f5a5 100644 --- a/src/gallium/frontends/rusticl/api/memory.rs +++ b/src/gallium/frontends/rusticl/api/memory.rs @@ -8,6 +8,7 @@ use crate::api::icd::*; use crate::api::types::*; use crate::api::util::*; use crate::core::device::*; +use crate::core::format::*; use crate::core::memory::*; use crate::*; @@ -329,41 +330,9 @@ fn validate_image_format<'a>( ) -> CLResult<(&'a cl_image_format, u8)> { // CL_INVALID_IMAGE_FORMAT_DESCRIPTOR ... if image_format is NULL. let format = unsafe { image_format.as_ref() }.ok_or(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR)?; - - let channels = match format.image_channel_order { - CL_R | CL_A | CL_DEPTH | CL_LUMINANCE | CL_INTENSITY => 1, - - CL_RG | CL_RA | CL_Rx => 2, - - CL_RGB | CL_RGx | CL_sRGB => 3, - - CL_RGBA | CL_ARGB | CL_BGRA | CL_ABGR | CL_RGBx | CL_sRGBA | CL_sBGRA | CL_sRGBx => 4, - - _ => return Err(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR), - }; - - let channel_size = match format.image_channel_data_type { - CL_SNORM_INT8 | CL_UNORM_INT8 | CL_SIGNED_INT8 | CL_UNSIGNED_INT8 => 1, - - CL_SNORM_INT16 | CL_UNORM_INT16 | CL_SIGNED_INT16 | CL_UNSIGNED_INT16 | CL_HALF_FLOAT - | CL_UNORM_SHORT_565 | CL_UNORM_SHORT_555 => 2, - - CL_SIGNED_INT32 - | CL_UNSIGNED_INT32 - | CL_FLOAT - | CL_UNORM_INT_101010 - | CL_UNORM_INT_101010_2 => 4, - - _ => return Err(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR), - }; - - let packed = [ - CL_UNORM_SHORT_565, - CL_UNORM_SHORT_555, - CL_UNORM_INT_101010, - CL_UNORM_INT_101010, - ] - .contains(&format.image_channel_data_type); + let pixel_size = format + .pixel_size() + .ok_or(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR)?; // special validation let valid_combination = match format.image_channel_data_type { @@ -377,14 +346,7 @@ fn validate_image_format<'a>( return Err(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR); } - Ok(( - format, - if packed { - channel_size - } else { - channels * channel_size - }, - )) + Ok((format, pixel_size)) } fn validate_image_desc( @@ -402,14 +364,11 @@ fn validate_image_desc( // image_type describes the image type and must be either CL_MEM_OBJECT_IMAGE1D, // CL_MEM_OBJECT_IMAGE1D_BUFFER, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D, // CL_MEM_OBJECT_IMAGE2D_ARRAY, or CL_MEM_OBJECT_IMAGE3D. 
- let (dims, array) = match desc.image_type { - CL_MEM_OBJECT_IMAGE1D | CL_MEM_OBJECT_IMAGE1D_BUFFER => (1, false), - CL_MEM_OBJECT_IMAGE1D_ARRAY => (1, true), - CL_MEM_OBJECT_IMAGE2D => (2, false), - CL_MEM_OBJECT_IMAGE2D_ARRAY => (2, true), - CL_MEM_OBJECT_IMAGE3D => (3, false), - _ => return Err(err), - }; + if !CL_IMAGE_TYPES.contains(&desc.image_type) { + return Err(err); + } + + let (dims, array) = desc.type_info(); // image_width is the width of the image in pixels. For a 2D image and image array, the image // width must be a value ≥ 1 and ≤ CL_DEVICE_IMAGE2D_MAX_WIDTH. For a 3D image, the image width @@ -495,6 +454,8 @@ fn validate_image_desc( if desc.image_row_pitch != 0 || desc.image_slice_pitch != 0 { return Err(err); } + desc.image_row_pitch = desc.image_width * elem_size; + desc.image_slice_pitch = desc.image_row_pitch * desc.image_height; } else { if desc.image_row_pitch == 0 { desc.image_row_pitch = desc.image_width * elem_size; @@ -729,7 +690,7 @@ pub fn create_image_with_properties( elem_size, host_ptr, props, - ))) + )?)) } pub fn create_image( @@ -1515,13 +1476,200 @@ pub fn enqueue_map_buffer( Box::new(|_, _| Ok(())), )?; - Ok(b.map(&q, offset, size, block)) + b.map(&q, offset, size, block) // TODO // CL_MISALIGNED_SUB_BUFFER_OFFSET if buffer is a sub-buffer object and offset specified when the sub-buffer object is created is not aligned to CL_DEVICE_MEM_BASE_ADDR_ALIGN value for the device associated with queue. This error code is missing before version 1.1. // CL_MAP_FAILURE if there is a failure to map the requested region into the host address space. This error cannot occur for buffer objects created with CL_MEM_USE_HOST_PTR or CL_MEM_ALLOC_HOST_PTR. // CL_INVALID_OPERATION if mapping would lead to overlapping regions being mapped for writing. } +pub fn enqueue_read_image( + command_queue: cl_command_queue, + image: cl_mem, + blocking_read: cl_bool, + origin: *const usize, + region: *const usize, + mut row_pitch: usize, + mut slice_pitch: usize, + ptr: *mut ::std::os::raw::c_void, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, +) -> CLResult<()> { + let q = command_queue.get_arc()?; + let i = image.get_arc()?; + let block = check_cl_bool(blocking_read).ok_or(CL_INVALID_VALUE)?; + let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?; + let pixel_size = i.image_format.pixel_size().unwrap() as usize; + + // CL_INVALID_CONTEXT if the context associated with command_queue and image are not the same + if i.context != q.context { + return Err(CL_INVALID_CONTEXT); + } + + // CL_INVALID_OPERATION if clEnqueueReadImage is called on image which has been created with + // CL_MEM_HOST_WRITE_ONLY or CL_MEM_HOST_NO_ACCESS. + if bit_check(i.flags, CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS) { + return Err(CL_INVALID_OPERATION); + } + + // CL_INVALID_VALUE if origin or region is NULL. + // CL_INVALID_VALUE if ptr is NULL. + if origin.is_null() || region.is_null() || ptr.is_null() { + return Err(CL_INVALID_VALUE); + } + + // CL_INVALID_VALUE if image is a 1D or 2D image and slice_pitch or input_slice_pitch is not 0. + if !i.image_desc.has_slice() && slice_pitch != 0 { + return Err(CL_INVALID_VALUE); + } + + let r = unsafe { CLVec::from_raw(region) }; + let o = unsafe { CLVec::from_raw(origin) }; + + // If row_pitch (or input_row_pitch) is set to 0, the appropriate row pitch is calculated based + // on the size of each element in bytes multiplied by width. 
+ if row_pitch == 0 { + row_pitch = r[0] * pixel_size; + } + + // If slice_pitch (or input_slice_pitch) is set to 0, the appropriate slice pitch is calculated + // based on the row_pitch × height. + if slice_pitch == 0 { + slice_pitch = row_pitch * r[1]; + } + + create_and_queue( + q, + CL_COMMAND_READ_IMAGE, + evs, + event, + block, + Box::new(move |q, ctx| { + i.read_to_user_rect( + ptr, + q, + ctx, + &r, + &o, + i.image_desc.image_row_pitch, + i.image_desc.image_slice_pitch, + &CLVec::default(), + row_pitch, + slice_pitch, + ) + }), + ) + + //• CL_INVALID_VALUE if the region being read or written specified by origin and region is out of bounds. + //• CL_INVALID_VALUE if values in origin and region do not follow rules described in the argument description for origin and region. + //• CL_INVALID_IMAGE_SIZE if image dimensions (image width, height, specified or compute row and/or slice pitch) for image are not supported by device associated with queue. + //• CL_IMAGE_FORMAT_NOT_SUPPORTED if image format (image channel order and data type) for image are not supported by device associated with queue. + //• CL_INVALID_OPERATION if the device associated with command_queue does not support images (i.e. CL_DEVICE_IMAGE_SUPPORT specified in the Device Queries table is CL_FALSE). + //• CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST if the read and write operations are blocking and the execution status of any of the events in event_wait_list is a negative integer value. +} + +pub fn enqueue_write_image( + command_queue: cl_command_queue, + image: cl_mem, + blocking_write: cl_bool, + origin: *const usize, + region: *const usize, + mut row_pitch: usize, + mut slice_pitch: usize, + ptr: *const ::std::os::raw::c_void, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, +) -> CLResult<()> { + let q = command_queue.get_arc()?; + let i = image.get_arc()?; + let block = check_cl_bool(blocking_write).ok_or(CL_INVALID_VALUE)?; + let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?; + let pixel_size = i.image_format.pixel_size().unwrap() as usize; + + // CL_INVALID_CONTEXT if the context associated with command_queue and image are not the same + if i.context != q.context { + return Err(CL_INVALID_CONTEXT); + } + + // CL_INVALID_OPERATION if clEnqueueWriteImage is called on image which has been created with + // CL_MEM_HOST_READ_ONLY or CL_MEM_HOST_NO_ACCESS. + if bit_check(i.flags, CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS) { + return Err(CL_INVALID_OPERATION); + } + + // CL_INVALID_VALUE if origin or region is NULL. + // CL_INVALID_VALUE if ptr is NULL. + if origin.is_null() || region.is_null() || ptr.is_null() { + return Err(CL_INVALID_VALUE); + } + + // CL_INVALID_VALUE if image is a 1D or 2D image and slice_pitch or input_slice_pitch is not 0. + if !i.image_desc.has_slice() && slice_pitch != 0 { + return Err(CL_INVALID_VALUE); + } + + let r = unsafe { CLVec::from_raw(region) }; + let o = unsafe { CLVec::from_raw(origin) }; + + // If row_pitch (or input_row_pitch) is set to 0, the appropriate row pitch is calculated based + // on the size of each element in bytes multiplied by width. + if row_pitch == 0 { + row_pitch = r[0] * pixel_size; + } + + // If slice_pitch (or input_slice_pitch) is set to 0, the appropriate slice pitch is calculated + // based on the row_pitch × height. 
+ if slice_pitch == 0 { + slice_pitch = row_pitch * r[1]; + } + + create_and_queue( + q, + CL_COMMAND_WRITE_BUFFER_RECT, + evs, + event, + block, + Box::new(move |q, ctx| { + i.write_from_user_rect( + ptr, + q, + ctx, + &r, + &CLVec::default(), + row_pitch, + slice_pitch, + &o, + i.image_desc.image_row_pitch, + i.image_desc.image_slice_pitch, + ) + }), + ) + + //• CL_INVALID_VALUE if the region being read or written specified by origin and region is out of bounds. + //• CL_INVALID_VALUE if values in origin and region do not follow rules described in the argument description for origin and region. + //• CL_INVALID_IMAGE_SIZE if image dimensions (image width, height, specified or compute row and/or slice pitch) for image are not supported by device associated with queue. + //• CL_IMAGE_FORMAT_NOT_SUPPORTED if image format (image channel order and data type) for image are not supported by device associated with queue. + //• CL_INVALID_OPERATION if the device associated with command_queue does not support images (i.e. CL_DEVICE_IMAGE_SUPPORT specified in the Device Queries table is CL_FALSE). + //• CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST if the read and write operations are blocking and the execution status of any of the events in event_wait_list is a negative integer value. +} + +pub fn enqueue_copy_image( + _command_queue: cl_command_queue, + _src_image: cl_mem, + _dst_image: cl_mem, + _src_origin: *const usize, + _dst_origin: *const usize, + _region: *const usize, + _num_events_in_wait_list: cl_uint, + _event_wait_list: *const cl_event, + _event: *mut cl_event, +) -> CLResult<()> { + println!("enqueue_copy_image not implemented"); + Err(CL_OUT_OF_HOST_MEMORY) +} + pub fn enqueue_unmap_mem_object( command_queue: cl_command_queue, memobj: cl_mem, diff --git a/src/gallium/frontends/rusticl/core/context.rs b/src/gallium/frontends/rusticl/core/context.rs index ca939d6..0dd513f 100644 --- a/src/gallium/frontends/rusticl/core/context.rs +++ b/src/gallium/frontends/rusticl/core/context.rs @@ -3,6 +3,8 @@ extern crate rusticl_opencl_gen; use crate::api::icd::*; use crate::core::device::*; +use crate::core::format::*; +use crate::core::util::*; use crate::impl_cl_type_trait; use self::mesa_rust::pipe::resource::*; @@ -62,6 +64,79 @@ impl Context { } Ok(res) } + + pub fn create_texture( + &self, + desc: &cl_image_desc, + format: &cl_image_format, + ) -> CLResult, Arc>> { + let width = desc + .image_width + .try_into() + .map_err(|_| CL_OUT_OF_HOST_MEMORY)?; + let height = desc + .image_height + .try_into() + .map_err(|_| CL_OUT_OF_HOST_MEMORY)?; + let depth = desc + .image_depth + .try_into() + .map_err(|_| CL_OUT_OF_HOST_MEMORY)?; + let array_size = desc + .image_array_size + .try_into() + .map_err(|_| CL_OUT_OF_HOST_MEMORY)?; + let target = cl_mem_type_to_texture_target(desc.image_type); + let format = format.to_pipe_format().unwrap(); + + let mut res = HashMap::new(); + for dev in &self.devs { + let resource = dev + .screen() + .resource_create_texture(width, height, depth, array_size, target, format) + .ok_or(CL_OUT_OF_RESOURCES); + res.insert(Arc::clone(dev), Arc::new(resource?)); + } + Ok(res) + } + + pub fn create_texture_from_user( + &self, + desc: &cl_image_desc, + format: &cl_image_format, + user_ptr: *mut c_void, + ) -> CLResult, Arc>> { + let width = desc + .image_width + .try_into() + .map_err(|_| CL_OUT_OF_HOST_MEMORY)?; + let height = desc + .image_height + .try_into() + .map_err(|_| CL_OUT_OF_HOST_MEMORY)?; + let depth = desc + .image_depth + .try_into() + .map_err(|_| 
CL_OUT_OF_HOST_MEMORY)?; + let array_size = desc + .image_array_size + .try_into() + .map_err(|_| CL_OUT_OF_HOST_MEMORY)?; + let target = cl_mem_type_to_texture_target(desc.image_type); + let format = format.to_pipe_format().unwrap(); + + let mut res = HashMap::new(); + for dev in &self.devs { + let resource = dev + .screen() + .resource_create_texture_from_user( + width, height, depth, array_size, target, format, user_ptr, + ) + .ok_or(CL_OUT_OF_RESOURCES); + res.insert(Arc::clone(dev), Arc::new(resource?)); + } + Ok(res) + } } impl Drop for Context { diff --git a/src/gallium/frontends/rusticl/core/format.rs b/src/gallium/frontends/rusticl/core/format.rs index 4f3e6a7..f8502ac 100644 --- a/src/gallium/frontends/rusticl/core/format.rs +++ b/src/gallium/frontends/rusticl/core/format.rs @@ -179,3 +179,77 @@ pub const FORMATS: &[RusticlImageFormat] = &[ pipe_format::PIPE_FORMAT_B8G8R8A8_UNORM, ), ]; + +pub trait CLFormatInfo { + fn channels(&self) -> Option; + fn format_info(&self) -> Option<(u8, bool)>; + fn to_pipe_format(&self) -> Option; + + fn channel_size(&self) -> Option { + if let Some(packed) = self.is_packed() { + assert!(!packed); + self.format_info().map(|i| i.0) + } else { + None + } + } + + fn packed_size(&self) -> Option { + if let Some(packed) = self.is_packed() { + assert!(packed); + self.format_info().map(|i| i.0) + } else { + None + } + } + + fn is_packed(&self) -> Option { + self.format_info().map(|i| i.1) + } + + fn pixel_size(&self) -> Option { + if let Some(packed) = self.is_packed() { + if packed { + self.packed_size() + } else { + self.channels().zip(self.channel_size()).map(|(c, s)| c * s) + } + } else { + None + } + } +} + +impl CLFormatInfo for cl_image_format { + #[allow(non_upper_case_globals)] + fn channels(&self) -> Option { + match self.image_channel_order { + CL_R | CL_A | CL_DEPTH | CL_INTENSITY | CL_LUMINANCE => Some(1), + CL_RG | CL_RA | CL_Rx => Some(2), + CL_RGB | CL_RGx | CL_sRGB => Some(3), + CL_RGBA | CL_ARGB | CL_BGRA | CL_ABGR | CL_RGBx | CL_sRGBA | CL_sBGRA | CL_sRGBx => { + Some(4) + } + _ => None, + } + } + + fn format_info(&self) -> Option<(u8, bool)> { + match self.image_channel_data_type { + CL_SIGNED_INT8 | CL_UNSIGNED_INT8 | CL_SNORM_INT8 | CL_UNORM_INT8 => Some((1, false)), + CL_SIGNED_INT16 | CL_UNSIGNED_INT16 | CL_SNORM_INT16 | CL_UNORM_INT16 + | CL_HALF_FLOAT => Some((2, false)), + CL_SIGNED_INT32 | CL_UNSIGNED_INT32 | CL_FLOAT => Some((4, false)), + CL_UNORM_SHORT_555 | CL_UNORM_SHORT_565 => Some((2, true)), + CL_UNORM_INT_101010 | CL_UNORM_INT_101010_2 => Some((4, true)), + _ => None, + } + } + + fn to_pipe_format(&self) -> Option { + FORMATS + .iter() + .find(|f| f.cl_image_format == *self) + .map(|f| f.pipe) + } +} diff --git a/src/gallium/frontends/rusticl/core/kernel.rs b/src/gallium/frontends/rusticl/core/kernel.rs index 79e460f..c56c227 100644 --- a/src/gallium/frontends/rusticl/core/kernel.rs +++ b/src/gallium/frontends/rusticl/core/kernel.rs @@ -409,7 +409,7 @@ impl Kernel { block: &[usize], grid: &[usize], offsets: &[usize], - ) -> EventSig { + ) -> CLResult { let nir = self.nirs.get(&q.device).unwrap(); let mut block = create_kernel_arr::(block, 1); let mut grid = create_kernel_arr::(grid, 1); @@ -436,7 +436,7 @@ impl Kernel { KernelArgValue::Constant(c) => input.extend_from_slice(c), KernelArgValue::MemObject(mem) => { input.extend_from_slice(&mem.offset.to_ne_bytes()); - resource_info.push((Some(mem.get_res_of_dev(&q.device).clone()), arg.offset)); + resource_info.push((Some(mem.get_res_of_dev(&q.device)?.clone()), 
arg.offset)); } KernelArgValue::LocalMem(size) => { // TODO 32 bit @@ -491,7 +491,7 @@ impl Kernel { } let k = self.clone(); - Box::new(move |q, ctx| { + Ok(Box::new(move |q, ctx| { let nir = k.nirs.get(&q.device).unwrap(); let mut input = input.clone(); let mut resources = Vec::with_capacity(resource_info.len()); @@ -545,7 +545,7 @@ impl Kernel { } Ok(()) - }) + })) } pub fn access_qualifier(&self, idx: cl_uint) -> cl_kernel_arg_access_qualifier { diff --git a/src/gallium/frontends/rusticl/core/memory.rs b/src/gallium/frontends/rusticl/core/memory.rs index 49be63e..289dff6 100644 --- a/src/gallium/frontends/rusticl/core/memory.rs +++ b/src/gallium/frontends/rusticl/core/memory.rs @@ -1,4 +1,5 @@ extern crate mesa_rust; +extern crate mesa_rust_gen; extern crate rusticl_opencl_gen; use crate::api::icd::*; @@ -6,14 +7,17 @@ use crate::api::types::*; use crate::api::util::*; use crate::core::context::*; use crate::core::device::*; +use crate::core::format::*; use crate::core::queue::*; use crate::impl_cl_type_trait; use self::mesa_rust::pipe::context::*; use self::mesa_rust::pipe::resource::*; use self::mesa_rust::pipe::transfer::*; +use self::mesa_rust_gen::*; use self::rusticl_opencl_gen::*; +use std::cmp; use std::collections::HashMap; use std::convert::TryInto; use std::ops::AddAssign; @@ -43,6 +47,97 @@ pub struct Mem { impl_cl_type_trait!(cl_mem, Mem, CL_INVALID_MEM_OBJECT); +pub trait CLImageDescInfo { + fn type_info(&self) -> (u8, bool); + fn pixels(&self) -> usize; + fn bx(&self) -> CLResult; + fn row_pitch(&self) -> CLResult; + fn slice_pitch(&self) -> CLResult; + + fn dims(&self) -> u8 { + self.type_info().0 + } + + fn has_slice(&self) -> bool { + self.dims() == 3 || self.is_array() + } + + fn is_array(&self) -> bool { + self.type_info().1 + } +} + +impl CLImageDescInfo for cl_image_desc { + fn type_info(&self) -> (u8, bool) { + match self.image_type { + CL_MEM_OBJECT_IMAGE1D | CL_MEM_OBJECT_IMAGE1D_BUFFER => (1, false), + CL_MEM_OBJECT_IMAGE1D_ARRAY => (1, true), + CL_MEM_OBJECT_IMAGE2D => (2, false), + CL_MEM_OBJECT_IMAGE2D_ARRAY => (2, true), + CL_MEM_OBJECT_IMAGE3D => (3, false), + _ => panic!("unknown image_type {:x}", self.image_type), + } + } + + fn pixels(&self) -> usize { + let mut res = self.image_width; + let dims = self.dims(); + + if dims > 1 { + res *= self.image_height; + } + + if dims > 2 { + res *= self.image_depth; + } + + if self.is_array() { + res *= self.image_array_size; + } + + res + } + + fn bx(&self) -> CLResult { + let mut depth = if self.is_array() { + self.image_array_size + .try_into() + .map_err(|_| CL_OUT_OF_HOST_MEMORY)? + } else { + self.image_depth + .try_into() + .map_err(|_| CL_OUT_OF_HOST_MEMORY)? 
+ }; + + let height = cmp::max(self.image_height, 1); + depth = cmp::max(depth, 1); + + Ok(pipe_box { + x: 0, + y: 0, + z: 0, + width: self + .image_width + .try_into() + .map_err(|_| CL_OUT_OF_HOST_MEMORY)?, + height: height.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?, + depth: depth, + }) + } + + fn row_pitch(&self) -> CLResult { + self.image_row_pitch + .try_into() + .map_err(|_| CL_OUT_OF_HOST_MEMORY) + } + + fn slice_pitch(&self) -> CLResult { + self.image_slice_pitch + .try_into() + .map_err(|_| CL_OUT_OF_HOST_MEMORY) + } +} + fn sw_copy( src: *const c_void, dst: *mut c_void, @@ -53,6 +148,7 @@ fn sw_copy( dst_origin: &CLVec, dst_row_pitch: usize, dst_slice_pitch: usize, + pixel_size: u8, ) { for z in 0..region[2] { for y in 0..region[1] { @@ -60,13 +156,36 @@ fn sw_copy( ptr::copy_nonoverlapping( src.add((*src_origin + [0, y, z]) * [1, src_row_pitch, src_slice_pitch]), dst.add((*dst_origin + [0, y, z]) * [1, dst_row_pitch, dst_slice_pitch]), - region[0], + region[0] * pixel_size as usize, ) }; } } } +fn create_box( + origin: &CLVec, + region: &CLVec, + tex_type: cl_mem_object_type, +) -> CLResult { + let mut y = 1; + let mut z = 2; + + // array slice belongs to z/depth + if tex_type == CL_MEM_OBJECT_IMAGE1D_ARRAY { + (z, y) = (y, z); + } + + Ok(pipe_box { + x: origin[0].try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?, + y: origin[y].try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?, + z: origin[z].try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?, + width: region[0].try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?, + height: region[y].try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?, + depth: region[z].try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?, + }) +} + impl Mem { pub fn new_buffer( context: Arc, @@ -153,64 +272,126 @@ impl Mem { mem_type: cl_mem_object_type, flags: cl_mem_flags, image_format: &cl_image_format, - image_desc: cl_image_desc, + mut image_desc: cl_image_desc, image_elem_size: u8, host_ptr: *mut c_void, props: Vec, - ) -> Arc { - if bit_check( - flags, - CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR, - ) { + ) -> CLResult> { + if bit_check(flags, CL_MEM_ALLOC_HOST_PTR) { println!("host ptr semantics not implemented!"); } + // we have to sanitize the image_desc a little for internal use + let api_image_desc = image_desc; + let dims = image_desc.dims(); + let is_array = image_desc.is_array(); + if dims < 3 { + image_desc.image_depth = 1; + } + if dims < 2 { + image_desc.image_height = 1; + } + if !is_array { + image_desc.image_array_size = 1; + } + + let texture = if bit_check(flags, CL_MEM_USE_HOST_PTR) { + context.create_texture_from_user(&image_desc, image_format, host_ptr) + } else { + context.create_texture(&image_desc, image_format) + }?; + + if bit_check(flags, CL_MEM_COPY_HOST_PTR) { + let bx = image_desc.bx()?; + let stride = image_desc.row_pitch()?; + let layer_stride = image_desc.slice_pitch()?; + + for (d, r) in &texture { + d.helper_ctx() + .texture_subdata(r, &bx, host_ptr, stride, layer_stride); + } + } + let host_ptr = if bit_check(flags, CL_MEM_USE_HOST_PTR) { host_ptr } else { ptr::null_mut() }; - Arc::new(Self { + Ok(Arc::new(Self { base: CLObjectBase::new(), context: context, parent: None, mem_type: mem_type, flags: flags, - size: 0, + size: image_desc.pixels() * image_format.pixel_size().unwrap() as usize, offset: 0, host_ptr: host_ptr, image_format: *image_format, - image_desc: image_desc, + image_desc: api_image_desc, image_elem_size: image_elem_size, props: props, cbs: Mutex::new(Vec::new()), - res: None, + res: Some(texture), 
maps: Mutex::new(HashMap::new()), - }) + })) } pub fn is_buffer(&self) -> bool { self.mem_type == CL_MEM_OBJECT_BUFFER } + fn tx( + &self, + q: &Arc, + ctx: &Arc, + mut offset: usize, + size: usize, + blocking: bool, + ) -> CLResult { + let b = self.to_parent(&mut offset); + let r = b.get_res()?.get(&q.device).unwrap(); + + assert!(self.is_buffer()); + + Ok(ctx.buffer_map( + r, + offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?, + size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?, + blocking, + )) + } + + fn tx_image( + &self, + q: &Arc, + ctx: &Arc, + bx: &pipe_box, + blocking: bool, + ) -> CLResult { + assert!(!self.is_buffer()); + + let r = self.get_res()?.get(&q.device).unwrap(); + Ok(ctx.texture_map(r, bx, blocking)) + } + pub fn has_same_parent(&self, other: &Self) -> bool { let a = self.parent.as_ref().map_or(self, |p| p); let b = other.parent.as_ref().map_or(other, |p| p); ptr::eq(a, b) } - fn get_res(&self) -> &HashMap, Arc> { + fn get_res(&self) -> CLResult<&HashMap, Arc>> { self.parent .as_ref() .map_or(self, |p| p.as_ref()) .res .as_ref() - .unwrap() + .ok_or(CL_OUT_OF_HOST_MEMORY) } - pub fn get_res_of_dev(&self, dev: &Arc) -> &Arc { - self.get_res().get(dev).unwrap() + pub fn get_res_of_dev(&self, dev: &Arc) -> CLResult<&Arc> { + Ok(self.get_res()?.get(dev).unwrap()) } fn to_parent<'a>(&'a self, offset: &mut usize) -> &'a Self { @@ -226,24 +407,18 @@ impl Mem { &self, q: &Arc, ctx: &Arc, - mut offset: usize, + offset: usize, ptr: *mut c_void, size: usize, ) -> CLResult<()> { - let b = self.to_parent(&mut offset); - let r = b.get_res().get(&q.device).unwrap(); - let tx = ctx.buffer_map( - r, - offset.try_into().unwrap(), - size.try_into().unwrap(), - true, - ); + assert!(self.is_buffer()); + + let tx = self.tx(q, ctx, offset, size, true)?; unsafe { ptr::copy_nonoverlapping(tx.ptr(), ptr, size); } - drop(tx); Ok(()) } @@ -255,8 +430,10 @@ impl Mem { ptr: *const c_void, size: usize, ) -> CLResult<()> { + assert!(self.is_buffer()); + let b = self.to_parent(&mut offset); - let r = b.get_res().get(&q.device).unwrap(); + let r = b.get_res()?.get(&q.device).unwrap(); ctx.buffer_subdata( r, offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?, @@ -275,11 +452,13 @@ impl Mem { mut dst_offset: usize, size: usize, ) -> CLResult<()> { + assert!(self.is_buffer()); + let src = self.to_parent(&mut src_offset); let dst = dst.to_parent(&mut dst_offset); - let src_res = src.get_res().get(&q.device).unwrap(); - let dst_res = dst.get_res().get(&q.device).unwrap(); + let src_res = src.get_res()?.get(&q.device).unwrap(); + let dst_res = dst.get_res()?.get(&q.device).unwrap(); ctx.resource_copy_region( src_res, @@ -299,8 +478,10 @@ impl Mem { mut offset: usize, size: usize, ) -> CLResult<()> { + assert!(self.is_buffer()); + let b = self.to_parent(&mut offset); - let res = b.get_res().get(&q.device).unwrap(); + let res = b.get_res()?.get(&q.device).unwrap(); ctx.clear_buffer( res, pattern, @@ -318,29 +499,49 @@ impl Mem { region: &CLVec, src_origin: &CLVec, src_row_pitch: usize, - src_slice_pitch: usize, + mut src_slice_pitch: usize, dst_origin: &CLVec, dst_row_pitch: usize, dst_slice_pitch: usize, ) -> CLResult<()> { - let mut offset = 0; - let b = self.to_parent(&mut offset); - let r = b.res.as_ref().unwrap().get(&q.device).unwrap(); - let tx = ctx.buffer_map(r, 0, self.size.try_into().unwrap(), true); + if self.is_buffer() { + let tx = self.tx(q, ctx, 0, self.size, true)?; + sw_copy( + src, + tx.ptr(), + region, + src_origin, + src_row_pitch, + src_slice_pitch, + dst_origin, + 
dst_row_pitch, + dst_slice_pitch, + 1, + ); + } else { + assert!(dst_row_pitch == self.image_desc.image_row_pitch); + assert!(dst_slice_pitch == self.image_desc.image_slice_pitch); + assert!(src_origin == &CLVec::default()); - sw_copy( - src, - unsafe { tx.ptr().add(offset) }, - region, - src_origin, - src_row_pitch, - src_slice_pitch, - dst_origin, - dst_row_pitch, - dst_slice_pitch, - ); + let res = self.get_res()?.get(&q.device).unwrap(); + let bx = create_box(dst_origin, region, self.mem_type)?; - drop(tx); + if self.mem_type == CL_MEM_OBJECT_IMAGE1D_ARRAY { + src_slice_pitch = src_row_pitch; + } + + ctx.texture_subdata( + res, + &bx, + src, + src_row_pitch + .try_into() + .map_err(|_| CL_OUT_OF_HOST_MEMORY)?, + src_slice_pitch + .try_into() + .map_err(|_| CL_OUT_OF_HOST_MEMORY)?, + ); + } Ok(()) } @@ -351,19 +552,31 @@ impl Mem { ctx: &Arc, region: &CLVec, src_origin: &CLVec, - src_row_pitch: usize, - src_slice_pitch: usize, + mut src_row_pitch: usize, + mut src_slice_pitch: usize, dst_origin: &CLVec, dst_row_pitch: usize, dst_slice_pitch: usize, ) -> CLResult<()> { - let mut offset = 0; - let b = self.to_parent(&mut offset); - let r = b.res.as_ref().unwrap().get(&q.device).unwrap(); - let tx = ctx.buffer_map(r, 0, self.size.try_into().unwrap(), true); + let tx; + let pixel_size; + + if self.is_buffer() { + tx = self.tx(q, ctx, 0, self.size, true)?; + pixel_size = 1; + } else { + assert!(dst_origin == &CLVec::default()); + + let bx = create_box(src_origin, region, self.mem_type)?; + tx = self.tx_image(q, ctx, &bx, true)?; + src_row_pitch = tx.row_pitch() as usize; + src_slice_pitch = tx.slice_pitch() as usize; + + pixel_size = self.image_format.pixel_size().unwrap(); + }; sw_copy( - unsafe { tx.ptr().add(offset) }, + tx.ptr(), dst, region, src_origin, @@ -372,9 +585,9 @@ impl Mem { dst_origin, dst_row_pitch, dst_slice_pitch, + pixel_size, ); - drop(tx); Ok(()) } @@ -391,21 +604,15 @@ impl Mem { dst_row_pitch: usize, dst_slice_pitch: usize, ) -> CLResult<()> { - let mut src_offset = 0; - let mut dst_offset = 0; - let src = self.to_parent(&mut src_offset); - let dst = dst.to_parent(&mut dst_offset); - - let res_src = src.res.as_ref().unwrap().get(&q.device).unwrap(); - let res_dst = dst.res.as_ref().unwrap().get(&q.device).unwrap(); + assert!(self.is_buffer()); - let tx_src = ctx.buffer_map(res_src, 0, src.size.try_into().unwrap(), true); - let tx_dst = ctx.buffer_map(res_dst, 0, dst.size.try_into().unwrap(), true); + let tx_src = self.tx(q, ctx, 0, self.size, true)?; + let tx_dst = dst.tx(q, ctx, 0, self.size, true)?; // TODO check to use hw accelerated paths (e.g. 
resource_copy_region or blits) sw_copy( - unsafe { tx_src.ptr().add(src_offset) }, - unsafe { tx_dst.ptr().add(dst_offset) }, + tx_src.ptr(), + tx_dst.ptr(), region, src_origin, src_row_pitch, @@ -413,26 +620,24 @@ impl Mem { dst_origin, dst_row_pitch, dst_slice_pitch, + 1, ); - drop(tx_src); - drop(tx_dst); - Ok(()) } - // TODO use PIPE_MAP_UNSYNCHRONIZED for non blocking - pub fn map(&self, q: &Arc, mut offset: usize, size: usize, block: bool) -> *mut c_void { - let b = self.to_parent(&mut offset); + pub fn map( + &self, + q: &Arc, + offset: usize, + size: usize, + block: bool, + ) -> CLResult<*mut c_void> { + assert!(self.is_buffer()); - let res = b.res.as_ref().unwrap().get(&q.device).unwrap(); - let tx = q.device.helper_ctx().buffer_map( - res, - offset.try_into().unwrap(), - size.try_into().unwrap(), - block, - ); + let tx = self.tx(q, &q.device.helper_ctx(), offset, size, block)?; let ptr = tx.ptr(); + let mut lock = self.maps.lock().unwrap(); let e = lock.get_mut(&ptr); @@ -443,7 +648,7 @@ impl Mem { lock.insert(tx.ptr(), (1, tx)); } - ptr + Ok(ptr) } pub fn is_mapped_ptr(&self, ptr: *mut c_void) -> bool { diff --git a/src/gallium/frontends/rusticl/mesa/pipe/context.rs b/src/gallium/frontends/rusticl/mesa/pipe/context.rs index 0204c78..6aa5784 100644 --- a/src/gallium/frontends/rusticl/mesa/pipe/context.rs +++ b/src/gallium/frontends/rusticl/mesa/pipe/context.rs @@ -55,6 +55,28 @@ impl PipeContext { } } + pub fn texture_subdata( + &self, + res: &PipeResource, + bx: &pipe_box, + data: *const c_void, + stride: u32, + layer_stride: u32, + ) { + unsafe { + self.pipe.as_ref().texture_subdata.unwrap()( + self.pipe.as_ptr(), + res.pipe(), + 0, + pipe_map_flags::PIPE_MAP_WRITE.0, // TODO PIPE_MAP_x + bx, + data, + stride, + layer_stride, + ) + } + } + pub fn clear_buffer(&self, res: &PipeResource, pattern: &[u8], offset: u32, size: u32) { unsafe { self.pipe.as_ref().clear_buffer.unwrap()( @@ -277,5 +299,6 @@ fn has_required_cbs(c: &pipe_context) -> bool { && c.resource_copy_region.is_some() && c.set_global_binding.is_some() && c.texture_map.is_some() + && c.texture_subdata.is_some() && c.texture_unmap.is_some() } diff --git a/src/gallium/frontends/rusticl/mesa/pipe/screen.rs b/src/gallium/frontends/rusticl/mesa/pipe/screen.rs index 1b546a0..51940c6 100644 --- a/src/gallium/frontends/rusticl/mesa/pipe/screen.rs +++ b/src/gallium/frontends/rusticl/mesa/pipe/screen.rs @@ -83,6 +83,20 @@ impl PipeScreen { ) } + fn resource_create(&self, tmpl: &pipe_resource) -> Option { + PipeResource::new(unsafe { (*self.screen).resource_create.unwrap()(self.screen, tmpl) }) + } + + fn resource_create_from_user( + &self, + tmpl: &pipe_resource, + mem: *mut c_void, + ) -> Option { + PipeResource::new(unsafe { + (*self.screen).resource_from_user_memory.unwrap()(self.screen, tmpl, mem) + }) + } + pub fn resource_create_buffer(&self, size: u32) -> Option { let mut tmpl = pipe_resource::default(); @@ -93,7 +107,7 @@ impl PipeScreen { tmpl.array_size = 1; tmpl.bind = PIPE_BIND_GLOBAL; - PipeResource::new(unsafe { (*self.screen).resource_create.unwrap()(self.screen, &tmpl) }) + self.resource_create(&tmpl) } pub fn resource_create_buffer_from_user( @@ -110,9 +124,52 @@ impl PipeScreen { tmpl.array_size = 1; tmpl.bind = PIPE_BIND_GLOBAL; - PipeResource::new(unsafe { - (*self.screen).resource_from_user_memory.unwrap()(self.screen, &tmpl, mem) - }) + self.resource_create_from_user(&tmpl, mem) + } + + pub fn resource_create_texture( + &self, + width: u32, + height: u16, + depth: u16, + array_size: u16, + target: 
pipe_texture_target, + format: pipe_format, + ) -> Option<PipeResource> { + let mut tmpl = pipe_resource::default(); + + tmpl.set_target(target); + tmpl.set_format(format); + tmpl.width0 = width; + tmpl.height0 = height; + tmpl.depth0 = depth; + tmpl.array_size = array_size; + tmpl.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE; + + self.resource_create(&tmpl) + } + + pub fn resource_create_texture_from_user( + &self, + width: u32, + height: u16, + depth: u16, + array_size: u16, + target: pipe_texture_target, + format: pipe_format, + mem: *mut c_void, + ) -> Option<PipeResource> { + let mut tmpl = pipe_resource::default(); + + tmpl.set_target(target); + tmpl.set_format(format); + tmpl.width0 = width; + tmpl.height0 = height; + tmpl.depth0 = depth; + tmpl.array_size = array_size; + tmpl.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE; + + self.resource_create_from_user(&tmpl, mem) } pub fn param(&self, cap: pipe_cap) -> i32 { diff --git a/src/gallium/frontends/rusticl/mesa/pipe/transfer.rs b/src/gallium/frontends/rusticl/mesa/pipe/transfer.rs index 8c9b223..9b9df65 100644 --- a/src/gallium/frontends/rusticl/mesa/pipe/transfer.rs +++ b/src/gallium/frontends/rusticl/mesa/pipe/transfer.rs @@ -56,6 +56,14 @@ impl PipeTransfer { self.ptr } + pub fn row_pitch(&self) -> u32 { + unsafe { (*self.pipe).stride } + } + + pub fn slice_pitch(&self) -> u32 { + unsafe { (*self.pipe).layer_stride } + } + pub fn with_ctx(self, ctx: &PipeContext) -> GuardedPipeTransfer { GuardedPipeTransfer { inner: self, -- 2.7.4
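
As a quick reference for the pitch handling introduced above: below is a minimal standalone Rust sketch of the pixel-size and default row/slice-pitch rules that CLFormatInfo::pixel_size() and the new enqueue_read_image()/enqueue_write_image() paths rely on. The FormatInfo type and default_pitches() helper are simplified stand-ins for illustration, not the actual rusticl bindings.

// Simplified stand-in for the CL image format info: `bytes` is the size of one
// channel, or of the whole pixel for packed formats such as CL_UNORM_SHORT_565.
#[derive(Clone, Copy)]
struct FormatInfo {
    channels: usize,
    bytes: usize,
    packed: bool,
}

impl FormatInfo {
    // Mirrors the pixel_size() idea: packed formats report their total size,
    // everything else is channel count times channel size.
    fn pixel_size(&self) -> usize {
        if self.packed {
            self.bytes
        } else {
            self.channels * self.bytes
        }
    }
}

// CL rule used by the read/write image entry points: a zero row pitch defaults
// to region width * pixel size, a zero slice pitch to row pitch * height.
fn default_pitches(
    region: [usize; 3],
    fmt: FormatInfo,
    row_pitch: usize,
    slice_pitch: usize,
) -> (usize, usize) {
    let row_pitch = if row_pitch == 0 {
        region[0] * fmt.pixel_size()
    } else {
        row_pitch
    };
    let slice_pitch = if slice_pitch == 0 {
        row_pitch * region[1]
    } else {
        slice_pitch
    };
    (row_pitch, slice_pitch)
}

fn main() {
    // CL_RGBA + CL_UNORM_INT8: four 1-byte channels, not packed.
    let rgba8 = FormatInfo { channels: 4, bytes: 1, packed: false };
    // CL_RGB + CL_UNORM_SHORT_565: three channels packed into 2 bytes total.
    let rgb565 = FormatInfo { channels: 3, bytes: 2, packed: true };

    assert_eq!(default_pitches([64, 32, 1], rgba8, 0, 0), (256, 8192));
    assert_eq!(default_pitches([64, 32, 1], rgb565, 0, 0), (128, 4096));
    println!("pitch defaults check out");
}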