use crate::api::icd::*;
use crate::api::types::*;
use crate::api::util::*;
use crate::core::context::*;
use crate::core::device::*;
use crate::core::format::*;
use crate::core::queue::*;
use crate::core::util::*;
use crate::impl_cl_type_trait;

use mesa_rust::pipe::context::*;
use mesa_rust::pipe::resource::*;
use mesa_rust::pipe::screen::ResourceType;
use mesa_rust::pipe::transfer::*;

use mesa_rust_util::math::*;
use mesa_rust_util::properties::Properties;
use rusticl_opencl_gen::*;

use std::cmp;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::convert::TryInto;
use std::mem::size_of;
use std::ops::AddAssign;
use std::os::raw::c_void;
use std::ptr;
use std::sync::Arc;
use std::sync::Mutex;
use std::sync::MutexGuard;
struct MappingTransfer {
    shadow: Option<PipeResource>,

impl MappingTransfer {
    fn new(tx: PipeTransfer, shadow: Option<PipeResource>) -> Self {
    tx: HashMap<&'static Device, MappingTransfer>,
    maps: HashMap<*mut c_void, u32>,
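
    // Rough lifecycle, as far as this excerpt shows: `tx` holds one MappingTransfer (plus an
    // optional shadow resource) per device, set up lazily by Mem::map(); `maps` counts how often
    // each returned pointer is currently mapped. increase_ref() bumps that count once a mapping
    // gets synced (clearing the pending state), decrease_ref() drops it on unmap and reports when
    // the last user of a pointer went away, and clean_up_tx() retires the transfer once nothing
    // is mapped any more.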
    fn new() -> Mutex<Self> {

    fn mark_pending(&mut self, dev: &Device) {
        self.tx.get_mut(dev).unwrap().pending += 1;

    fn unmark_pending(&mut self, dev: &Device) {
        if let Some(tx) = self.tx.get_mut(dev) {
    fn increase_ref(&mut self, dev: &Device, ptr: *mut c_void) -> bool {
        // remember whether there were no active mappings before this call
        let res = self.maps.is_empty();
        *self.maps.entry(ptr).or_default() += 1;
        self.unmark_pending(dev);
    fn decrease_ref(&mut self, ptr: *mut c_void, dev: &Device) -> (bool, Option<&PipeResource>) {
        if let Some(r) = self.maps.get_mut(&ptr) {
            self.maps.remove(&ptr);

            if self.maps.is_empty() {
                let shadow = self.tx.get(dev).and_then(|tx| tx.shadow.as_ref());
                return (true, shadow);
    fn clean_up_tx(&mut self, dev: &Device, ctx: &PipeContext) {
        if self.maps.is_empty() {
            if let Some(tx) = self.tx.get(dev) {
                self.tx.remove(dev).unwrap().tx.with_ctx(ctx);
    pub base: CLObjectBase<CL_INVALID_MEM_OBJECT>,
    pub context: Arc<Context>,
    pub parent: Option<Arc<Mem>>,
    pub mem_type: cl_mem_object_type,
    pub flags: cl_mem_flags,
    pub host_ptr: *mut c_void,
    pub image_format: cl_image_format,
    pub pipe_format: pipe_format,
    pub image_desc: cl_image_desc,
    pub image_elem_size: u8,
    pub props: Vec<cl_mem_properties>,
    pub cbs: Mutex<Vec<Box<dyn Fn(cl_mem)>>>,
    res: Option<HashMap<&'static Device, Arc<PipeResource>>>,
    maps: Mutex<Mappings>,
impl_cl_type_trait!(cl_mem, Mem, CL_INVALID_MEM_OBJECT);
pub trait CLImageDescInfo {
    fn type_info(&self) -> (u8, bool);
    fn pixels(&self) -> usize;
    fn bx(&self) -> CLResult<pipe_box>;
    fn row_pitch(&self) -> CLResult<u32>;
    fn slice_pitch(&self) -> usize;
    fn width(&self) -> CLResult<u32>;
    fn height(&self) -> CLResult<u32>;
    fn size(&self) -> CLVec<usize>;

    fn dims(&self) -> u8 {

    fn dims_with_array(&self) -> u8 {
        let array: u8 = self.is_array().into();

    fn has_slice(&self) -> bool {
        self.dims() == 3 || self.is_array()

    fn is_array(&self) -> bool {
impl CLImageDescInfo for cl_image_desc {
    fn type_info(&self) -> (u8, bool) {
        match self.image_type {
            CL_MEM_OBJECT_IMAGE1D | CL_MEM_OBJECT_IMAGE1D_BUFFER => (1, false),
            CL_MEM_OBJECT_IMAGE1D_ARRAY => (1, true),
            CL_MEM_OBJECT_IMAGE2D => (2, false),
            CL_MEM_OBJECT_IMAGE2D_ARRAY => (2, true),
            CL_MEM_OBJECT_IMAGE3D => (3, false),
            _ => panic!("unknown image_type {:x}", self.image_type),

    fn pixels(&self) -> usize {
        let mut res = self.image_width;
        let dims = self.dims();

            res *= self.image_height;

            res *= self.image_depth;

            res *= self.image_array_size;
    fn size(&self) -> CLVec<usize> {
        let mut height = cmp::max(self.image_height, 1);
        let mut depth = cmp::max(self.image_depth, 1);

        match self.image_type {
            CL_MEM_OBJECT_IMAGE1D_ARRAY => height = self.image_array_size,
            CL_MEM_OBJECT_IMAGE2D_ARRAY => depth = self.image_array_size,

        CLVec::new([self.image_width, height, depth])

    fn bx(&self) -> CLResult<pipe_box> {
        create_pipe_box(CLVec::default(), self.size(), self.image_type)

    fn row_pitch(&self) -> CLResult<u32> {
            .map_err(|_| CL_OUT_OF_HOST_MEMORY)

    fn slice_pitch(&self) -> usize {
        self.image_slice_pitch

    fn width(&self) -> CLResult<u32> {
            .map_err(|_| CL_OUT_OF_HOST_MEMORY)

    fn height(&self) -> CLResult<u32> {
            .map_err(|_| CL_OUT_OF_HOST_MEMORY)
    region: &CLVec<usize>,
    src_origin: &CLVec<usize>,
    src_row_pitch: usize,
    src_slice_pitch: usize,
    dst_origin: &CLVec<usize>,
    dst_row_pitch: usize,
    dst_slice_pitch: usize,

    for z in 0..region[2] {
        for y in 0..region[1] {
            ptr::copy_nonoverlapping(
                    (*src_origin + [0, y, z])
                        * [pixel_size as usize, src_row_pitch, src_slice_pitch],
                    (*dst_origin + [0, y, z])
                        * [pixel_size as usize, dst_row_pitch, dst_slice_pitch],
                region[0] * pixel_size as usize,
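
            // A worked sketch of the addressing above (made-up numbers): with pixel_size = 4,
            // src_row_pitch = 1024 and src_slice_pitch = 65536, the source byte offset of the
            // pixel at src_origin + [0, y, z] comes out as the dot product
            //   (x0 + 0) * 4 + (y0 + y) * 1024 + (z0 + z) * 65536,
            // i.e. multiplying a CLVec by a [usize; 3] acts as a component-wise multiply-and-sum,
            // and each inner iteration copies one row of region[0] pixels.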
/// helper function to determine if we can just map the resource in question or if we have to go
/// through a shadow buffer to let the CPU access the resource's memory
fn can_map_directly(dev: &Device, res: &PipeResource) -> bool {
    // there are two parts to this check:
    //   1. is the resource located in system RAM?
    //   2. does the resource have a linear memory layout?
    // we do not want to map memory over the PCIe bus as this generally leads to bad performance.
    (dev.unified_memory() || res.is_staging() || res.is_user)
        && (res.is_buffer() || res.is_linear())
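
    // For example (hypothetical hardware): a staging or user-pointer buffer, or any resource on a
    // unified-memory (UMA) device, can be mapped directly as long as it is a buffer or laid out
    // linearly; a tiled texture living in discrete VRAM takes the shadow-buffer path instead.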
        context: Arc<Context>,
        host_ptr: *mut c_void,
        props: Vec<cl_mem_properties>,
    ) -> CLResult<Arc<Mem>> {
        let res_type = if bit_check(flags, CL_MEM_ALLOC_HOST_PTR) {
            ResourceType::Staging

        let buffer = context.create_buffer(
            bit_check(flags, CL_MEM_COPY_HOST_PTR),

        let host_ptr = if bit_check(flags, CL_MEM_USE_HOST_PTR) {

            base: CLObjectBase::new(),
            mem_type: CL_MEM_OBJECT_BUFFER,
            image_format: cl_image_format::default(),
            pipe_format: pipe_format::PIPE_FORMAT_NONE,
            image_desc: cl_image_desc::default(),
            cbs: Mutex::new(Vec::new()),
            maps: Mappings::new(),
    pub fn new_sub_buffer(

        let host_ptr = if parent.host_ptr.is_null() {
            unsafe { parent.host_ptr.add(offset) }

            base: CLObjectBase::new(),
            context: parent.context.clone(),
            parent: Some(parent),
            mem_type: CL_MEM_OBJECT_BUFFER,
            image_format: cl_image_format::default(),
            pipe_format: pipe_format::PIPE_FORMAT_NONE,
            image_desc: cl_image_desc::default(),
            cbs: Mutex::new(Vec::new()),
            maps: Mappings::new(),
        context: Arc<Context>,
        parent: Option<Arc<Mem>>,
        mem_type: cl_mem_object_type,
        image_format: &cl_image_format,
        mut image_desc: cl_image_desc,
        host_ptr: *mut c_void,
        props: Vec<cl_mem_properties>,
    ) -> CLResult<Arc<Mem>> {
        // we have to sanitize the image_desc a little for internal use
        let api_image_desc = image_desc;
        let dims = image_desc.dims();
        let is_array = image_desc.is_array();

            image_desc.image_depth = 1;

            image_desc.image_height = 1;

            image_desc.image_array_size = 1;
        let res_type = if bit_check(flags, CL_MEM_ALLOC_HOST_PTR) {
            ResourceType::Staging

        let texture = if parent.is_none() {
            let mut texture = context.create_texture(
                bit_check(flags, CL_MEM_COPY_HOST_PTR),

            // if allocating a Staging resource failed, retry with a Normal one, as
            // `CL_MEM_ALLOC_HOST_PTR` is only a performance hint.
            if res_type == ResourceType::Staging && texture.is_err() {
                texture = context.create_texture(
                    bit_check(flags, CL_MEM_COPY_HOST_PTR),
                    ResourceType::Normal,
        let host_ptr = if bit_check(flags, CL_MEM_USE_HOST_PTR) {

        let pipe_format = image_format.to_pipe_format().unwrap();

            base: CLObjectBase::new(),
            size: image_desc.pixels() * image_format.pixel_size().unwrap() as usize,
            image_format: *image_format,
            pipe_format,
            image_desc: api_image_desc,
            image_elem_size,
            cbs: Mutex::new(Vec::new()),
            maps: Mappings::new(),
    pub fn pixel_size(&self) -> Option<u8> {
        if self.is_buffer() {

            self.image_format.pixel_size()

    pub fn is_buffer(&self) -> bool {
        self.mem_type == CL_MEM_OBJECT_BUFFER
    ) -> CLResult<PipeTransfer> {
        let b = self.to_parent(&mut offset);
        let r = b.get_res()?.get(&q.device).unwrap();

            offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
            size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
            ResourceMapType::Normal,
    ) -> CLResult<(PipeTransfer, Option<PipeResource>)> {
        let b = self.to_parent(&mut offset);
        let r = b.get_res()?.get(&q.device).unwrap();
        let size = self.size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
        let ctx = q.device.helper_ctx();

        assert!(self.is_buffer());

        let tx = if can_map_directly(q.device, r) {
            ctx.buffer_map_directly(
                offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,

        if let Some(tx) = tx {

                .resource_create_buffer(size as u32, ResourceType::Staging)
                .ok_or(CL_OUT_OF_RESOURCES)?;
            let tx = ctx.buffer_map_coherent(&shadow, 0, size, rw);
            Ok((tx, Some(shadow)))
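
            // if we get here the direct map was not possible: a coherent staging buffer acts as a
            // shadow, and the caller gets both the transfer and the shadow resource so the copy
            // between shadow and real resource can happen later (see sync_shadow_buffer / unmap)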
        ctx: &'a PipeContext,
    ) -> CLResult<GuardedPipeTransfer<'a>> {
        Ok(self.tx_raw(q, ctx, offset, size, rw)?.with_ctx(ctx))

    ) -> CLResult<PipeTransfer> {
        assert!(!self.is_buffer());

        let r = self.get_res()?.get(&q.device).unwrap();
        Ok(ctx.texture_map(r, bx, rw, ResourceMapType::Normal))
    fn tx_image_raw_async(
    ) -> CLResult<(PipeTransfer, Option<PipeResource>)> {
        assert!(!self.is_buffer());

        let r = self.get_res()?.get(q.device).unwrap();
        let ctx = q.device.helper_ctx();

        let tx = if can_map_directly(q.device, r) {
            ctx.texture_map_directly(r, bx, rw)

        if let Some(tx) = tx {

                .resource_create_texture(
                    cl_mem_type_to_texture_target(self.image_desc.image_type),
                    ResourceType::Staging,
                .ok_or(CL_OUT_OF_RESOURCES)?;
            let tx = ctx.texture_map_coherent(&shadow, bx, rw);
            Ok((tx, Some(shadow)))
        ctx: &'a PipeContext,
    ) -> CLResult<GuardedPipeTransfer<'a>> {
        Ok(self.tx_image_raw(q, ctx, bx, rw)?.with_ctx(ctx))
    pub fn has_same_parent(&self, other: &Self) -> bool {
        ptr::eq(self.get_parent(), other.get_parent())

    pub fn is_parent_buffer(&self) -> bool {
        self.parent.as_ref().map_or(false, |p| p.is_buffer())

    pub fn is_image_from_buffer(&self) -> bool {
        self.is_parent_buffer() && self.mem_type == CL_MEM_OBJECT_IMAGE2D

    // this is kinda bogus, because that won't work with system SVM, but the spec wants us to
    pub fn is_svm(&self) -> bool {
        let mem = self.get_parent();
        self.context.find_svm_alloc(mem.host_ptr.cast()).is_some()
            && bit_check(mem.flags, CL_MEM_USE_HOST_PTR)
    fn get_res(&self) -> CLResult<&HashMap<&'static Device, Arc<PipeResource>>> {
        self.get_parent().res.as_ref().ok_or(CL_OUT_OF_HOST_MEMORY)

    pub fn get_res_of_dev(&self, dev: &Device) -> CLResult<&Arc<PipeResource>> {
        Ok(self.get_res()?.get(dev).unwrap())

    fn get_parent(&self) -> &Self {
        if let Some(parent) = &self.parent {

    fn to_parent<'a>(&'a self, offset: &mut usize) -> &'a Self {
        if let Some(parent) = &self.parent {
            offset.add_assign(self.offset);
    // true if CL_MEM_USE_HOST_PTR was requested but the driver could not wrap the application's
    // pointer directly (the resource is not a user resource), i.e. host_ptr acts as a shadow
    // that has to be kept in sync manually
    fn has_user_shadow_buffer(&self, d: &Device) -> CLResult<bool> {
        let r = self.get_res()?.get(d).unwrap();
        Ok(!r.is_user && bit_check(self.flags, CL_MEM_USE_HOST_PTR))
        assert!(self.is_buffer());

        let tx = self.tx(q, ctx, offset, size, RWFlags::RD)?;

            ptr::copy_nonoverlapping(tx.ptr(), ptr, size);

    pub fn write_from_user(
        assert!(self.is_buffer());

        let b = self.to_parent(&mut offset);
        let r = b.get_res()?.get(&q.device).unwrap();

            offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
            size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
        mut src_origin: CLVec<usize>,
        mut dst_origin: CLVec<usize>,
        region: &CLVec<usize>,

        let src = self.to_parent(&mut src_origin[0]);
        let dst = dst.to_parent(&mut dst_origin[0]);

        let src_res = src.get_res()?.get(&q.device).unwrap();
        let dst_res = dst.get_res()?.get(&q.device).unwrap();

        // we only want to use sw_copy when the mem objects have different types or when the copy
        // can have custom strides (e.g. an image2d created from a buffer or image)
        if self.is_buffer() != dst_base.is_buffer()
            || !self.is_buffer() && self.parent.is_some()
            || !dst_base.is_buffer() && dst_base.parent.is_some()
            let mut src_pitch = [0, 0, 0];
            let mut dst_pitch = [0, 0, 0];

            let bpp = if !self.is_buffer() {
                self.pixel_size().unwrap() as usize

                dst_base.pixel_size().unwrap() as usize

                // If the image is created from a buffer, use the image's row and slice pitch instead
                if self.is_image_from_buffer() {
                    src_pitch[1] = self.image_desc.row_pitch()? as usize;
                    src_pitch[2] = self.image_desc.slice_pitch();

                    src_pitch[1] = region[0] * bpp;
                    src_pitch[2] = region[0] * region[1] * bpp;

                let (offset, size) = CLVec::calc_offset_size(src_origin, region, src_pitch);
                tx_src = src.tx(q, ctx, offset, size, RWFlags::RD)?;

                tx_src = src.tx_image(
                    &create_pipe_box(src_origin, *region, src.mem_type)?,

                src_pitch = [1, tx_src.row_pitch() as usize, tx_src.slice_pitch()];
                // If the image is created from a buffer, use the image's row and slice pitch instead
                if dst_base.is_image_from_buffer() {
                    dst_pitch[1] = dst_base.image_desc.row_pitch()? as usize;
                    dst_pitch[2] = dst_base.image_desc.slice_pitch();

                    dst_pitch[1] = region[0] * bpp;
                    dst_pitch[2] = region[0] * region[1] * bpp;

                let (offset, size) = CLVec::calc_offset_size(dst_origin, region, dst_pitch);
                tx_dst = dst.tx(q, ctx, offset, size, RWFlags::WR)?;

                tx_dst = dst.tx_image(
                    &create_pipe_box(dst_origin, *region, dst.mem_type)?,

                dst_pitch = [1, tx_dst.row_pitch() as usize, tx_dst.slice_pitch()];
            // these pitch values must not contain a 0 in any component
            assert!(src_pitch[0] != 0 && src_pitch[1] != 0 && src_pitch[2] != 0);
            assert!(dst_pitch[0] != 0 && dst_pitch[1] != 0 && dst_pitch[2] != 0);
            let bx = create_pipe_box(src_origin, *region, src.mem_type)?;
            let mut dst_origin: [u32; 3] = dst_origin.try_into()?;

            if src.mem_type == CL_MEM_OBJECT_IMAGE1D_ARRAY {
                (dst_origin[1], dst_origin[2]) = (dst_origin[2], dst_origin[1]);

            ctx.resource_copy_region(src_res, dst_res, &dst_origin, &bx);
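
            // both objects are plain buffers or plain images of the same type at this point, so
            // the copy stays on the GPU as a single resource_copy_region blit instead of going
            // through the mapped sw_copy path above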
        assert!(self.is_buffer());

        let b = self.to_parent(&mut offset);
        let res = b.get_res()?.get(&q.device).unwrap();

            offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
            size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
        origin: &CLVec<usize>,
        region: &CLVec<usize>,

        assert!(!self.is_buffer());

        let res = self.get_res()?.get(&q.device).unwrap();

        // make sure we allocate multiples of 4 bytes so drivers don't read out of bounds or

        // TODO: use div_ceil once it's available
        let pixel_size = align(self.pixel_size().unwrap() as usize, size_of::<u32>());
        let mut new_pattern: Vec<u32> = vec![0; pixel_size / size_of::<u32>()];

        // we don't support CL_DEPTH for now
        assert!(pattern.len() == 4);
        // SAFETY: pointers have to be valid for reads/writes of exactly one pixel of their
        // respective format.
        // `new_pattern` has the correct size due to the size calculation above.
        // `pattern` is validated through the CL API; not following the CL API rules results in
        // undefined behavior. It's expected to be a 4 component array of 32 bit values, except
        // for CL_DEPTH where it's just one value.

            util_format_pack_rgba(
                new_pattern.as_mut_ptr().cast(),
                pattern.as_ptr().cast(),
        // If the image is created from a buffer, use clear_image_buffer instead
        if self.is_parent_buffer() {
                self.image_desc.row_pitch()? as usize,
                self.image_desc.slice_pitch(),

            ctx.clear_image_buffer(res, &new_pattern, origin, region, strides, pixel_size);

            let bx = create_pipe_box(*origin, *region, self.mem_type)?;
            ctx.clear_texture(res, &new_pattern, &bx);
    pub fn write_from_user_rect(
        region: &CLVec<usize>,
        src_origin: &CLVec<usize>,
        src_row_pitch: usize,
        mut src_slice_pitch: usize,
        dst_origin: &CLVec<usize>,
        dst_row_pitch: usize,
        dst_slice_pitch: usize,

        if self.is_buffer() || self.is_image_from_buffer() {
            let pixel_size = self.pixel_size().unwrap();
            let (offset, size) = CLVec::calc_offset_size(
                    pixel_size.try_into().unwrap(),

            let tx = self.tx(q, ctx, offset, size, RWFlags::WR)?;

            assert!(dst_row_pitch == self.image_desc.image_row_pitch);
            assert!(dst_slice_pitch == self.image_desc.image_slice_pitch);
            assert!(src_origin == &CLVec::default());

            let res = self.get_res()?.get(&q.device).unwrap();
            let bx = create_pipe_box(*dst_origin, *region, self.mem_type)?;

            if self.mem_type == CL_MEM_OBJECT_IMAGE1D_ARRAY {
                src_slice_pitch = src_row_pitch;
    pub fn read_to_user_rect(
        region: &CLVec<usize>,
        src_origin: &CLVec<usize>,
        mut src_row_pitch: usize,
        mut src_slice_pitch: usize,
        dst_origin: &CLVec<usize>,
        dst_row_pitch: usize,
        dst_slice_pitch: usize,

        if self.is_buffer() || self.is_image_from_buffer() {
            pixel_size = self.pixel_size().unwrap();
            let (offset, size) = CLVec::calc_offset_size(
                    pixel_size.try_into().unwrap(),

            tx = self.tx(q, ctx, offset, size, RWFlags::RD)?;

            assert!(dst_origin == &CLVec::default());

            let bx = create_pipe_box(*src_origin, *region, self.mem_type)?;
            tx = self.tx_image(q, ctx, &bx, RWFlags::RD)?;
            src_row_pitch = tx.row_pitch() as usize;
            src_slice_pitch = tx.slice_pitch();

            pixel_size = self.pixel_size().unwrap();
        region: &CLVec<usize>,
        src_origin: &CLVec<usize>,
        src_row_pitch: usize,
        src_slice_pitch: usize,
        dst_origin: &CLVec<usize>,
        dst_row_pitch: usize,
        dst_slice_pitch: usize,

        assert!(self.is_buffer());
        assert!(dst.is_buffer());

            CLVec::calc_offset_size(src_origin, region, [1, src_row_pitch, src_slice_pitch]);
        let tx_src = self.tx(q, ctx, offset, size, RWFlags::RD)?;

            CLVec::calc_offset_size(dst_origin, region, [1, dst_row_pitch, dst_slice_pitch]);
        let tx_dst = dst.tx(q, ctx, offset, size, RWFlags::WR)?;
        // TODO: check whether we can use hw accelerated paths (e.g. resource_copy_region or blits)
    // TODO: only sync on map when the memory is not mapped with discard
    pub fn sync_shadow_buffer(
        let mut lock = self.maps.lock().unwrap();
        if !lock.increase_ref(q.device, ptr) {

        if self.has_user_shadow_buffer(q.device)? {
            self.read_to_user(q, ctx, 0, self.host_ptr, self.size)

            if let Some(shadow) = lock.tx.get(&q.device).and_then(|tx| tx.shadow.as_ref()) {
                let b = self.to_parent(&mut offset);
                let res = b.get_res_of_dev(q.device)?;
                let bx = create_pipe_box(
                    [offset, 0, 0].into(),
                    [self.size, 1, 1].into(),
                    CL_MEM_OBJECT_BUFFER,

                ctx.resource_copy_region(res, shadow, &[0; 3], &bx);
    // TODO: only sync on map when the memory is not mapped with discard
    pub fn sync_shadow_image(
        let mut lock = self.maps.lock().unwrap();
        if !lock.increase_ref(q.device, ptr) {

        if self.has_user_shadow_buffer(q.device)? {
            self.read_to_user_rect(
                &self.image_desc.size(),
                self.image_desc.image_row_pitch,
                self.image_desc.image_slice_pitch,

            if let Some(shadow) = lock.tx.get(q.device).and_then(|tx| tx.shadow.as_ref()) {
                let res = self.get_res_of_dev(q.device)?;
                let bx = self.image_desc.bx()?;
                ctx.resource_copy_region(res, shadow, &[0, 0, 0], &bx);
    /// Maps the resource of the device associated with the queue.
    ///
    /// Mapping resources would be quite straightforward if OpenCL did not allow so-called
    /// non-blocking maps. Non-blocking maps shall return a valid pointer to the mapped region
    /// immediately, but should not synchronize data (in case of shadow buffers) until after the
    /// map event is reached in the queue.
    /// This makes it impossible to simply use pipe_transfers, as those can't be explicitly
    /// synced by the frontend.
    ///
    /// In order to have a compliant implementation of the mapping API we have to consider the
    /// following cases:
    /// 1. Mapping a cl_mem object with CL_MEM_USE_HOST_PTR: We simply return the host_ptr.
    ///    Synchronization of shadowed host ptrs is done in `sync_shadow_buffer` and
    ///    `sync_shadow_image` on demand.
    /// 2. Mapping linear resources on UMA systems: We simply create the pipe_transfer with
    ///    `PIPE_MAP_DIRECTLY` and `PIPE_MAP_UNSYNCHRONIZED` and return the attached pointer.
    /// 3. On non-UMA systems, or when 2. fails (e.g. due to the resource being tiled), we
    ///    - create a shadow pipe_resource with `PIPE_USAGE_STAGING`,
    ///      `PIPE_RESOURCE_FLAG_MAP_PERSISTENT` and `PIPE_RESOURCE_FLAG_MAP_COHERENT`
    ///    - create a pipe_transfer with `PIPE_MAP_COHERENT`, `PIPE_MAP_PERSISTENT` and
    ///      `PIPE_MAP_UNSYNCHRONIZED`
    ///    - sync the shadow buffer like a host_ptr shadow buffer in 1.
    ///
    /// Taking this approach we guarantee that we only copy when actually needed while making
    /// sure the content behind the returned pointer is valid until unmapped.
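    ///
    /// A typical sequence (sketch, not exact call sites): `map_buffer`/`map_image` return the
    /// pointer right away, the queue later runs `sync_shadow_buffer`/`sync_shadow_image` when
    /// the map event executes, and `unmap` writes any shadow or host_ptr copy back into the real
    /// resource before the transfer is retired via `clean_up_tx`.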
        lock: &'a mut MutexGuard<Mappings>,
    ) -> CLResult<&'a PipeTransfer> {
        if let Entry::Vacant(e) = lock.tx.entry(q.device) {
            let (tx, res) = if self.is_buffer() {
                self.tx_raw_async(q, rw)?

                let bx = self.image_desc.bx()?;
                self.tx_image_raw_async(q, &bx, rw)?

            e.insert(MappingTransfer::new(tx, res));

            lock.mark_pending(q.device);

        Ok(&lock.tx.get_mut(&q.device).unwrap().tx)
    pub fn map_buffer(&self, q: &Arc<Queue>, offset: usize, _size: usize) -> CLResult<*mut c_void> {
        assert!(self.is_buffer());

        let mut lock = self.maps.lock().unwrap();
        let ptr = if self.has_user_shadow_buffer(q.device)? {

            let tx = self.map(q, &mut lock, RWFlags::RW)?;

        let ptr = unsafe { ptr.add(offset) };
        origin: &CLVec<usize>,
        _region: &CLVec<usize>,
        row_pitch: &mut usize,
        slice_pitch: &mut usize,
    ) -> CLResult<*mut c_void> {
        assert!(!self.is_buffer());

        let mut lock = self.maps.lock().unwrap();
        // we might have a host_ptr shadow buffer or an image created from a buffer
        let ptr = if self.has_user_shadow_buffer(q.device)? || self.is_parent_buffer() {
            *row_pitch = self.image_desc.image_row_pitch;
            *slice_pitch = self.image_desc.image_slice_pitch;

            if let Some(src) = &self.parent {
                let tx = src.map(q, &mut lock, RWFlags::RW)?;

            let tx = self.map(q, &mut lock, RWFlags::RW)?;

            if self.image_desc.dims() > 1 {
                *row_pitch = tx.row_pitch() as usize;

            if self.image_desc.dims() > 2 || self.image_desc.is_array() {
                *slice_pitch = tx.slice_pitch();
                self.pixel_size().unwrap() as usize,

    pub fn is_mapped_ptr(&self, ptr: *mut c_void) -> bool {
        self.maps.lock().unwrap().maps.contains_key(&ptr)
    // TODO: only sync on unmap when the memory is not mapped for writing
    pub fn unmap(&self, q: &Arc<Queue>, ctx: &PipeContext, ptr: *mut c_void) -> CLResult<()> {
        let mut lock = self.maps.lock().unwrap();
        if !lock.maps.contains_key(&ptr) {

        let (needs_sync, shadow) = lock.decrease_ref(ptr, q.device);

            if let Some(shadow) = shadow {
                let b = self.to_parent(&mut offset);
                let res = b.get_res_of_dev(q.device)?;

                let bx = if b.is_buffer() {
                        [self.size, 1, 1].into(),
                        CL_MEM_OBJECT_BUFFER,

                    self.image_desc.bx()?

                ctx.resource_copy_region(shadow, res, &[offset as u32, 0, 0], &bx);
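
                // this is the reverse of the copy done in sync_shadow_buffer / sync_shadow_image:
                // on unmap the shadow's contents are written back into the real resource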
            } else if self.has_user_shadow_buffer(q.device)? {
                if self.is_buffer() {
                    self.write_from_user(q, ctx, 0, self.host_ptr, self.size)?;

                    self.write_from_user_rect(
                        &self.image_desc.size(),
                        self.image_desc.image_row_pitch,
                        self.image_desc.image_slice_pitch,

                        self.image_desc.image_row_pitch,
                        self.image_desc.image_slice_pitch,

        lock.clean_up_tx(q.device, ctx);
    fn drop(&mut self) {
        let cl = cl_mem::from_ptr(self);
            .for_each(|cb| cb(cl));

        for (d, tx) in self.maps.get_mut().unwrap().tx.drain() {
            d.helper_ctx().unmap(tx.tx);
pub struct Sampler {
    pub base: CLObjectBase<CL_INVALID_SAMPLER>,
    pub context: Arc<Context>,
    pub normalized_coords: bool,
    pub addressing_mode: cl_addressing_mode,
    pub filter_mode: cl_filter_mode,
    pub props: Option<Properties<cl_sampler_properties>>,

impl_cl_type_trait!(cl_sampler, Sampler, CL_INVALID_SAMPLER);
        context: Arc<Context>,
        normalized_coords: bool,
        addressing_mode: cl_addressing_mode,
        filter_mode: cl_filter_mode,
        props: Option<Properties<cl_sampler_properties>>,

            base: CLObjectBase::new(),
            normalized_coords,
            addressing_mode,
            filter_mode,
        addressing_mode: u32,
        normalized_coords: u32,
    ) -> (cl_addressing_mode, cl_filter_mode, bool) {
        let addr_mode = match addressing_mode {
            cl_sampler_addressing_mode::SAMPLER_ADDRESSING_MODE_NONE => CL_ADDRESS_NONE,
            cl_sampler_addressing_mode::SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE => {
                CL_ADDRESS_CLAMP_TO_EDGE
            cl_sampler_addressing_mode::SAMPLER_ADDRESSING_MODE_CLAMP => CL_ADDRESS_CLAMP,
            cl_sampler_addressing_mode::SAMPLER_ADDRESSING_MODE_REPEAT => CL_ADDRESS_REPEAT,
            cl_sampler_addressing_mode::SAMPLER_ADDRESSING_MODE_REPEAT_MIRRORED => {
                CL_ADDRESS_MIRRORED_REPEAT
            _ => panic!("unknown addressing_mode"),

        let filter = match filter_mode {
            cl_sampler_filter_mode::SAMPLER_FILTER_MODE_NEAREST => CL_FILTER_NEAREST,
            cl_sampler_filter_mode::SAMPLER_FILTER_MODE_LINEAR => CL_FILTER_LINEAR,
            _ => panic!("unknown filter_mode"),

        (addr_mode, filter, normalized_coords != 0)
        (addressing_mode, filter_mode, normalized_coords): (
    ) -> pipe_sampler_state {
        let mut res = pipe_sampler_state::default();

        let wrap = match addressing_mode {
            CL_ADDRESS_CLAMP_TO_EDGE => pipe_tex_wrap::PIPE_TEX_WRAP_CLAMP_TO_EDGE,
            CL_ADDRESS_CLAMP => pipe_tex_wrap::PIPE_TEX_WRAP_CLAMP_TO_BORDER,
            CL_ADDRESS_REPEAT => pipe_tex_wrap::PIPE_TEX_WRAP_REPEAT,
            CL_ADDRESS_MIRRORED_REPEAT => pipe_tex_wrap::PIPE_TEX_WRAP_MIRROR_REPEAT,
            // TODO: what's a reasonable default?
            _ => pipe_tex_wrap::PIPE_TEX_WRAP_CLAMP_TO_EDGE,

        let img_filter = match filter_mode {
            CL_FILTER_NEAREST => pipe_tex_filter::PIPE_TEX_FILTER_NEAREST,
            CL_FILTER_LINEAR => pipe_tex_filter::PIPE_TEX_FILTER_LINEAR,
            _ => panic!("unknown filter_mode"),

        res.set_min_img_filter(img_filter);
        res.set_mag_img_filter(img_filter);
        res.set_unnormalized_coords((!normalized_coords).into());
        res.set_wrap_r(wrap);
        res.set_wrap_s(wrap);
        res.set_wrap_t(wrap);
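
        // note: CL exposes a single addressing mode and filter per sampler, so the same wrap
        // value is applied to all three texture coordinates and the same filter to both the
        // minification and magnification filters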
    pub fn pipe(&self) -> pipe_sampler_state {
            self.addressing_mode,
            self.normalized_coords,