The work-group size should always be a multiple of the subgroup size.
This is a basic requirement: otherwise some work-items will be idle (no-ops).
It might make sense to refine the value according to a kernel's
resource usage, but that's a possible optimization for the future.
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
break;
case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
- buf.as_scalar<size_t>() = 1;
+ buf.as_scalar<size_t>() = dev.subgroup_size();
break;
case CL_KERNEL_PRIVATE_MEM_SIZE:
return { v.begin(), v.end() };
}
+cl_uint
+device::subgroup_size() const { // Returns this device's PIPE_COMPUTE_CAP_SUBGROUP_SIZE compute parameter.
+ return get_compute_param<uint32_t>(pipe, PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0]; // element 0 of the queried uint32_t values, converted to cl_uint
+}
+
std::string
device::device_name() const {
return pipe->get_name(pipe);
bool has_doubles() const;
std::vector<size_t> max_block_size() const;
+ cl_uint subgroup_size() const;
std::string device_name() const;
std::string vendor_name() const;
enum pipe_shader_ir ir_format() const;