void
v3dv_cmd_buffer_rewrite_indirect_csd_job(
+ struct v3dv_device *device,
struct v3dv_csd_indirect_cpu_job_info *info,
const uint32_t *wg_counts)
{
submit->cfg[1] = wg_counts[1] << V3D_CSD_CFG012_WG_COUNT_SHIFT;
submit->cfg[2] = wg_counts[2] << V3D_CSD_CFG012_WG_COUNT_SHIFT;
- submit->cfg[4] = DIV_ROUND_UP(info->wg_size, 16) *
- (wg_counts[0] * wg_counts[1] * wg_counts[2]) - 1;
+ uint32_t num_batches = DIV_ROUND_UP(info->wg_size, 16) *
+ (wg_counts[0] * wg_counts[1] * wg_counts[2]);
+ /* From V3D 7.1.6 the CSD takes the actual batch count; older hardware expects the count minus one. */
+ if (device->devinfo.ver < 71 ||
+ (device->devinfo.ver == 71 && device->devinfo.rev < 6)) {
+ submit->cfg[4] = num_batches - 1;
+ } else {
+ submit->cfg[4] = num_batches;
+ }
assert(submit->cfg[4] != ~0);
if (info->needs_wg_uniform_rewrite) {
uint32_t **wg_uniform_offsets_out,
uint32_t *wg_size_out)
{
+ struct v3dv_device *device = cmd_buffer->device;
struct v3dv_pipeline *pipeline = cmd_buffer->state.compute.pipeline;
assert(pipeline && pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]);
struct v3dv_shader_variant *cs_variant =
if (wg_size_out)
*wg_size_out = wg_size;
- submit->cfg[4] = num_batches - 1;
+ /* From V3D 7.1.6 the CSD takes the actual batch count; older hardware expects the count minus one. */
+ if (device->devinfo.ver < 71 ||
+ (device->devinfo.ver == 71 && device->devinfo.rev < 6)) {
+ submit->cfg[4] = num_batches - 1;
+ } else {
+ submit->cfg[4] = num_batches;
+ }
assert(submit->cfg[4] != ~0);
assert(pipeline->shared_data->assembly_bo);
struct v3dv_bo *cs_assembly_bo = pipeline->shared_data->assembly_bo;
submit->cfg[5] = cs_assembly_bo->offset + cs_variant->assembly_offset;
- submit->cfg[5] |= V3D_CSD_CFG5_PROPAGATE_NANS;
if (cs_variant->prog_data.base->single_seg)
submit->cfg[5] |= V3D_CSD_CFG5_SINGLE_SEG;
if (cs_variant->prog_data.base->threads == 4)
submit->cfg[5] |= V3D_CSD_CFG5_THREADING;
+ /* The PROPAGATE_NANS bit in CFG5 is reserved from V3D 7.1 onwards, so only set it on older hardware */
+ if (device->devinfo.ver < 71)
+ submit->cfg[5] |= V3D_CSD_CFG5_PROPAGATE_NANS;
if (cs_variant->prog_data.cs->shared_size > 0) {
job->csd.shared_memory =
void v3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
struct drm_v3d_submit_tfu *tfu);
-void v3dv_cmd_buffer_rewrite_indirect_csd_job(struct v3dv_csd_indirect_cpu_job_info *info,
+void v3dv_cmd_buffer_rewrite_indirect_csd_job(struct v3dv_device *device,
+ struct v3dv_csd_indirect_cpu_job_info *info,
const uint32_t *wg_counts);
void v3dv_cmd_buffer_add_private_obj(struct v3dv_cmd_buffer *cmd_buffer,
submit.cfg[3] |= (wg_size & 0xff) << V3D_CSD_CFG3_WG_SIZE_SHIFT;
- /* Number of batches the dispatch will invoke (minus 1). */
- submit.cfg[4] = num_batches - 1;
+ /* Number of batches the dispatch will invoke.
+ * V3D 7.1.6 and later don't subtract 1 from the number of batches
+ */
+ if (v3d->screen->devinfo.ver < 71 ||
+ (v3d->screen->devinfo.ver == 71 && v3d->screen->devinfo.rev < 6)) {
+ submit.cfg[4] = num_batches - 1;
+ } else {
+ submit.cfg[4] = num_batches;
+ }
/* Make sure we didn't accidentally underflow. */
assert(submit.cfg[4] != ~0);
v3d_job_add_bo(job, v3d_resource(v3d->prog.compute->resource)->bo);
submit.cfg[5] = (v3d_resource(v3d->prog.compute->resource)->bo->offset +
v3d->prog.compute->offset);
- submit.cfg[5] |= V3D_CSD_CFG5_PROPAGATE_NANS;
+ if (v3d->screen->devinfo.ver < 71)
+ submit.cfg[5] |= V3D_CSD_CFG5_PROPAGATE_NANS;
if (v3d->prog.compute->prog_data.base->single_seg)
submit.cfg[5] |= V3D_CSD_CFG5_SINGLE_SEG;
if (v3d->prog.compute->prog_data.base->threads == 4)