/*
 * Copyright 2010 Red Hat Inc.
 * Copyright 2014-2017 Broadcom
 * Copyright 2019-2020 Collabora, Ltd.
 * Copyright 2006 VMware, Inc.
 * SPDX-License-Identifier: MIT
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>

#include "asahi/compiler/agx_compile.h"
#include "asahi/layout/layout.h"
#include "asahi/lib/agx_formats.h"
#include "asahi/lib/decode.h"
#include "drm-uapi/drm_fourcc.h"
#include "frontend/winsys_handle.h"
#include "gallium/auxiliary/renderonly/renderonly.h"
#include "gallium/auxiliary/util/u_debug_cb.h"
#include "gallium/auxiliary/util/u_framebuffer.h"
#include "gallium/auxiliary/util/u_sample_positions.h"
#include "gallium/auxiliary/util/u_surface.h"
#include "gallium/auxiliary/util/u_transfer.h"
#include "gallium/auxiliary/util/u_transfer_helper.h"
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
#include "util/format/u_format.h"
#include "util/half_float.h"
#include "util/u_drm.h"
#include "util/u_gen_mipmap.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_screen.h"
#include "util/u_upload_mgr.h"
#include "agx_device.h"
#include "agx_disk_cache.h"
#include "agx_fence.h"
#include "agx_public.h"
#include "agx_state.h"
#include "agx_tilebuffer.h"
/* Fake values, pending UAPI upstreaming. Guarded so a future drm_fourcc.h
 * that defines the real modifiers takes precedence.
 */
#ifndef DRM_FORMAT_MOD_APPLE_TWIDDLED
#define DRM_FORMAT_MOD_APPLE_TWIDDLED (2)
#endif

#ifndef DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED
#define DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED (3)
#endif
51 /* clang-format off */
52 static const struct debug_named_value agx_debug_options[] = {
53 {"trace", AGX_DBG_TRACE, "Trace the command stream"},
54 {"deqp", AGX_DBG_DEQP, "Hacks for dEQP"},
55 {"no16", AGX_DBG_NO16, "Disable 16-bit support"},
56 {"perf", AGX_DBG_PERF, "Print performance warnings"},
58 {"dirty", AGX_DBG_DIRTY, "Disable dirty tracking"},
60 {"precompile",AGX_DBG_PRECOMPILE,"Precompile shaders for shader-db"},
61 {"nocompress",AGX_DBG_NOCOMPRESS,"Disable lossless compression"},
62 {"nocluster", AGX_DBG_NOCLUSTER,"Disable vertex clustering"},
63 {"sync", AGX_DBG_SYNC, "Synchronously wait for all submissions"},
64 {"stats", AGX_DBG_STATS, "Show command execution statistics"},
65 {"resource", AGX_DBG_RESOURCE, "Log resource operations"},
66 {"batch", AGX_DBG_BATCH, "Log batches"},
67 {"nowc", AGX_DBG_NOWC, "Disable write-combining"},
68 {"synctvb", AGX_DBG_SYNCTVB, "Synchronous TVB growth"},
69 {"smalltile", AGX_DBG_SMALLTILE,"Force 16x16 tiles"},
70 {"nomsaa", AGX_DBG_NOMSAA, "Force disable MSAA"},
71 {"noshadow", AGX_DBG_NOSHADOW, "Force disable resource shadowing"},
76 uint64_t agx_best_modifiers[] = {
77 DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED,
78 DRM_FORMAT_MOD_APPLE_TWIDDLED,
79 DRM_FORMAT_MOD_LINEAR,
/* These limits are arbitrarily chosen and subject to change as
 * we discover more workloads with heavy shadowing.
 *
 * Maximum size of a shadowed object in bytes.
 * Hint: 1024x1024xRGBA8 = 4 MiB. Go higher for compression.
 */
#define MAX_SHADOW_BYTES (6 * 1024 * 1024)

/* Maximum cumulative size to shadow an object before we flush.
 * Allows shadowing a 4MiB + meta object 8 times with the logic
 * below (+1 shadow offset implied).
 */
#define MAX_TOTAL_SHADOW_BYTES (32 * 1024 * 1024)
96 void agx_init_state_functions(struct pipe_context *ctx);
102 static enum ail_tiling
103 ail_modifier_to_tiling(uint64_t modifier)
106 case DRM_FORMAT_MOD_LINEAR:
107 return AIL_TILING_LINEAR;
108 case DRM_FORMAT_MOD_APPLE_TWIDDLED:
109 return AIL_TILING_TWIDDLED;
110 case DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED:
111 return AIL_TILING_TWIDDLED_COMPRESSED;
113 unreachable("Unsupported modifier");
117 const static char *s_tiling[] = {
118 [AIL_TILING_LINEAR] = "LINR",
119 [AIL_TILING_TWIDDLED] = "TWID",
120 [AIL_TILING_TWIDDLED_COMPRESSED] = "COMP",
/* Log a message only when AGX_DBG_RESOURCE is set for the resource's device */
#define rsrc_debug(res, ...)                                                   \
   do {                                                                        \
      if (agx_device((res)->base.screen)->debug & AGX_DBG_RESOURCE)            \
         agx_msg(__VA_ARGS__);                                                 \
   } while (0)
130 agx_resource_debug(struct agx_resource *res, const char *msg)
132 if (!(agx_device(res->base.screen)->debug & AGX_DBG_RESOURCE))
136 if (res->bo->prime_fd >= 0) {
138 if (!fstat(res->bo->prime_fd, &sb))
143 "%s%s %dx%dx%d %dL %d/%dM %dS M:%llx %s %s%s S:0x%llx LS:0x%llx CS:0x%llx "
144 "Base=0x%llx Size=0x%llx Meta=0x%llx/0x%llx (%s) %s%s%s%s%s%sfd:%d(%d) @ %p\n",
145 msg ?: "", util_format_short_name(res->base.format), res->base.width0,
146 res->base.height0, res->base.depth0, res->base.array_size,
147 res->base.last_level, res->layout.levels, res->layout.sample_count_sa,
148 (long long)res->modifier, s_tiling[res->layout.tiling],
149 res->layout.mipmapped_z ? "MZ " : "",
150 res->layout.page_aligned_layers ? "PL " : "",
151 (long long)res->layout.linear_stride_B,
152 (long long)res->layout.layer_stride_B,
153 (long long)res->layout.compression_layer_stride_B,
154 (long long)res->bo->ptr.gpu, (long long)res->layout.size_B,
155 res->layout.metadata_offset_B
156 ? ((long long)res->bo->ptr.gpu + res->layout.metadata_offset_B)
158 (long long)res->layout.metadata_offset_B, res->bo->label,
159 res->bo->flags & AGX_BO_SHARED ? "SH " : "",
160 res->bo->flags & AGX_BO_LOW_VA ? "LO " : "",
161 res->bo->flags & AGX_BO_EXEC ? "EX " : "",
162 res->bo->flags & AGX_BO_WRITEBACK ? "WB " : "",
163 res->bo->flags & AGX_BO_SHAREABLE ? "SA " : "",
164 res->bo->flags & AGX_BO_READONLY ? "RO " : "", res->bo->prime_fd, ino,
169 agx_resource_setup(struct agx_device *dev, struct agx_resource *nresource)
171 struct pipe_resource *templ = &nresource->base;
173 nresource->layout = (struct ail_layout){
174 .tiling = ail_modifier_to_tiling(nresource->modifier),
175 .mipmapped_z = templ->target == PIPE_TEXTURE_3D,
176 .format = templ->format,
177 .width_px = templ->width0,
178 .height_px = templ->height0,
179 .depth_px = templ->depth0 * templ->array_size,
180 .sample_count_sa = MAX2(templ->nr_samples, 1),
181 .levels = templ->last_level + 1,
182 .writeable_image = templ->bind & PIPE_BIND_SHADER_IMAGE,
186 static struct pipe_resource *
187 agx_resource_from_handle(struct pipe_screen *pscreen,
188 const struct pipe_resource *templat,
189 struct winsys_handle *whandle, unsigned usage)
191 struct agx_device *dev = agx_device(pscreen);
192 struct agx_resource *rsc;
193 struct pipe_resource *prsc;
195 assert(whandle->type == WINSYS_HANDLE_TYPE_FD);
197 rsc = CALLOC_STRUCT(agx_resource);
201 rsc->modifier = whandle->modifier == DRM_FORMAT_MOD_INVALID
202 ? DRM_FORMAT_MOD_LINEAR
205 /* We need strides to be aligned. ail asserts this, but we want to fail
206 * gracefully so the app can handle the error.
208 if (rsc->modifier == DRM_FORMAT_MOD_LINEAR && (whandle->stride % 16) != 0) {
217 pipe_reference_init(&prsc->reference, 1);
218 prsc->screen = pscreen;
220 rsc->bo = agx_bo_import(dev, whandle->handle);
221 /* Sometimes an import can fail e.g. on an invalid buffer fd, out of
222 * memory space to mmap it etc.
229 agx_resource_setup(dev, rsc);
231 if (rsc->layout.tiling == AIL_TILING_LINEAR) {
232 rsc->layout.linear_stride_B = whandle->stride;
233 } else if (whandle->stride != ail_get_wsi_stride_B(&rsc->layout, 0)) {
238 assert(whandle->offset == 0);
240 ail_make_miptree(&rsc->layout);
242 if (prsc->target == PIPE_BUFFER) {
243 assert(rsc->layout.tiling == AIL_TILING_LINEAR);
244 util_range_init(&rsc->valid_buffer_range);
247 agx_resource_debug(rsc, "Import: ");
253 agx_resource_get_handle(struct pipe_screen *pscreen, struct pipe_context *ctx,
254 struct pipe_resource *pt, struct winsys_handle *handle,
257 struct agx_device *dev = agx_device(pscreen);
258 struct pipe_resource *cur = pt;
260 /* Even though asahi doesn't support multi-planar formats, we
261 * can get here through GBM, which does. Walk the list of planes
262 * to find the right one.
264 for (int i = 0; i < handle->plane; i++) {
270 struct agx_resource *rsrc = agx_resource(cur);
272 if (handle->type == WINSYS_HANDLE_TYPE_KMS && dev->ro) {
273 rsrc_debug(rsrc, "Get handle: %p (KMS RO)\n", rsrc);
275 if (!rsrc->scanout && dev->ro && (rsrc->base.bind & PIPE_BIND_SCANOUT)) {
277 renderonly_scanout_for_resource(&rsrc->base, dev->ro, NULL);
283 return renderonly_get_handle(rsrc->scanout, handle);
284 } else if (handle->type == WINSYS_HANDLE_TYPE_KMS) {
285 rsrc_debug(rsrc, "Get handle: %p (KMS)\n", rsrc);
287 handle->handle = rsrc->bo->handle;
288 } else if (handle->type == WINSYS_HANDLE_TYPE_FD) {
289 int fd = agx_bo_export(rsrc->bo);
295 if (dev->debug & AGX_DBG_RESOURCE) {
297 fstat(rsrc->bo->prime_fd, &sb);
298 agx_msg("Get handle: %p (FD %d/%ld)\n", rsrc, fd, (long)sb.st_ino);
301 /* Other handle types not supported */
305 handle->stride = ail_get_wsi_stride_B(&rsrc->layout, 0);
306 handle->size = rsrc->layout.size_B;
307 handle->offset = rsrc->layout.level_offsets_B[0];
308 handle->format = rsrc->layout.format;
309 handle->modifier = rsrc->modifier;
315 agx_resource_get_param(struct pipe_screen *pscreen, struct pipe_context *pctx,
316 struct pipe_resource *prsc, unsigned plane,
317 unsigned layer, unsigned level,
318 enum pipe_resource_param param, unsigned usage,
321 struct agx_resource *rsrc = (struct agx_resource *)prsc;
322 struct pipe_resource *cur;
326 case PIPE_RESOURCE_PARAM_STRIDE:
327 *value = ail_get_wsi_stride_B(&rsrc->layout, level);
329 case PIPE_RESOURCE_PARAM_OFFSET:
330 *value = rsrc->layout.level_offsets_B[level];
332 case PIPE_RESOURCE_PARAM_MODIFIER:
333 *value = rsrc->modifier;
335 case PIPE_RESOURCE_PARAM_NPLANES:
336 /* We don't support multi-planar formats, but we should still handle
337 * this case for GBM shared resources.
339 for (count = 0, cur = prsc; cur; cur = cur->next)
349 agx_is_2d(enum pipe_texture_target target)
351 return (target == PIPE_TEXTURE_2D || target == PIPE_TEXTURE_RECT);
355 agx_linear_allowed(const struct agx_resource *pres)
357 /* Mipmapping not allowed with linear */
358 if (pres->base.last_level != 0)
361 /* Depth/stencil buffers must not be linear */
362 if (pres->base.bind & PIPE_BIND_DEPTH_STENCIL)
365 /* Multisampling not allowed with linear */
366 if (pres->base.nr_samples > 1)
369 /* Block compression not allowed with linear */
370 if (util_format_is_compressed(pres->base.format))
373 switch (pres->base.target) {
374 /* 1D is always linear, even with image atomics */
376 case PIPE_TEXTURE_1D:
377 case PIPE_TEXTURE_1D_ARRAY:
379 /* Linear textures require specifying their strides explicitly, which only
380 * works for 2D textures. Rectangle textures are a special case of 2D.
382 * However, we don't want to support this case in the image atomic
383 * implementation, so linear shader images are specially forbidden.
385 case PIPE_TEXTURE_2D:
386 case PIPE_TEXTURE_2D_ARRAY:
387 case PIPE_TEXTURE_RECT:
388 if (pres->base.bind & PIPE_BIND_SHADER_IMAGE)
393 /* No other texture type can specify a stride */
402 agx_twiddled_allowed(const struct agx_resource *pres)
404 /* Certain binds force linear */
405 if (pres->base.bind & (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_LINEAR))
408 /* Buffers must be linear */
409 if (pres->base.target == PIPE_BUFFER)
412 /* Anything else may be twiddled */
417 agx_compression_allowed(const struct agx_resource *pres)
419 /* Allow disabling compression for debugging */
420 if (agx_device(pres->base.screen)->debug & AGX_DBG_NOCOMPRESS) {
421 rsrc_debug(pres, "No compression: disabled\n");
425 /* Limited to renderable */
426 if (pres->base.bind &
427 ~(PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET |
428 PIPE_BIND_DEPTH_STENCIL | PIPE_BIND_SHARED | PIPE_BIND_SCANOUT)) {
429 rsrc_debug(pres, "No compression: not renderable\n");
433 /* We use the PBE for compression via staging blits, so we can only compress
434 * renderable formats. As framebuffer compression, other formats don't make a
435 * ton of sense to compress anyway.
437 if (!agx_pixel_format[pres->base.format].renderable &&
438 !util_format_is_depth_or_stencil(pres->base.format)) {
439 rsrc_debug(pres, "No compression: format not renderable\n");
443 /* Lossy-compressed texture formats cannot be compressed */
444 assert(!util_format_is_compressed(pres->base.format) &&
445 "block-compressed formats are not renderable");
447 if (!ail_can_compress(pres->base.width0, pres->base.height0,
448 MAX2(pres->base.nr_samples, 1))) {
449 rsrc_debug(pres, "No compression: too small\n");
457 agx_select_modifier_from_list(const struct agx_resource *pres,
458 const uint64_t *modifiers, int count)
460 if (agx_twiddled_allowed(pres) && agx_compression_allowed(pres) &&
461 drm_find_modifier(DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED, modifiers,
463 return DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED;
465 if (agx_twiddled_allowed(pres) &&
466 drm_find_modifier(DRM_FORMAT_MOD_APPLE_TWIDDLED, modifiers, count))
467 return DRM_FORMAT_MOD_APPLE_TWIDDLED;
469 if (agx_linear_allowed(pres) &&
470 drm_find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count))
471 return DRM_FORMAT_MOD_LINEAR;
473 /* We didn't find anything */
474 return DRM_FORMAT_MOD_INVALID;
478 agx_select_best_modifier(const struct agx_resource *pres)
480 /* Prefer linear for staging resources, which should be as fast as possible
481 * to write from the CPU.
483 if (agx_linear_allowed(pres) && pres->base.usage == PIPE_USAGE_STAGING)
484 return DRM_FORMAT_MOD_LINEAR;
486 /* For SCANOUT or SHARED resources with no explicit modifier selection, force
487 * linear since we cannot expect consumers to correctly pass through the
488 * modifier (unless linear is not allowed at all).
490 if (agx_linear_allowed(pres) &&
491 pres->base.bind & (PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)) {
492 return DRM_FORMAT_MOD_LINEAR;
495 if (agx_twiddled_allowed(pres)) {
496 if (agx_compression_allowed(pres))
497 return DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED;
499 return DRM_FORMAT_MOD_APPLE_TWIDDLED;
502 assert(agx_linear_allowed(pres));
503 return DRM_FORMAT_MOD_LINEAR;
506 static struct pipe_resource *
507 agx_resource_create_with_modifiers(struct pipe_screen *screen,
508 const struct pipe_resource *templ,
509 const uint64_t *modifiers, int count)
511 struct agx_device *dev = agx_device(screen);
512 struct agx_resource *nresource;
514 nresource = CALLOC_STRUCT(agx_resource);
518 nresource->base = *templ;
519 nresource->base.screen = screen;
522 nresource->modifier =
523 agx_select_modifier_from_list(nresource, modifiers, count);
525 /* There may not be a matching modifier, bail if so */
526 if (nresource->modifier == DRM_FORMAT_MOD_INVALID) {
531 nresource->modifier = agx_select_best_modifier(nresource);
533 assert(nresource->modifier != DRM_FORMAT_MOD_INVALID);
536 /* If there's only 1 layer and there's no compression, there's no harm in
537 * inferring the shader image flag. Do so to avoid reallocation in case the
538 * resource is later used as an image.
540 if (nresource->modifier != DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED &&
541 templ->depth0 == 1) {
543 nresource->base.bind |= PIPE_BIND_SHADER_IMAGE;
546 nresource->mipmapped = (templ->last_level > 0);
548 assert(templ->format != PIPE_FORMAT_Z24X8_UNORM &&
549 templ->format != PIPE_FORMAT_Z24_UNORM_S8_UINT &&
550 "u_transfer_helper should have lowered");
552 agx_resource_setup(dev, nresource);
554 pipe_reference_init(&nresource->base.reference, 1);
556 ail_make_miptree(&nresource->layout);
558 if (templ->target == PIPE_BUFFER) {
559 assert(nresource->layout.tiling == AIL_TILING_LINEAR);
560 util_range_init(&nresource->valid_buffer_range);
563 /* Guess a label based on the bind */
564 unsigned bind = templ->bind;
566 const char *label = (bind & PIPE_BIND_INDEX_BUFFER) ? "Index buffer"
567 : (bind & PIPE_BIND_SCANOUT) ? "Scanout"
568 : (bind & PIPE_BIND_DISPLAY_TARGET) ? "Display target"
569 : (bind & PIPE_BIND_SHARED) ? "Shared resource"
570 : (bind & PIPE_BIND_RENDER_TARGET) ? "Render target"
571 : (bind & PIPE_BIND_DEPTH_STENCIL)
572 ? "Depth/stencil buffer"
573 : (bind & PIPE_BIND_SAMPLER_VIEW) ? "Texture"
574 : (bind & PIPE_BIND_VERTEX_BUFFER) ? "Vertex buffer"
575 : (bind & PIPE_BIND_CONSTANT_BUFFER) ? "Constant buffer"
576 : (bind & PIPE_BIND_GLOBAL) ? "Global memory"
577 : (bind & PIPE_BIND_SHADER_BUFFER) ? "Shader buffer"
578 : (bind & PIPE_BIND_SHADER_IMAGE) ? "Shader image"
581 uint32_t create_flags = 0;
583 /* Default to write-combine resources, but use writeback if that is expected
586 if (nresource->base.usage == PIPE_USAGE_STAGING ||
587 (nresource->base.flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
589 create_flags |= AGX_BO_WRITEBACK;
592 /* Allow disabling write-combine to debug performance issues */
593 if (dev->debug & AGX_DBG_NOWC) {
594 create_flags |= AGX_BO_WRITEBACK;
597 /* Create buffers that might be shared with the SHAREABLE flag */
598 if (bind & (PIPE_BIND_SCANOUT | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SHARED))
599 create_flags |= AGX_BO_SHAREABLE;
602 agx_bo_create(dev, nresource->layout.size_B, create_flags, label);
604 if (!nresource->bo) {
609 agx_resource_debug(nresource, "New: ");
610 return &nresource->base;
613 static struct pipe_resource *
614 agx_resource_create(struct pipe_screen *screen,
615 const struct pipe_resource *templ)
617 return agx_resource_create_with_modifiers(screen, templ, NULL, 0);
621 agx_resource_destroy(struct pipe_screen *screen, struct pipe_resource *prsrc)
623 struct agx_resource *rsrc = (struct agx_resource *)prsrc;
624 struct agx_screen *agx_screen = (struct agx_screen *)screen;
626 agx_resource_debug(rsrc, "Destroy: ");
628 if (prsrc->target == PIPE_BUFFER)
629 util_range_destroy(&rsrc->valid_buffer_range);
632 renderonly_scanout_destroy(rsrc->scanout, agx_screen->dev.ro);
634 agx_bo_unreference(rsrc->bo);
639 agx_batch_track_image(struct agx_batch *batch, struct pipe_image_view *image)
641 struct agx_resource *rsrc = agx_resource(image->resource);
643 if (image->shader_access & PIPE_IMAGE_ACCESS_WRITE) {
644 agx_batch_writes(batch, rsrc);
646 bool is_buffer = rsrc->base.target == PIPE_BUFFER;
647 unsigned level = is_buffer ? 0 : image->u.tex.level;
648 BITSET_SET(rsrc->data_valid, level);
651 util_range_add(&rsrc->base, &rsrc->valid_buffer_range, 0,
655 agx_batch_reads(batch, rsrc);
/* Flushing mapped regions is a no-op for this driver: writeback happens in
 * transfer_unmap.
 */
static void
agx_transfer_flush_region(struct pipe_context *pipe,
                          struct pipe_transfer *transfer,
                          const struct pipe_box *box)
{
}
670 /* Reallocate the backing buffer of a resource, returns true if successful */
672 agx_shadow(struct agx_context *ctx, struct agx_resource *rsrc, bool needs_copy)
674 struct agx_device *dev = agx_device(ctx->base.screen);
675 struct agx_bo *old = rsrc->bo;
676 size_t size = rsrc->layout.size_B;
677 unsigned flags = old->flags;
679 if (dev->debug & AGX_DBG_NOSHADOW)
682 /* If a resource is (or could be) shared, shadowing would desync across
683 * processes. (It's also not what this path is for.)
685 if (flags & (AGX_BO_SHARED | AGX_BO_SHAREABLE))
688 /* Do not shadow resources that are too large */
689 if (size > MAX_SHADOW_BYTES)
692 /* Do not shadow resources too much */
693 if (rsrc->shadowed_bytes >= MAX_TOTAL_SHADOW_BYTES)
696 rsrc->shadowed_bytes += size;
698 /* If we need to copy, we reallocate the resource with cached-coherent
699 * memory. This is a heuristic: it assumes that if the app needs a shadows
700 * (with a copy) now, it will again need to shadow-and-copy the same resource
701 * in the future. This accelerates the later copies, since otherwise the copy
702 * involves reading uncached memory.
705 flags |= AGX_BO_WRITEBACK;
707 struct agx_bo *new_ = agx_bo_create(dev, size, flags, old->label);
709 /* If allocation failed, we can fallback on a flush gracefully*/
714 perf_debug_ctx(ctx, "Shadowing %zu bytes on the CPU (%s)", size,
715 (old->flags & AGX_BO_WRITEBACK) ? "cached" : "uncached");
716 agx_resource_debug(rsrc, "Shadowed: ");
718 memcpy(new_->ptr.cpu, old->ptr.cpu, size);
721 /* Swap the pointers, dropping a reference */
722 agx_bo_unreference(rsrc->bo);
725 /* Reemit descriptors using this resource */
731 * Perform the required synchronization before a transfer_map operation can
732 * complete. This may require syncing batches.
735 agx_prepare_for_map(struct agx_context *ctx, struct agx_resource *rsrc,
737 unsigned usage, /* a combination of PIPE_MAP_x */
738 const struct pipe_box *box)
740 /* Upgrade DISCARD_RANGE to WHOLE_RESOURCE if the whole resource is
743 if ((usage & PIPE_MAP_DISCARD_RANGE) &&
744 !(rsrc->base.flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) &&
745 rsrc->base.last_level == 0 &&
746 util_texrange_covers_whole_level(&rsrc->base, 0, box->x, box->y, box->z,
747 box->width, box->height, box->depth)) {
749 usage |= PIPE_MAP_DISCARD_WHOLE_RESOURCE;
752 /* Shadowing doesn't work separate stencil or shared resources */
753 if (rsrc->separate_stencil || (rsrc->bo->flags & AGX_BO_SHARED))
754 usage &= ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
756 /* If the access is unsynchronized, there's nothing to do */
757 if (usage & PIPE_MAP_UNSYNCHRONIZED)
760 /* Everything after this needs the context, which is not safe for
761 * unsynchronized transfers when we claim
762 * PIPE_CAP_MAP_UNSYNCHRONIZED_THREAD_SAFE.
764 assert(!(usage & PIPE_MAP_UNSYNCHRONIZED));
766 /* Both writing and reading need writers synced */
767 agx_sync_writer(ctx, rsrc, "Unsynchronized transfer");
769 /* Additionally, writing needs readers synced */
770 if (!(usage & PIPE_MAP_WRITE))
773 /* If the range being written is uninitialized, we do not need to sync. */
774 if (rsrc->base.target == PIPE_BUFFER && !(rsrc->bo->flags & AGX_BO_SHARED) &&
775 !util_ranges_intersect(&rsrc->valid_buffer_range, box->x,
776 box->x + box->width))
779 /* If there are no readers, we're done. We check at the start to
780 * avoid expensive shadowing paths or duplicated checks in this hapyp path.
782 if (!agx_any_batch_uses_resource(ctx, rsrc)) {
783 rsrc->shadowed_bytes = 0;
787 /* There are readers. Try to shadow the resource to avoid a sync */
788 if (!(rsrc->base.flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) &&
789 agx_shadow(ctx, rsrc, !(usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE)))
792 /* Otherwise, we need to sync */
793 agx_sync_readers(ctx, rsrc, "Unsynchronized write");
795 rsrc->shadowed_bytes = 0;
799 * Return a colour-renderable format compatible with a depth/stencil format, to
800 * be used as an interchange format for depth/stencil blits. For
801 * non-depth/stencil formats, returns the format itself.
803 static enum pipe_format
804 agx_staging_color_format_for_zs(enum pipe_format format)
807 case PIPE_FORMAT_Z16_UNORM:
808 return PIPE_FORMAT_R16_UNORM;
809 case PIPE_FORMAT_Z32_FLOAT:
810 return PIPE_FORMAT_R32_FLOAT;
811 case PIPE_FORMAT_S8_UINT:
812 return PIPE_FORMAT_R8_UINT;
814 /* Z24 and combined Z/S are lowered to one of the above formats by
815 * u_transfer_helper. The caller needs to pass in the rsrc->layout.format
816 * and not the rsrc->base.format to get the lowered physical format
817 * (rather than the API logical format).
819 assert(!util_format_is_depth_or_stencil(format) &&
820 "no other depth/stencil formats allowed for staging");
826 /* Most of the time we can do CPU-side transfers, but sometimes we need to use
827 * the 3D pipe for this. Let's wrap u_blitter to blit to/from staging textures.
828 * Code adapted from panfrost */
830 static struct agx_resource *
831 agx_alloc_staging(struct pipe_screen *screen, struct agx_resource *rsc,
832 unsigned level, const struct pipe_box *box)
834 struct pipe_resource tmpl = rsc->base;
836 tmpl.width0 = box->width;
837 tmpl.height0 = box->height;
840 /* We need a linear staging resource. We have linear 2D arrays, but not
841 * linear 3D or cube textures. So switch to 2D arrays if needed.
843 switch (tmpl.target) {
844 case PIPE_TEXTURE_2D_ARRAY:
845 case PIPE_TEXTURE_CUBE:
846 case PIPE_TEXTURE_CUBE_ARRAY:
847 case PIPE_TEXTURE_3D:
848 tmpl.target = PIPE_TEXTURE_2D_ARRAY;
849 tmpl.array_size = box->depth;
852 assert(tmpl.array_size == 1);
853 assert(box->depth == 1);
859 /* Linear is incompatible with depth/stencil, so we convert */
860 tmpl.format = agx_staging_color_format_for_zs(rsc->layout.format);
861 tmpl.bind &= ~PIPE_BIND_DEPTH_STENCIL;
862 tmpl.bind |= PIPE_BIND_LINEAR | PIPE_BIND_RENDER_TARGET;
864 struct pipe_resource *pstaging = screen->resource_create(screen, &tmpl);
868 return agx_resource(pstaging);
872 agx_blit_from_staging(struct pipe_context *pctx, struct agx_transfer *trans)
874 struct pipe_resource *dst = trans->base.resource;
875 struct pipe_blit_info blit = {0};
877 blit.dst.resource = dst;
879 agx_staging_color_format_for_zs(agx_resource(dst)->layout.format);
880 blit.dst.level = trans->base.level;
881 blit.dst.box = trans->base.box;
882 blit.src.resource = trans->staging.rsrc;
883 blit.src.format = trans->staging.rsrc->format;
885 blit.src.box = trans->staging.box;
886 blit.mask = util_format_get_mask(blit.src.format);
887 blit.filter = PIPE_TEX_FILTER_NEAREST;
889 agx_blit(pctx, &blit);
893 agx_blit_to_staging(struct pipe_context *pctx, struct agx_transfer *trans)
895 struct pipe_resource *src = trans->base.resource;
896 struct pipe_blit_info blit = {0};
898 blit.src.resource = src;
900 agx_staging_color_format_for_zs(agx_resource(src)->layout.format);
901 blit.src.level = trans->base.level;
902 blit.src.box = trans->base.box;
903 blit.dst.resource = trans->staging.rsrc;
904 blit.dst.format = trans->staging.rsrc->format;
906 blit.dst.box = trans->staging.box;
907 blit.mask = util_format_get_mask(blit.dst.format);
908 blit.filter = PIPE_TEX_FILTER_NEAREST;
910 agx_blit(pctx, &blit);
914 agx_transfer_map(struct pipe_context *pctx, struct pipe_resource *resource,
916 unsigned usage, /* a combination of PIPE_MAP_x */
917 const struct pipe_box *box,
918 struct pipe_transfer **out_transfer)
920 struct agx_context *ctx = agx_context(pctx);
921 struct agx_resource *rsrc = agx_resource(resource);
923 /* Can't map tiled/compressed directly */
924 if ((usage & PIPE_MAP_DIRECTLY) && rsrc->modifier != DRM_FORMAT_MOD_LINEAR)
927 /* Can't transfer out of bounds mip levels */
928 if (level >= rsrc->layout.levels)
931 agx_prepare_for_map(ctx, rsrc, level, usage, box);
933 /* Track the written buffer range */
934 if (resource->target == PIPE_BUFFER) {
935 /* Note the ordering: DISCARD|WRITE is valid, so clear before adding. */
936 if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE)
937 util_range_set_empty(&rsrc->valid_buffer_range);
938 if (usage & PIPE_MAP_WRITE) {
939 util_range_add(resource, &rsrc->valid_buffer_range, box->x,
940 box->x + box->width);
944 struct agx_transfer *transfer = CALLOC_STRUCT(agx_transfer);
945 transfer->base.level = level;
946 transfer->base.usage = usage;
947 transfer->base.box = *box;
949 pipe_resource_reference(&transfer->base.resource, resource);
950 *out_transfer = &transfer->base;
952 /* For compression, we use a staging blit as we do not implement AGX
953 * compression in software. In some cases, we could use this path for
954 * twiddled too, but we don't have a use case for that yet.
956 if (rsrc->modifier == DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED) {
957 /* Should never happen for buffers, and it's not safe */
958 assert(resource->target != PIPE_BUFFER);
960 struct agx_resource *staging =
961 agx_alloc_staging(pctx->screen, rsrc, level, box);
964 /* Staging resources have one LOD: level 0. Query the strides
967 transfer->base.stride = ail_get_linear_stride_B(&staging->layout, 0);
968 transfer->base.layer_stride = staging->layout.layer_stride_B;
969 transfer->staging.rsrc = &staging->base;
971 transfer->staging.box = *box;
972 transfer->staging.box.x = 0;
973 transfer->staging.box.y = 0;
974 transfer->staging.box.z = 0;
976 assert(transfer->staging.rsrc != NULL);
978 if ((usage & PIPE_MAP_READ) && agx_resource_valid(rsrc, level)) {
979 agx_blit_to_staging(pctx, transfer);
980 agx_sync_writer(ctx, staging, "GPU read staging blit");
983 agx_bo_mmap(staging->bo);
984 return staging->bo->ptr.cpu;
987 agx_bo_mmap(rsrc->bo);
989 if (rsrc->modifier == DRM_FORMAT_MOD_APPLE_TWIDDLED) {
990 /* Should never happen for buffers, and it's not safe */
991 assert(resource->target != PIPE_BUFFER);
993 transfer->base.stride =
994 util_format_get_stride(rsrc->layout.format, box->width);
996 transfer->base.layer_stride = util_format_get_2d_size(
997 rsrc->layout.format, transfer->base.stride, box->height);
999 transfer->map = calloc(transfer->base.layer_stride, box->depth);
1001 if ((usage & PIPE_MAP_READ) && agx_resource_valid(rsrc, level)) {
1002 for (unsigned z = 0; z < box->depth; ++z) {
1003 uint8_t *map = agx_map_texture_cpu(rsrc, level, box->z + z);
1005 (uint8_t *)transfer->map + transfer->base.layer_stride * z;
1007 ail_detile(map, dst, &rsrc->layout, level, transfer->base.stride,
1008 box->x, box->y, box->width, box->height);
1012 return transfer->map;
1014 assert(rsrc->modifier == DRM_FORMAT_MOD_LINEAR);
1016 transfer->base.stride = ail_get_linear_stride_B(&rsrc->layout, level);
1017 transfer->base.layer_stride = rsrc->layout.layer_stride_B;
1019 /* Be conservative for direct writes */
1020 if ((usage & PIPE_MAP_WRITE) &&
1022 (PIPE_MAP_DIRECTLY | PIPE_MAP_PERSISTENT | PIPE_MAP_COHERENT))) {
1023 BITSET_SET(rsrc->data_valid, level);
1027 ail_get_linear_pixel_B(&rsrc->layout, level, box->x, box->y, box->z);
1029 return ((uint8_t *)rsrc->bo->ptr.cpu) + offset;
1034 agx_transfer_unmap(struct pipe_context *pctx, struct pipe_transfer *transfer)
1036 /* Gallium expects writeback here, so we tile */
1038 struct agx_transfer *trans = agx_transfer(transfer);
1039 struct pipe_resource *prsrc = transfer->resource;
1040 struct agx_resource *rsrc = (struct agx_resource *)prsrc;
1042 if (trans->staging.rsrc && (transfer->usage & PIPE_MAP_WRITE)) {
1043 assert(prsrc->target != PIPE_BUFFER);
1044 agx_blit_from_staging(pctx, trans);
1045 agx_flush_readers(agx_context(pctx), agx_resource(trans->staging.rsrc),
1046 "GPU write staging blit");
1047 } else if (trans->map && (transfer->usage & PIPE_MAP_WRITE)) {
1048 assert(rsrc->modifier == DRM_FORMAT_MOD_APPLE_TWIDDLED);
1050 for (unsigned z = 0; z < transfer->box.depth; ++z) {
1052 agx_map_texture_cpu(rsrc, transfer->level, transfer->box.z + z);
1053 uint8_t *src = (uint8_t *)trans->map + transfer->layer_stride * z;
1055 ail_tile(map, src, &rsrc->layout, transfer->level, transfer->stride,
1056 transfer->box.x, transfer->box.y, transfer->box.width,
1057 transfer->box.height);
1061 /* The level we wrote is now initialized. We do this at the end so
1062 * blit_from_staging can avoid reloading existing contents.
1064 if (transfer->usage & PIPE_MAP_WRITE)
1065 BITSET_SET(rsrc->data_valid, transfer->level);
1067 /* Free the transfer */
1069 pipe_resource_reference(&trans->staging.rsrc, NULL);
1070 pipe_resource_reference(&transfer->resource, NULL);
1075 agx_generate_mipmap(struct pipe_context *pctx, struct pipe_resource *prsrc,
1076 enum pipe_format format, unsigned base_level,
1077 unsigned last_level, unsigned first_layer,
1078 unsigned last_layer)
1080 struct agx_resource *rsrc = agx_resource(prsrc);
1082 /* Generating a mipmap invalidates the written levels. Make that
1083 * explicit so we don't reload the previous contents.
1085 for (unsigned l = base_level + 1; l <= last_level; ++l)
1086 BITSET_CLEAR(rsrc->data_valid, l);
1088 /* For now we use util_gen_mipmap, but this has way too much overhead */
1089 perf_debug_ctx(agx_context(pctx), "Unoptimized mipmap generation");
1091 return util_gen_mipmap(pctx, prsrc, format, base_level, last_level,
1092 first_layer, last_layer, PIPE_TEX_FILTER_LINEAR);
/* pipe_context::clear hook. Clears split in two: "fast" clears of buffers
 * the batch has neither drawn to nor loaded (implemented as batch clear
 * state consumed by the background program) and "slow" clears of the rest
 * (a fullscreen draw through u_blitter).
 */
agx_clear(struct pipe_context *pctx, unsigned buffers,
          const struct pipe_scissor_state *scissor_state,
          const union pipe_color_union *color, double depth, unsigned stencil)
   struct agx_context *ctx = agx_context(pctx);
   struct agx_batch *batch = agx_get_batch(ctx);

   /* Honour conditional rendering: skip the clear when the query failed */
   if (unlikely(!agx_render_condition_check(ctx)))

   /* Buffers not yet touched by this batch are eligible for fast clear */
   unsigned fastclear = buffers & ~(batch->draw | batch->load);
   unsigned slowclear = buffers & ~fastclear;

   assert(scissor_state == NULL && "we don't support PIPE_CAP_CLEAR_SCISSORED");

   /* Fast clears configure the batch */
   for (unsigned rt = 0; rt < PIPE_MAX_COLOR_BUFS; ++rt) {
      if (!(fastclear & (PIPE_CLEAR_COLOR0 << rt)))

      static_assert(sizeof(color->f) == 16, "mismatched structure");

      /* Stash the clear colour in GPU memory for the background program */
      batch->uploaded_clear_color[rt] =
         agx_pool_upload_aligned(&batch->pool, color->f, sizeof(color->f), 16);

   if (fastclear & PIPE_CLEAR_DEPTH)
      batch->clear_depth = depth;

   if (fastclear & PIPE_CLEAR_STENCIL)
      batch->clear_stencil = stencil;

   /* Slow clears draw a fullscreen rectangle */
   agx_blitter_save(ctx, ctx->blitter, false /* render cond */);
      ctx->blitter, ctx->framebuffer.width, ctx->framebuffer.height,
      util_framebuffer_get_num_layers(&ctx->framebuffer), slowclear, color,
      util_framebuffer_get_num_samples(&ctx->framebuffer) > 1);

   agx_batch_init_state(batch);

   /* Record what was cleared and require everything cleared be stored */
   batch->clear |= fastclear;
   batch->resolve |= buffers;
   assert((batch->draw & slowclear) == slowclear);
/* Reallocate a resource with new creation parameters (templ) and migrate
 * its contents: create the replacement, blit every level marked valid into
 * it, flush the blits, then swap layout/modifier/BO into the original
 * agx_resource so existing pipe_resource pointers see the new storage.
 */
transition_resource(struct pipe_context *pctx, struct agx_resource *rsrc,
                    struct pipe_resource *templ)
   struct agx_resource *new_res =
      agx_resource(pctx->screen->resource_create(pctx->screen, templ));

   assert(!(rsrc->base.bind & PIPE_BIND_SHARED) && "cannot swap BOs if shared");

   BITSET_FOREACH_SET(level, rsrc->data_valid, PIPE_MAX_TEXTURE_LEVELS) {
      /* Blit each valid level */
      struct pipe_blit_info blit = {0};

      u_box_3d(0, 0, 0, rsrc->layout.width_px, rsrc->layout.height_px,
               rsrc->layout.depth_px, &blit.dst.box);
      blit.src.box = blit.dst.box;

      blit.dst.resource = &new_res->base;
      blit.dst.format = new_res->base.format;
      blit.dst.level = level;
      blit.src.resource = &rsrc->base;
      blit.src.format = rsrc->base.format;
      blit.src.level = level;
      blit.mask = util_format_get_mask(blit.src.format);
      blit.filter = PIPE_TEX_FILTER_NEAREST;
      agx_blit(pctx, &blit);

   /* Flush the blits out, to make sure the old resource is no longer used */
   agx_flush_writer(agx_context(pctx), new_res, "flush_resource");

   /* Copy the bind flags and swap the BOs */
   struct agx_bo *old = rsrc->bo;
   rsrc->base.bind = new_res->base.bind;
   rsrc->layout = new_res->layout;
   rsrc->modifier = new_res->modifier;
   rsrc->bo = new_res->bo;
   /* NOTE(review): presumably new_res->bo is repointed at `old` before the
    * release below, so destroying new_res drops the old BO — that line is
    * not visible in this chunk; confirm `old` is handed off, not leaked. */

   /* Free the new resource, which now owns the old BO */
   pipe_resource_reference((struct pipe_resource **)&new_res, NULL);
/* Transition a resource to an uncompressed, shader-image-writeable layout.
 * Needed before binding as a writeable image, since compressed/twiddled
 * layouts cannot be written by shaders.
 * NOTE(review): the trailing parameter (the `reason` debug string used
 * below) is not visible in this chunk.
 */
agx_decompress(struct agx_context *ctx, struct agx_resource *rsrc,
   if (rsrc->layout.tiling == AIL_TILING_TWIDDLED_COMPRESSED) {
      perf_debug_ctx(ctx, "Decompressing resource due to %s", reason);
   } else if (!rsrc->layout.writeable_image) {
      perf_debug_ctx(ctx, "Reallocating image due to %s", reason);

   /* Reallocate with SHADER_IMAGE set, which disables compression */
   struct pipe_resource templ = rsrc->base;
   assert(!(templ.bind & PIPE_BIND_SHADER_IMAGE) && "currently compressed");
   templ.bind |= PIPE_BIND_SHADER_IMAGE /* forces off compression */;
   transition_resource(&ctx->base, rsrc, &templ);
/* pipe_context::flush_resource hook, used to prepare a resource for
 * sharing with another process/API. If the backing BO is not shareable,
 * reallocate it with PIPE_BIND_SHARED via transition_resource; then flush
 * any pending GPU writer so the contents are coherent for the consumer.
 */
agx_flush_resource(struct pipe_context *pctx, struct pipe_resource *pres)
   struct agx_resource *rsrc = agx_resource(pres);

   /* flush_resource is used to prepare resources for sharing, so if this is not
    * already a shareable resource, make it so
    */
   struct agx_bo *old = rsrc->bo;
   if (!(old->flags & AGX_BO_SHAREABLE)) {
      assert(rsrc->layout.levels == 1 &&
             "Shared resources must not be mipmapped");
      assert(rsrc->layout.sample_count_sa == 1 &&
             "Shared resources must not be multisampled");

      assert(!(pres->bind & PIPE_BIND_SHARED));

      struct pipe_resource templ = *pres;
      templ.bind |= PIPE_BIND_SHARED;
      transition_resource(pctx, rsrc, &templ);
   /* Otherwise just claim it's already shared */
   pres->bind |= PIPE_BIND_SHARED;
   agx_flush_writer(agx_context(pctx), rsrc, "flush_resource");
/* pipe_context::flush hook. Flushes every pending batch and, when the
 * caller passed a fence pointer, produces a fence for the latest submitted
 * job.
 * NOTE(review): the signature's trailing parameter(s) and the store of `f`
 * into *fence are not visible in this chunk — confirm the fence is
 * actually handed to the caller and not leaked.
 */
agx_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
   struct agx_context *ctx = agx_context(pctx);

   agx_flush_all(ctx, "Gallium flush");

   /* At this point all pending work has been submitted. Since jobs are
    * started and completed sequentially from a UAPI perspective, and since
    * we submit all jobs with compute+render barriers on the prior job,
    * waiting on the last submitted job is sufficient to guarantee completion
    * of all GPU work thus far, so we can create a fence out of the latest
    *
    * See this page for more info on how the GPU/UAPI queueing works:
    * https://github.com/AsahiLinux/docs/wiki/SW:AGX-driver-notes#queues
    */
   struct pipe_fence_handle *f = agx_fence_create(ctx);
   /* Drop whatever fence the caller previously held */
   pctx->screen->fence_reference(pctx->screen, fence, NULL);
/* Submit a batch to the hardware: finalize the command encoder, build the
 * background/partial-background/store "meta" programs, mark attachment mip
 * levels as valid, upload the CPU-staged dynamic arrays, and gather the
 * complete BO handle list for the kernel submission.
 */
agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
   struct agx_device *dev = agx_device(ctx->base.screen);

   assert(agx_batch_is_active(batch));
   assert(!agx_batch_is_submitted(batch));

   /* Make sure there's something to submit. */
   if (!batch->clear && !batch->any_draws) {
      agx_batch_reset(ctx, batch);

   assert(batch->initialized);

   /* Finalize the encoder */
   uint8_t stop[5 + 64] = {0x00, 0x00, 0x00, 0xc0, 0x00};
   memcpy(batch->encoder_current, stop, sizeof(stop));

   /* Load (background), partial-render background, and end-of-tile store
    * programs for this render pass */
   uint64_t pipeline_background = agx_build_meta(batch, false, false);
   uint64_t pipeline_background_partial = agx_build_meta(batch, false, true);
   uint64_t pipeline_store = agx_build_meta(batch, true, false);

   /* A spilled tilebuffer forces the meta programs through textures */
   bool clear_pipeline_textures =
      agx_tilebuffer_spills(&batch->tilebuffer_layout);

   for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
      struct pipe_surface *surf = batch->key.cbufs[i];

      if (surf && surf->texture) {
         struct agx_resource *rt = agx_resource(surf->texture);
         BITSET_SET(rt->data_valid, surf->u.tex.level);

         /* Any attachment loaded rather than cleared requires the
          * background program to sample the render target */
         if (!(batch->clear & (PIPE_CLEAR_COLOR0 << i)))
            clear_pipeline_textures = true;

   struct agx_resource *zbuf =
      batch->key.zsbuf ? agx_resource(batch->key.zsbuf->texture) : NULL;

   unsigned level = batch->key.zsbuf->u.tex.level;
   BITSET_SET(zbuf->data_valid, level);

   if (zbuf->separate_stencil)
      BITSET_SET(zbuf->separate_stencil->data_valid, level);

   /* Scissor and depth bias arrays are staged to dynamic arrays on the CPU. At
    * submit time, they're done growing and are uploaded to GPU memory attached
    */
   uint64_t scissor = agx_pool_upload_aligned(&batch->pool, batch->scissor.data,
                                              batch->scissor.size, 64);
   uint64_t zbias = agx_pool_upload_aligned(
      &batch->pool, batch->depth_bias.data, batch->depth_bias.size, 64);

   /* BO list for a given batch consists of:
    * - BOs for the batch's pools
    * - BOs for the encoder
    * - BO for internal shaders
    * - BOs added to the batch explicitly
    */
   agx_batch_add_bo(batch, batch->encoder);

   /* Occlusion queries are allocated as a contiguous pool */
   util_dynarray_num_elements(&batch->occlusion_queries, struct agx_query *);
   size_t oq_size = oq_count * sizeof(uint64_t);

   batch->occlusion_buffer =
      agx_pool_alloc_aligned(&batch->pool, oq_size, 64);
   memset(batch->occlusion_buffer.cpu, 0, oq_size);

   /* No queries: GPU address 0 signals "no occlusion buffer" */
   batch->occlusion_buffer.gpu = 0;

   unsigned handle_count = agx_batch_num_bo(batch) +
                           agx_pool_num_bos(&batch->pool) +
                           agx_pool_num_bos(&batch->pipeline_pool);

   /* NOTE(review): calloc's conventional argument order is (nmemb, size) —
    * this call reverses them (harmless but unconventional). Also confirm
    * `handles` is freed after submission; no free is visible in this chunk. */
   uint32_t *handles = calloc(sizeof(uint32_t), handle_count);
   unsigned handle = 0, handle_i = 0;

   AGX_BATCH_FOREACH_BO_HANDLE(batch, handle) {
      handles[handle_i++] = handle;

   agx_pool_get_bo_handles(&batch->pool, handles + handle_i);
   handle_i += agx_pool_num_bos(&batch->pool);

   agx_pool_get_bo_handles(&batch->pipeline_pool, handles + handle_i);
   handle_i += agx_pool_num_bos(&batch->pipeline_pool);

   /* Size calculation should've been exact */
   assert(handle_i == handle_count);

   /* TODO: Linux UAPI submission */
   (void)clear_pipeline_textures;
   (void)pipeline_store;
   (void)pipeline_background;
   (void)pipeline_background_partial;

   unreachable("Linux UAPI not yet upstream");
   agx_batch_submit(ctx, batch, 0, 0, NULL);
/* pipe_context::destroy hook. Waits for all in-flight GPU work, then tears
 * down uploaders, the blitter, framebuffer references, meta shaders, the
 * batch-result buffer, and all of the context's DRM sync objects.
 */
agx_destroy_context(struct pipe_context *pctx)
   struct agx_device *dev = agx_device(pctx->screen);
   struct agx_context *ctx = agx_context(pctx);

   /* Batch state needs to be freed on completion, and we don't want to yank
    * buffers out from in-progress GPU jobs to avoid faults, so just wait until
    * everything in progress is actually done on context destroy. This will
    * ensure everything is cleaned up properly.
    */
   agx_sync_all(ctx, "destroy context");

   if (pctx->stream_uploader)
      u_upload_destroy(pctx->stream_uploader);

   /* const_uploader aliases stream_uploader (see agx_create_context), so it
    * needs no separate destroy */
   util_blitter_destroy(ctx->blitter);

   util_unreference_framebuffer_state(&ctx->framebuffer);

   agx_meta_cleanup(&ctx->meta);

   agx_bo_unreference(ctx->result_buf);

   drmSyncobjDestroy(dev->fd, ctx->in_sync_obj);
   drmSyncobjDestroy(dev->fd, ctx->dummy_syncobj);
   if (ctx->in_sync_fd != -1)
      close(ctx->in_sync_fd);

   /* Per-batch sync objects were created lazily; destroy only those that
    * actually exist */
   for (unsigned i = 0; i < AGX_MAX_BATCHES; ++i) {
      if (ctx->batches.slots[i].syncobj)
         drmSyncobjDestroy(dev->fd, ctx->batches.slots[i].syncobj);
/* pipe_context::invalidate_resource hook. If the resource is bound to the
 * current batch's framebuffer, clear its resolve (store) bits so the
 * invalidated contents are never written back (glInvalidateFramebuffer).
 */
agx_invalidate_resource(struct pipe_context *pctx,
                        struct pipe_resource *resource)
   struct agx_context *ctx = agx_context(pctx);
   struct agx_batch *batch = agx_get_batch(ctx);

   /* Handle the glInvalidateFramebuffer case */
   if (batch->key.zsbuf && batch->key.zsbuf->texture == resource)
      batch->resolve &= ~PIPE_CLEAR_DEPTHSTENCIL;

   for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
      struct pipe_surface *surf = batch->key.cbufs[i];

      if (surf && surf->texture == resource)
         batch->resolve &= ~(PIPE_CLEAR_COLOR0 << i);
/* pipe_context::memory_barrier hook: flush every batch regardless of the
 * requested barrier bits. */
agx_memory_barrier(struct pipe_context *pctx, unsigned flags)
   /* Be conservative for now, we can try to optimize this more later */
   agx_flush_all(agx_context(pctx), "Memory barrier");
/* pipe_screen::context_create hook. Allocates an agx_context, wires up all
 * pipe_context entry points (transfers routed through u_transfer_helper),
 * and creates the blitter, batch-result BO, and DRM sync objects.
 */
static struct pipe_context *
agx_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
   struct agx_context *ctx = rzalloc(NULL, struct agx_context);
   struct pipe_context *pctx = &ctx->base;

   pctx->screen = screen;

   util_dynarray_init(&ctx->writer, ctx);
   util_dynarray_init(&ctx->global_buffers, ctx);

   pctx->stream_uploader = u_upload_create_default(pctx);
   if (!pctx->stream_uploader) {
   /* Constant uploads share the stream uploader */
   pctx->const_uploader = pctx->stream_uploader;

   pctx->destroy = agx_destroy_context;
   pctx->flush = agx_flush;
   pctx->clear = agx_clear;
   pctx->resource_copy_region = util_resource_copy_region;
   pctx->blit = agx_blit;
   pctx->generate_mipmap = agx_generate_mipmap;
   pctx->flush_resource = agx_flush_resource;

   /* Map/unmap go through u_transfer_helper for Z/S splitting and MSAA */
   pctx->buffer_map = u_transfer_helper_transfer_map;
   pctx->buffer_unmap = u_transfer_helper_transfer_unmap;
   pctx->texture_map = u_transfer_helper_transfer_map;
   pctx->texture_unmap = u_transfer_helper_transfer_unmap;
   pctx->transfer_flush_region = u_transfer_helper_transfer_flush_region;

   pctx->buffer_subdata = u_default_buffer_subdata;
   pctx->clear_buffer = u_default_clear_buffer;
   pctx->texture_subdata = u_default_texture_subdata;
   pctx->set_debug_callback = u_default_set_debug_callback;
   pctx->get_sample_position = u_default_get_sample_position;
   pctx->invalidate_resource = agx_invalidate_resource;
   pctx->memory_barrier = agx_memory_barrier;

   pctx->create_fence_fd = agx_create_fence_fd;
   pctx->fence_server_sync = agx_fence_server_sync;

   agx_init_state_functions(pctx);
   agx_init_query_functions(pctx);
   agx_init_streamout_functions(pctx);

   agx_meta_init(&ctx->meta, agx_device(screen));

   ctx->blitter = util_blitter_create(pctx);

   /* One result slot per batch, written back by the GPU */
   ctx->result_buf = agx_bo_create(
      agx_device(screen), sizeof(union agx_batch_result) * AGX_MAX_BATCHES,
      AGX_BO_WRITEBACK, "Batch result buffer");
   assert(ctx->result_buf);

   /* Sync object/FD used for NATIVE_FENCE_FD. */
   ctx->in_sync_fd = -1;
   ret = drmSyncobjCreate(agx_device(screen)->fd, 0, &ctx->in_sync_obj);

   /* Dummy sync object used before any work has been submitted. */
   ret = drmSyncobjCreate(agx_device(screen)->fd, DRM_SYNCOBJ_CREATE_SIGNALED,
                          &ctx->dummy_syncobj);
   ctx->syncobj = ctx->dummy_syncobj;

   /* By default all samples are enabled */
   ctx->sample_mask = ~0;
/* Screen identity queries: driver vendor, device vendor, and renderer
 * name. The returned string literals are not visible in this chunk;
 * agx_get_name presumably formats a name from the probed device — confirm.
 */
agx_get_vendor(struct pipe_screen *pscreen)

agx_get_device_vendor(struct pipe_screen *pscreen)

agx_get_name(struct pipe_screen *pscreen)
   struct agx_device *dev = agx_device(pscreen);
/* pipe_screen::get_param hook: advertise PIPE_CAP_* limits and features.
 * Note many `return` lines are elided in this chunk; only the visible
 * returns are documented. Unhandled caps fall through to the gallium
 * defaults at the bottom.
 */
agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
   /* dEQP mode relaxes some limits (e.g. GLSL level) for conformance runs */
   bool is_deqp = agx_device(pscreen)->debug & AGX_DBG_DEQP;

   case PIPE_CAP_NPOT_TEXTURES:
   case PIPE_CAP_SHADER_STENCIL_EXPORT:
   case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
   case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD:
   case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
   case PIPE_CAP_DEPTH_CLIP_DISABLE:
   case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
   case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES:
   case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
   case PIPE_CAP_SHADER_PACK_HALF_FLOAT:
   case PIPE_CAP_FS_FINE_DERIVATIVE:

   /* We could support ARB_clip_control by toggling the clip control bit for
    * the render pass. Because this bit is for the whole render pass,
    * switching clip modes necessarily incurs a flush. This should be ok, from
    * the ARB_clip_control spec:
    *
    *    Some implementations may introduce a flush when changing the
    *    clip control state. Hence frequent clip control changes are
    *
    * However, this would require tuning to ensure we don't flush unnecessary
    * when using u_blitter clears, for example. As we don't yet have a use case,
    * don't expose the feature.
    */
   case PIPE_CAP_CLIP_HALFZ:

   case PIPE_CAP_MAX_RENDER_TARGETS:
   case PIPE_CAP_FBFETCH:
   case PIPE_CAP_FBFETCH_COHERENT:

   case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:

   case PIPE_CAP_OCCLUSION_QUERY:
   case PIPE_CAP_GENERATE_MIPMAP:
   case PIPE_CAP_PRIMITIVE_RESTART:
   case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX:
   case PIPE_CAP_ANISOTROPIC_FILTER:
   case PIPE_CAP_NATIVE_FENCE_FD:

   case PIPE_CAP_SAMPLER_VIEW_TARGET:
   case PIPE_CAP_TEXTURE_SWIZZLE:
   case PIPE_CAP_BLEND_EQUATION_SEPARATE:
   case PIPE_CAP_INDEP_BLEND_ENABLE:
   case PIPE_CAP_INDEP_BLEND_FUNC:
   case PIPE_CAP_ACCELERATED:

   case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
   case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
   case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE:
   case PIPE_CAP_SHADER_ARRAY_COMPONENTS:
   case PIPE_CAP_PACKED_UNIFORMS:
   case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
   case PIPE_CAP_VS_INSTANCEID:
   case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
   case PIPE_CAP_CONDITIONAL_RENDER:
   case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
   case PIPE_CAP_SEAMLESS_CUBE_MAP:
   case PIPE_CAP_LOAD_CONSTBUF:
   case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
   case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
   case PIPE_CAP_NULL_TEXTURES:
   case PIPE_CAP_TEXTURE_MULTISAMPLE:
   case PIPE_CAP_IMAGE_LOAD_FORMATTED:
   case PIPE_CAP_IMAGE_STORE_FORMATTED:
   case PIPE_CAP_COMPUTE:
   case PIPE_CAP_INT64:
   case PIPE_CAP_SAMPLE_SHADING:

   case PIPE_CAP_SURFACE_SAMPLE_COUNT:

   case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:

   case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
      return PIPE_MAX_SO_BUFFERS;

   case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
   case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
      return PIPE_MAX_SO_OUTPUTS;

   case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
   case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:

   case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:

   case PIPE_CAP_GLSL_FEATURE_LEVEL:
   case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
      return is_deqp ? 330 : 140;
   case PIPE_CAP_ESSL_FEATURE_LEVEL:

   case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:

   case PIPE_CAP_MAX_TEXEL_BUFFER_ELEMENTS_UINT:
      return AGX_TEXTURE_BUFFER_MAX_SIZE;

   case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:

   case PIPE_CAP_VERTEX_ATTRIB_ELEMENT_ALIGNED_ONLY:

   /* We run nir_lower_point_size so we need the GLSL linker to copy
    * the original gl_PointSize when captured by transform feedback. We could
    * also copy it ourselves but it's easier to set the CAP.
    */
   case PIPE_CAP_PSIZ_CLAMPED:

   case PIPE_CAP_MAX_TEXTURE_2D_SIZE:

   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:

   case PIPE_CAP_FS_COORD_ORIGIN_UPPER_LEFT:
   case PIPE_CAP_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
   case PIPE_CAP_TGSI_TEXCOORD:
   case PIPE_CAP_FS_FACE_IS_INTEGER_SYSVAL:
   case PIPE_CAP_FS_POSITION_IS_SYSVAL:

   case PIPE_CAP_FS_COORD_ORIGIN_LOWER_LEFT:
   case PIPE_CAP_FS_COORD_PIXEL_CENTER_INTEGER:
   case PIPE_CAP_FS_POINT_IS_SYSVAL:

   case PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET:

   case PIPE_CAP_TEXTURE_TRANSFER_MODES:
      return PIPE_TEXTURE_TRANSFER_BLIT;

   case PIPE_CAP_ENDIANNESS:
      return PIPE_ENDIAN_LITTLE;

   case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:

   case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:

   case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:

   case PIPE_CAP_DRAW_INDIRECT:

   /* Report total physical memory in MiB as "video memory" (UMA device) */
   case PIPE_CAP_VIDEO_MEMORY: {
      uint64_t system_memory;

      if (!os_get_total_physical_memory(&system_memory))

      return (int)(system_memory >> 20);

   case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:

   case PIPE_CAP_MAX_VARYINGS:

   case PIPE_CAP_FLATSHADE:
   case PIPE_CAP_TWO_SIDED_COLOR:
   case PIPE_CAP_ALPHA_TEST:
   case PIPE_CAP_POINT_SIZE_FIXED:
   case PIPE_CAP_CLIP_PLANES:
   case PIPE_CAP_NIR_IMAGES_AS_DEREF:

   case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
      return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_FREEDRENO;

   case PIPE_CAP_SUPPORTED_PRIM_MODES:
   case PIPE_CAP_SUPPORTED_PRIM_MODES_WITH_RESTART:
      return BITFIELD_BIT(MESA_PRIM_POINTS) | BITFIELD_BIT(MESA_PRIM_LINES) |
             BITFIELD_BIT(MESA_PRIM_LINE_STRIP) |
             BITFIELD_BIT(MESA_PRIM_LINE_LOOP) |
             BITFIELD_BIT(MESA_PRIM_TRIANGLES) |
             BITFIELD_BIT(MESA_PRIM_TRIANGLE_STRIP) |
             BITFIELD_BIT(MESA_PRIM_TRIANGLE_FAN) |
             BITFIELD_BIT(MESA_PRIM_QUADS) | BITFIELD_BIT(MESA_PRIM_QUAD_STRIP);

   case PIPE_CAP_MAP_UNSYNCHRONIZED_THREAD_SAFE:

   /* Anything not handled above takes the gallium default */
   return u_pipe_screen_get_param_defaults(pscreen, param);
/* pipe_screen::get_paramf hook: floating-point limits (line/point sizes,
 * anisotropy, LOD bias). Several `return` lines are elided in this chunk.
 */
agx_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
   case PIPE_CAPF_MIN_LINE_WIDTH:
   case PIPE_CAPF_MIN_LINE_WIDTH_AA:
   case PIPE_CAPF_MIN_POINT_SIZE:
   case PIPE_CAPF_MIN_POINT_SIZE_AA:

   case PIPE_CAPF_POINT_SIZE_GRANULARITY:
   case PIPE_CAPF_LINE_WIDTH_GRANULARITY:

   case PIPE_CAPF_MAX_LINE_WIDTH:
   case PIPE_CAPF_MAX_LINE_WIDTH_AA:
      return 16.0; /* Off-by-one fixed point 4:4 encoding */

   case PIPE_CAPF_MAX_POINT_SIZE:
   case PIPE_CAPF_MAX_POINT_SIZE_AA:

   case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:

   case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
      return 16.0; /* arbitrary */

   case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE:
   case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE:
   case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY:

   debug_printf("Unexpected PIPE_CAPF %d query\n", param);
/* pipe_screen::get_shader_param hook: per-stage shader limits. Only
 * vertex, fragment and compute stages are supported. Several `return`
 * lines are elided in this chunk.
 */
agx_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_type shader,
                     enum pipe_shader_cap param)
   /* no16 debug flag disables 16-bit shader support */
   bool is_no16 = agx_device(pscreen)->debug & AGX_DBG_NO16;

   case PIPE_SHADER_VERTEX:
   case PIPE_SHADER_FRAGMENT:
   case PIPE_SHADER_COMPUTE:

   /* Don't allow side effects with vertex processing. The APIs don't require it
    * and it may be problematic on our hardware.
    */
   bool allow_side_effects = (shader != PIPE_SHADER_VERTEX);

   /* this is probably not totally correct.. but it's a start: */
   case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
   case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
   case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
   case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:

   case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:

   case PIPE_SHADER_CAP_MAX_INPUTS:

   case PIPE_SHADER_CAP_MAX_OUTPUTS:
      return shader == PIPE_SHADER_FRAGMENT ? 8 : 16;

   case PIPE_SHADER_CAP_MAX_TEMPS:
      return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */

   case PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE:
      return 16 * 1024 * sizeof(float);

   case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:

   case PIPE_SHADER_CAP_CONT_SUPPORTED:

   case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
   case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
   case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
   case PIPE_SHADER_CAP_SUBROUTINES:
   case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:

   case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
   case PIPE_SHADER_CAP_INTEGERS:

   case PIPE_SHADER_CAP_FP16:
   case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
   case PIPE_SHADER_CAP_FP16_DERIVATIVES:
   case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:

   case PIPE_SHADER_CAP_INT16:
      /* GLSL compiler is broken. Flip this on when Panfrost does. */

   case PIPE_SHADER_CAP_INT64_ATOMICS:
   case PIPE_SHADER_CAP_DROUND_SUPPORTED:
   case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:

   case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:

   case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
      return PIPE_MAX_SHADER_SAMPLER_VIEWS;

   case PIPE_SHADER_CAP_SUPPORTED_IRS:
      return (1 << PIPE_SHADER_IR_NIR);

   /* SSBOs/images only where side effects are allowed (not vertex) */
   case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
      return allow_side_effects ? PIPE_MAX_SHADER_BUFFERS : 0;

   case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
      return allow_side_effects ? PIPE_MAX_SHADER_IMAGES : 0;

   case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
   case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:

   /* Other params are unknown */
/* pipe_screen::get_compute_param hook. The RET macro copies a compound
 * literal into *ret (when non-NULL) and returns its size; its surrounding
 * definition lines are partly elided in this chunk.
 */
agx_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type,
                      enum pipe_compute_cap param, void *ret)
      memcpy(ret, x, sizeof(x));                                               \

   case PIPE_COMPUTE_CAP_ADDRESS_BITS:
      RET((uint32_t[]){64});

   case PIPE_COMPUTE_CAP_IR_TARGET:
      sprintf(ret, "agx");
      return strlen("agx") * sizeof(char);

   case PIPE_COMPUTE_CAP_GRID_DIMENSION:
      RET((uint64_t[]){3});

   case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
      RET(((uint64_t[]){65535, 65535, 65535}));

   case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
      RET(((uint64_t[]){256, 256, 256}));

   case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
      RET((uint64_t[]){256});

   /* UMA: report total physical memory for global/alloc limits */
   case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
   case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: {
      uint64_t system_memory;

      if (!os_get_total_physical_memory(&system_memory))

      RET((uint64_t[]){system_memory});

   case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
      RET((uint64_t[]){32768});

   case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
   case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
      RET((uint64_t[]){4096});

   case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
      RET((uint32_t[]){800 /* MHz -- TODO */});

   case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
      RET((uint32_t[]){4 /* TODO */});

   case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
      RET((uint32_t[]){1});

   case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
      RET((uint32_t[]){32});

   case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
      RET((uint32_t[]){0 /* TODO */});

   case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
      RET((uint64_t[]){1024}); // TODO
/* pipe_screen::is_format_supported hook. Validates sample counts, vertex
 * buffer formats, texture/render-target support via the AGX pixel-format
 * table, and depth/stencil formats (native or lowered by
 * u_transfer_helper). Several `return` lines are elided in this chunk.
 */
agx_is_format_supported(struct pipe_screen *pscreen, enum pipe_format format,
                        enum pipe_texture_target target, unsigned sample_count,
                        unsigned storage_sample_count, unsigned usage)
   assert(target == PIPE_BUFFER || target == PIPE_TEXTURE_1D ||
          target == PIPE_TEXTURE_1D_ARRAY || target == PIPE_TEXTURE_2D ||
          target == PIPE_TEXTURE_2D_ARRAY || target == PIPE_TEXTURE_RECT ||
          target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE ||
          target == PIPE_TEXTURE_CUBE_ARRAY);

   /* Only 1x, 2x, and 4x MSAA are supported */
   if (sample_count > 1 && sample_count != 4 && sample_count != 2)

   if (sample_count > 1 && agx_device(pscreen)->debug & AGX_DBG_NOMSAA)

   if (MAX2(sample_count, 1) != MAX2(storage_sample_count, 1))

   if ((usage & PIPE_BIND_VERTEX_BUFFER) && !agx_vbo_supports_format(format))

   /* For framebuffer_no_attachments, fake support for "none" images */
   if (format == PIPE_FORMAT_NONE)

   if (usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) {
      enum pipe_format tex_format = format;

      /* Mimic the fixup done in create_sampler_view and u_transfer_helper so we
       * advertise GL_OES_texture_stencil8. Alternatively, we could make mesa/st
       */
      if (tex_format == PIPE_FORMAT_X24S8_UINT)
         tex_format = PIPE_FORMAT_S8_UINT;

      struct agx_pixel_format_entry ent = agx_pixel_format[tex_format];

      if (!agx_is_valid_pixel_format(tex_format))

      /* RGB32 is emulated for texture buffers only */
      if (ent.channels == AGX_CHANNELS_R32G32B32_EMULATED &&
          target != PIPE_BUFFER)

      if ((usage & PIPE_BIND_RENDER_TARGET) && !ent.renderable)

   if (usage & PIPE_BIND_DEPTH_STENCIL) {
      /* natively supported */
      case PIPE_FORMAT_Z16_UNORM:
      case PIPE_FORMAT_Z32_FLOAT:
      case PIPE_FORMAT_S8_UINT:

      /* lowered by u_transfer_helper to one of the above */
      case PIPE_FORMAT_Z24X8_UNORM:
      case PIPE_FORMAT_Z24_UNORM_S8_UINT:
      case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
/* pipe_screen::query_dmabuf_modifiers hook: report up to `max` supported
 * modifiers (in preference order from agx_best_modifiers) and the total
 * available count; no modifier is marked external-only.
 * NOTE(review): the declaration of `i` and the final return are not
 * visible in this chunk.
 */
agx_query_dmabuf_modifiers(struct pipe_screen *screen, enum pipe_format format,
                           int max, uint64_t *modifiers,
                           unsigned int *external_only, int *out_count)
   *out_count = ARRAY_SIZE(agx_best_modifiers);

   for (i = 0; i < ARRAY_SIZE(agx_best_modifiers) && i < max; i++) {
      external_only[i] = 0;

      modifiers[i] = agx_best_modifiers[i];

   /* Return the number of modifiers copied */
/* pipe_screen::is_dmabuf_modifier_supported hook: linear search of the
 * supported-modifier table; nothing is external-only. */
agx_is_dmabuf_modifier_supported(struct pipe_screen *screen, uint64_t modifier,
                                 enum pipe_format format, bool *external_only)
   *external_only = false;

   for (unsigned i = 0; i < ARRAY_SIZE(agx_best_modifiers); ++i) {
      if (agx_best_modifiers[i] == modifier)
/* pipe_screen::destroy hook: tear down the renderonly handle, the transfer
 * helper, the device, and the disk cache, then free the screen allocation.
 */
agx_destroy_screen(struct pipe_screen *pscreen)
   struct agx_screen *screen = agx_screen(pscreen);

   /* NOTE(review): presumably guarded by `if (screen->dev.ro)` — the guard
    * line is not visible in this chunk; confirm ro may legitimately be NULL. */
   screen->dev.ro->destroy(screen->dev.ro);

   u_transfer_helper_destroy(pscreen->transfer_helper);
   agx_close_device(&screen->dev);
   disk_cache_destroy(screen->disk_cache);
   ralloc_free(screen);
/* Return the NIR compiler options; AGX consumes NIR only, for all stages. */
agx_get_compiler_options(struct pipe_screen *pscreen, enum pipe_shader_ir ir,
                         enum pipe_shader_type shader)
   return &agx_nir_options;
/* u_transfer_helper callback: attach the separate stencil plane created by
 * the helper (Z32F_S8 splitting) to the depth resource. */
agx_resource_set_stencil(struct pipe_resource *prsrc,
                         struct pipe_resource *stencil)
   agx_resource(prsrc)->separate_stencil = agx_resource(stencil);
2094 static struct pipe_resource *
2095 agx_resource_get_stencil(struct pipe_resource *prsrc)
2097 return (struct pipe_resource *)agx_resource(prsrc)->separate_stencil;
2100 static enum pipe_format
2101 agx_resource_get_internal_format(struct pipe_resource *prsrc)
2103 return agx_resource(prsrc)->layout.format;
2106 static struct disk_cache *
2107 agx_get_disk_shader_cache(struct pipe_screen *pscreen)
2109 return agx_screen(pscreen)->disk_cache;
/* u_transfer_helper vtbl: routes resource create/destroy and map/unmap
 * through our implementations while the helper handles Z24/Z32F_S8
 * splitting and MSAA mapping on top. */
static const struct u_transfer_vtbl transfer_vtbl = {
   .resource_create = agx_resource_create,
   .resource_destroy = agx_resource_destroy,
   .transfer_map = agx_transfer_map,
   .transfer_unmap = agx_transfer_unmap,
   .transfer_flush_region = agx_transfer_flush_region,
   .get_internal_format = agx_resource_get_internal_format,
   .set_stencil = agx_resource_set_stencil,
   .get_stencil = agx_resource_get_stencil,
/* pipe_screen::get_screen_fd hook: the DRM device FD backing this screen.
 * NOTE(review): the return-type line is not visible in this chunk. */
agx_screen_get_fd(struct pipe_screen *pscreen)
   return agx_device(pscreen)->fd;
/* Public entry point: create an AGX screen on the given DRM FD (with an
 * optional renderonly wrapper). Reads ASAHI_MESA_DEBUG, opens the device,
 * wires up all pipe_screen entry points, creates the transfer helper, and
 * initializes the shader disk cache. Error-path lines (NULL returns) are
 * elided in this chunk.
 */
struct pipe_screen *
agx_screen_create(int fd, struct renderonly *ro)
   struct agx_screen *agx_screen;
   struct pipe_screen *screen;

   agx_screen = rzalloc(NULL, struct agx_screen);

   screen = &agx_screen->pscreen;

   /* Set debug before opening */
   agx_screen->dev.debug =
      debug_get_flags_option("ASAHI_MESA_DEBUG", agx_debug_options, 0);

   agx_screen->dev.fd = fd;
   agx_screen->dev.ro = ro;

   /* Try to open an AGX device */
   if (!agx_open_device(screen, &agx_screen->dev)) {
      ralloc_free(agx_screen);

   if (agx_screen->dev.debug & AGX_DBG_DEQP) {
      /* You're on your own. */
      static bool warned_about_hacks = false;

      if (!warned_about_hacks) {
         agx_msg("\n------------------\n"
                 "Unsupported debug parameter set. Expect breakage.\n"
                 "Do not report bugs.\n"
                 "------------------\n\n");
         warned_about_hacks = true;

   screen->destroy = agx_destroy_screen;
   screen->get_screen_fd = agx_screen_get_fd;
   screen->get_name = agx_get_name;
   screen->get_vendor = agx_get_vendor;
   screen->get_device_vendor = agx_get_device_vendor;
   screen->get_param = agx_get_param;
   screen->get_shader_param = agx_get_shader_param;
   screen->get_compute_param = agx_get_compute_param;
   screen->get_paramf = agx_get_paramf;
   screen->is_format_supported = agx_is_format_supported;
   screen->query_dmabuf_modifiers = agx_query_dmabuf_modifiers;
   screen->is_dmabuf_modifier_supported = agx_is_dmabuf_modifier_supported;
   screen->context_create = agx_create_context;
   screen->resource_from_handle = agx_resource_from_handle;
   screen->resource_get_handle = agx_resource_get_handle;
   screen->resource_get_param = agx_resource_get_param;
   screen->resource_create_with_modifiers = agx_resource_create_with_modifiers;
   screen->get_timestamp = u_default_get_timestamp;
   screen->fence_reference = agx_fence_reference;
   screen->fence_finish = agx_fence_finish;
   screen->fence_get_fd = agx_fence_get_fd;
   screen->get_compiler_options = agx_get_compiler_options;
   screen->get_disk_shader_cache = agx_get_disk_shader_cache;

   /* Resource create/destroy and transfers go through u_transfer_helper,
    * which lowers Z24/Z32F_S8 and MSAA maps before calling transfer_vtbl */
   screen->resource_create = u_transfer_helper_resource_create;
   screen->resource_destroy = u_transfer_helper_resource_destroy;
   screen->transfer_helper = u_transfer_helper_create(
      U_TRANSFER_HELPER_SEPARATE_Z32S8 | U_TRANSFER_HELPER_SEPARATE_STENCIL |
      U_TRANSFER_HELPER_MSAA_MAP | U_TRANSFER_HELPER_Z24_IN_Z32F);

   agx_disk_cache_init(agx_screen);