src/gallium/drivers/asahi/agx_pipe.c

   1 /*
   2  * Copyright 2010 Red Hat Inc.
   3  * Copyright 2014-2017 Broadcom
   4  * Copyright 2019-2020 Collabora, Ltd.
   5  * Copyright 2006 VMware, Inc.
   6  * SPDX-License-Identifier: MIT
   7  */
   8 #include <errno.h>
   9 #include <stdio.h>
  10 #include <xf86drm.h>
  11 #include "asahi/compiler/agx_compile.h"
  12 #include "asahi/layout/layout.h"
  13 #include "asahi/lib/agx_formats.h"
  14 #include "asahi/lib/decode.h"
  15 #include "drm-uapi/drm_fourcc.h"
  16 #include "frontend/winsys_handle.h"
  17 #include "gallium/auxiliary/renderonly/renderonly.h"
  18 #include "gallium/auxiliary/util/u_debug_cb.h"
  19 #include "gallium/auxiliary/util/u_framebuffer.h"
  20 #include "gallium/auxiliary/util/u_sample_positions.h"
  21 #include "gallium/auxiliary/util/u_surface.h"
  22 #include "gallium/auxiliary/util/u_transfer.h"
  23 #include "gallium/auxiliary/util/u_transfer_helper.h"
  24 #include "pipe/p_context.h"
  25 #include "pipe/p_defines.h"
  26 #include "pipe/p_screen.h"
  27 #include "pipe/p_state.h"
  28 #include "util/format/u_format.h"
  29 #include "util/half_float.h"
  30 #include "util/u_drm.h"
  31 #include "util/u_gen_mipmap.h"
  32 #include "util/u_inlines.h"
  33 #include "util/u_memory.h"
  34 #include "util/u_screen.h"
  35 #include "util/u_upload_mgr.h"
  36 #include "agx_device.h"
  37 #include "agx_disk_cache.h"
  38 #include "agx_fence.h"
  39 #include "agx_public.h"
  40 #include "agx_state.h"
  41 #include "agx_tilebuffer.h"
  42
  43 /* Fake values, pending UAPI upstreaming */
  44 #ifndef DRM_FORMAT_MOD_APPLE_TWIDDLED
  45 #define DRM_FORMAT_MOD_APPLE_TWIDDLED (2)
  46 #endif
  47 #ifndef DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED
  48 #define DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED (3)
  49 #endif
  50
  51 /* clang-format off */
/* Table of named debug options. Each entry pairs an option name with the
 * AGX_DBG_* bit it selects and a one-line help string. The "dirty" option is
 * only present when NDEBUG is not defined. The table is terminated by
 * DEBUG_NAMED_VALUE_END.
 */
static const struct debug_named_value agx_debug_options[] = {
   {"trace",     AGX_DBG_TRACE,    "Trace the command stream"},
   {"deqp",      AGX_DBG_DEQP,     "Hacks for dEQP"},
   {"no16",      AGX_DBG_NO16,     "Disable 16-bit support"},
   {"perf",      AGX_DBG_PERF,     "Print performance warnings"},
#ifndef NDEBUG
   {"dirty",     AGX_DBG_DIRTY,    "Disable dirty tracking"},
#endif
   {"precompile",AGX_DBG_PRECOMPILE,"Precompile shaders for shader-db"},
   {"nocompress",AGX_DBG_NOCOMPRESS,"Disable lossless compression"},
   {"nocluster", AGX_DBG_NOCLUSTER,"Disable vertex clustering"},
   {"sync",      AGX_DBG_SYNC,     "Synchronously wait for all submissions"},
   {"stats",     AGX_DBG_STATS,    "Show command execution statistics"},
   {"resource",  AGX_DBG_RESOURCE, "Log resource operations"},
   {"batch",     AGX_DBG_BATCH,    "Log batches"},
   {"nowc",      AGX_DBG_NOWC,     "Disable write-combining"},
   {"synctvb",   AGX_DBG_SYNCTVB,  "Synchronous TVB growth"},
   {"smalltile", AGX_DBG_SMALLTILE,"Force 16x16 tiles"},
   {"nomsaa",    AGX_DBG_NOMSAA,   "Force disable MSAA"},
   {"noshadow",  AGX_DBG_NOSHADOW, "Force disable resource shadowing"},
   DEBUG_NAMED_VALUE_END
};
  74 /* clang-format on */
  75
/* Modifiers understood by this driver: compressed twiddled, twiddled, linear.
 * NOTE(review): the name suggests the entries are ordered from most to least
 * preferred -- confirm against the users of this array, which are not visible
 * in this part of the file.
 */
uint64_t agx_best_modifiers[] = {
   DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED,
   DRM_FORMAT_MOD_APPLE_TWIDDLED,
   DRM_FORMAT_MOD_LINEAR,
};
  81
  82 /* These limits are arbitrarily chosen and subject to change as
  83  * we discover more workloads with heavy shadowing.
  84  *
  85  * Maximum size of a shadowed object in bytes.
  86  * Hint: 1024x1024xRGBA8 = 4 MiB. Go higher for compression.
  87  */
  88 #define MAX_SHADOW_BYTES (6 * 1024 * 1024)
  89
  90 /* Maximum cumulative size to shadow an object before we flush.
  91  * Allows shadowing a 4MiB + meta object 8 times with the logic
  92  * below (+1 shadow offset implied).
  93  */
  94 #define MAX_TOTAL_SHADOW_BYTES (32 * 1024 * 1024)
  95
  96 void agx_init_state_functions(struct pipe_context *ctx);
  97
  98 /*
  99  * resource
 100  */
 101
 102 static enum ail_tiling
 103 ail_modifier_to_tiling(uint64_t modifier)
 104 {
 105    switch (modifier) {
 106    case DRM_FORMAT_MOD_LINEAR:
 107       return AIL_TILING_LINEAR;
 108    case DRM_FORMAT_MOD_APPLE_TWIDDLED:
 109       return AIL_TILING_TWIDDLED;
 110    case DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED:
 111       return AIL_TILING_TWIDDLED_COMPRESSED;
 112    default:
 113       unreachable("Unsupported modifier");
 114    }
 115 }
 116
/* Four-character tag for each ail tiling mode, indexed by enum ail_tiling.
 * Used when printing resources in agx_resource_debug().
 */
const static char *s_tiling[] = {
   [AIL_TILING_LINEAR] = "LINR",
   [AIL_TILING_TWIDDLED] = "TWID",
   [AIL_TILING_TWIDDLED_COMPRESSED] = "COMP",
};
 122
/* Print a printf-style message through agx_msg(), but only when the device
 * that owns `res` has the AGX_DBG_RESOURCE debug flag set. `res` is evaluated
 * once and must have a `base.screen` member.
 */
#define rsrc_debug(res, ...)                                                   \
   do {                                                                        \
      if (agx_device((res)->base.screen)->debug & AGX_DBG_RESOURCE)            \
         agx_msg(__VA_ARGS__);                                                 \
   } while (0)
 128
 129 static void
 130 agx_resource_debug(struct agx_resource *res, const char *msg)
 131 {
 132    if (!(agx_device(res->base.screen)->debug & AGX_DBG_RESOURCE))
 133       return;
 134
 135    int ino = -1;
 136    if (res->bo->prime_fd >= 0) {
 137       struct stat sb;
 138       if (!fstat(res->bo->prime_fd, &sb))
 139          ino = sb.st_ino;
 140    }
 141
 142    agx_msg(
 143       "%s%s %dx%dx%d %dL %d/%dM %dS M:%llx %s %s%s S:0x%llx LS:0x%llx CS:0x%llx "
 144       "Base=0x%llx Size=0x%llx Meta=0x%llx/0x%llx (%s) %s%s%s%s%s%sfd:%d(%d) @ %p\n",
 145       msg ?: "", util_format_short_name(res->base.format), res->base.width0,
 146       res->base.height0, res->base.depth0, res->base.array_size,
 147       res->base.last_level, res->layout.levels, res->layout.sample_count_sa,
 148       (long long)res->modifier, s_tiling[res->layout.tiling],
 149       res->layout.mipmapped_z ? "MZ " : "",
 150       res->layout.page_aligned_layers ? "PL " : "",
 151       (long long)res->layout.linear_stride_B,
 152       (long long)res->layout.layer_stride_B,
 153       (long long)res->layout.compression_layer_stride_B,
 154       (long long)res->bo->ptr.gpu, (long long)res->layout.size_B,
 155       res->layout.metadata_offset_B
 156          ? ((long long)res->bo->ptr.gpu + res->layout.metadata_offset_B)
 157          : 0,
 158       (long long)res->layout.metadata_offset_B, res->bo->label,
 159       res->bo->flags & AGX_BO_SHARED ? "SH " : "",
 160       res->bo->flags & AGX_BO_LOW_VA ? "LO " : "",
 161       res->bo->flags & AGX_BO_EXEC ? "EX " : "",
 162       res->bo->flags & AGX_BO_WRITEBACK ? "WB " : "",
 163       res->bo->flags & AGX_BO_SHAREABLE ? "SA " : "",
 164       res->bo->flags & AGX_BO_READONLY ? "RO " : "", res->bo->prime_fd, ino,
 165       res);
 166 }
 167
 168 static void
 169 agx_resource_setup(struct agx_device *dev, struct agx_resource *nresource)
 170 {
 171    struct pipe_resource *templ = &nresource->base;
 172
 173    nresource->layout = (struct ail_layout){
 174       .tiling = ail_modifier_to_tiling(nresource->modifier),
 175       .mipmapped_z = templ->target == PIPE_TEXTURE_3D,
 176       .format = templ->format,
 177       .width_px = templ->width0,
 178       .height_px = templ->height0,
 179       .depth_px = templ->depth0 * templ->array_size,
 180       .sample_count_sa = MAX2(templ->nr_samples, 1),
 181       .levels = templ->last_level + 1,
 182       .writeable_image = templ->bind & PIPE_BIND_SHADER_IMAGE,
 183    };
 184 }
 185
 186 static struct pipe_resource *
 187 agx_resource_from_handle(struct pipe_screen *pscreen,
 188                          const struct pipe_resource *templat,
 189                          struct winsys_handle *whandle, unsigned usage)
 190 {
 191    struct agx_device *dev = agx_device(pscreen);
 192    struct agx_resource *rsc;
 193    struct pipe_resource *prsc;
 194
 195    assert(whandle->type == WINSYS_HANDLE_TYPE_FD);
 196
 197    rsc = CALLOC_STRUCT(agx_resource);
 198    if (!rsc)
 199       return NULL;
 200
 201    rsc->modifier = whandle->modifier == DRM_FORMAT_MOD_INVALID
 202                       ? DRM_FORMAT_MOD_LINEAR
 203                       : whandle->modifier;
 204
 205    /* We need strides to be aligned. ail asserts this, but we want to fail
 206     * gracefully so the app can handle the error.
 207     */
 208    if (rsc->modifier == DRM_FORMAT_MOD_LINEAR && (whandle->stride % 16) != 0) {
 209       FREE(rsc);
 210       return false;
 211    }
 212
 213    prsc = &rsc->base;
 214
 215    *prsc = *templat;
 216
 217    pipe_reference_init(&prsc->reference, 1);
 218    prsc->screen = pscreen;
 219
 220    rsc->bo = agx_bo_import(dev, whandle->handle);
 221    /* Sometimes an import can fail e.g. on an invalid buffer fd, out of
 222     * memory space to mmap it etc.
 223     */
 224    if (!rsc->bo) {
 225       FREE(rsc);
 226       return NULL;
 227    }
 228
 229    agx_resource_setup(dev, rsc);
 230
 231    if (rsc->layout.tiling == AIL_TILING_LINEAR) {
 232       rsc->layout.linear_stride_B = whandle->stride;
 233    } else if (whandle->stride != ail_get_wsi_stride_B(&rsc->layout, 0)) {
 234       FREE(rsc);
 235       return NULL;
 236    }
 237
 238    assert(whandle->offset == 0);
 239
 240    ail_make_miptree(&rsc->layout);
 241
 242    if (prsc->target == PIPE_BUFFER) {
 243       assert(rsc->layout.tiling == AIL_TILING_LINEAR);
 244       util_range_init(&rsc->valid_buffer_range);
 245    }
 246
 247    agx_resource_debug(rsc, "Import: ");
 248
 249    return prsc;
 250 }
 251
 252 static bool
 253 agx_resource_get_handle(struct pipe_screen *pscreen, struct pipe_context *ctx,
 254                         struct pipe_resource *pt, struct winsys_handle *handle,
 255                         unsigned usage)
 256 {
 257    struct agx_device *dev = agx_device(pscreen);
 258    struct pipe_resource *cur = pt;
 259
 260    /* Even though asahi doesn't support multi-planar formats, we
 261     * can get here through GBM, which does. Walk the list of planes
 262     * to find the right one.
 263     */
 264    for (int i = 0; i < handle->plane; i++) {
 265       cur = cur->next;
 266       if (!cur)
 267          return false;
 268    }
 269
 270    struct agx_resource *rsrc = agx_resource(cur);
 271
 272    if (handle->type == WINSYS_HANDLE_TYPE_KMS && dev->ro) {
 273       rsrc_debug(rsrc, "Get handle: %p (KMS RO)\n", rsrc);
 274
 275       if (!rsrc->scanout && dev->ro && (rsrc->base.bind & PIPE_BIND_SCANOUT)) {
 276          rsrc->scanout =
 277             renderonly_scanout_for_resource(&rsrc->base, dev->ro, NULL);
 278       }
 279
 280       if (!rsrc->scanout)
 281          return false;
 282
 283       return renderonly_get_handle(rsrc->scanout, handle);
 284    } else if (handle->type == WINSYS_HANDLE_TYPE_KMS) {
 285       rsrc_debug(rsrc, "Get handle: %p (KMS)\n", rsrc);
 286
 287       handle->handle = rsrc->bo->handle;
 288    } else if (handle->type == WINSYS_HANDLE_TYPE_FD) {
 289       int fd = agx_bo_export(rsrc->bo);
 290
 291       if (fd < 0)
 292          return false;
 293
 294       handle->handle = fd;
 295       if (dev->debug & AGX_DBG_RESOURCE) {
 296          struct stat sb;
 297          fstat(rsrc->bo->prime_fd, &sb);
 298          agx_msg("Get handle: %p (FD %d/%ld)\n", rsrc, fd, (long)sb.st_ino);
 299       }
 300    } else {
 301       /* Other handle types not supported */
 302       return false;
 303    }
 304
 305    handle->stride = ail_get_wsi_stride_B(&rsrc->layout, 0);
 306    handle->size = rsrc->layout.size_B;
 307    handle->offset = rsrc->layout.level_offsets_B[0];
 308    handle->format = rsrc->layout.format;
 309    handle->modifier = rsrc->modifier;
 310
 311    return true;
 312 }
 313
 314 static bool
 315 agx_resource_get_param(struct pipe_screen *pscreen, struct pipe_context *pctx,
 316                        struct pipe_resource *prsc, unsigned plane,
 317                        unsigned layer, unsigned level,
 318                        enum pipe_resource_param param, unsigned usage,
 319                        uint64_t *value)
 320 {
 321    struct agx_resource *rsrc = (struct agx_resource *)prsc;
 322    struct pipe_resource *cur;
 323    unsigned count;
 324
 325    switch (param) {
 326    case PIPE_RESOURCE_PARAM_STRIDE:
 327       *value = ail_get_wsi_stride_B(&rsrc->layout, level);
 328       return true;
 329    case PIPE_RESOURCE_PARAM_OFFSET:
 330       *value = rsrc->layout.level_offsets_B[level];
 331       return true;
 332    case PIPE_RESOURCE_PARAM_MODIFIER:
 333       *value = rsrc->modifier;
 334       return true;
 335    case PIPE_RESOURCE_PARAM_NPLANES:
 336       /* We don't support multi-planar formats, but we should still handle
 337        * this case for GBM shared resources.
 338        */
 339       for (count = 0, cur = prsc; cur; cur = cur->next)
 340          count++;
 341       *value = count;
 342       return true;
 343    default:
 344       return false;
 345    }
 346 }
 347
 348 static bool
 349 agx_is_2d(enum pipe_texture_target target)
 350 {
 351    return (target == PIPE_TEXTURE_2D || target == PIPE_TEXTURE_RECT);
 352 }
 353
 354 static bool
 355 agx_linear_allowed(const struct agx_resource *pres)
 356 {
 357    /* Mipmapping not allowed with linear */
 358    if (pres->base.last_level != 0)
 359       return false;
 360
 361    /* Depth/stencil buffers must not be linear */
 362    if (pres->base.bind & PIPE_BIND_DEPTH_STENCIL)
 363       return false;
 364
 365    /* Multisampling not allowed with linear */
 366    if (pres->base.nr_samples > 1)
 367       return false;
 368
 369    /* Block compression not allowed with linear */
 370    if (util_format_is_compressed(pres->base.format))
 371       return false;
 372
 373    switch (pres->base.target) {
 374    /* 1D is always linear, even with image atomics */
 375    case PIPE_BUFFER:
 376    case PIPE_TEXTURE_1D:
 377    case PIPE_TEXTURE_1D_ARRAY:
 378
 379    /* Linear textures require specifying their strides explicitly, which only
 380     * works for 2D textures. Rectangle textures are a special case of 2D.
 381     *
 382     * However, we don't want to support this case in the image atomic
 383     * implementation, so linear shader images are specially forbidden.
 384     */
 385    case PIPE_TEXTURE_2D:
 386    case PIPE_TEXTURE_2D_ARRAY:
 387    case PIPE_TEXTURE_RECT:
 388       if (pres->base.bind & PIPE_BIND_SHADER_IMAGE)
 389          return false;
 390
 391       break;
 392
 393    /* No other texture type can specify a stride */
 394    default:
 395       return false;
 396    }
 397
 398    return true;
 399 }
 400
 401 static bool
 402 agx_twiddled_allowed(const struct agx_resource *pres)
 403 {
 404    /* Certain binds force linear */
 405    if (pres->base.bind & (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_LINEAR))
 406       return false;
 407
 408    /* Buffers must be linear */
 409    if (pres->base.target == PIPE_BUFFER)
 410       return false;
 411
 412    /* Anything else may be twiddled */
 413    return true;
 414 }
 415
 416 static bool
 417 agx_compression_allowed(const struct agx_resource *pres)
 418 {
 419    /* Allow disabling compression for debugging */
 420    if (agx_device(pres->base.screen)->debug & AGX_DBG_NOCOMPRESS) {
 421       rsrc_debug(pres, "No compression: disabled\n");
 422       return false;
 423    }
 424
 425    /* Limited to renderable */
 426    if (pres->base.bind &
 427        ~(PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET |
 428          PIPE_BIND_DEPTH_STENCIL | PIPE_BIND_SHARED | PIPE_BIND_SCANOUT)) {
 429       rsrc_debug(pres, "No compression: not renderable\n");
 430       return false;
 431    }
 432
 433    /* We use the PBE for compression via staging blits, so we can only compress
 434     * renderable formats. As framebuffer compression, other formats don't make a
 435     * ton of sense to compress anyway.
 436     */
 437    if (!agx_pixel_format[pres->base.format].renderable &&
 438        !util_format_is_depth_or_stencil(pres->base.format)) {
 439       rsrc_debug(pres, "No compression: format not renderable\n");
 440       return false;
 441    }
 442
 443    /* Lossy-compressed texture formats cannot be compressed */
 444    assert(!util_format_is_compressed(pres->base.format) &&
 445           "block-compressed formats are not renderable");
 446
 447    if (!ail_can_compress(pres->base.width0, pres->base.height0,
 448                          MAX2(pres->base.nr_samples, 1))) {
 449       rsrc_debug(pres, "No compression: too small\n");
 450       return false;
 451    }
 452
 453    return true;
 454 }
 455
 456 static uint64_t
 457 agx_select_modifier_from_list(const struct agx_resource *pres,
 458                               const uint64_t *modifiers, int count)
 459 {
 460    if (agx_twiddled_allowed(pres) && agx_compression_allowed(pres) &&
 461        drm_find_modifier(DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED, modifiers,
 462                          count))
 463       return DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED;
 464
 465    if (agx_twiddled_allowed(pres) &&
 466        drm_find_modifier(DRM_FORMAT_MOD_APPLE_TWIDDLED, modifiers, count))
 467       return DRM_FORMAT_MOD_APPLE_TWIDDLED;
 468
 469    if (agx_linear_allowed(pres) &&
 470        drm_find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count))
 471       return DRM_FORMAT_MOD_LINEAR;
 472
 473    /* We didn't find anything */
 474    return DRM_FORMAT_MOD_INVALID;
 475 }
 476
 477 static uint64_t
 478 agx_select_best_modifier(const struct agx_resource *pres)
 479 {
 480    /* Prefer linear for staging resources, which should be as fast as possible
 481     * to write from the CPU.
 482     */
 483    if (agx_linear_allowed(pres) && pres->base.usage == PIPE_USAGE_STAGING)
 484       return DRM_FORMAT_MOD_LINEAR;
 485
 486    /* For SCANOUT or SHARED resources with no explicit modifier selection, force
 487     * linear since we cannot expect consumers to correctly pass through the
 488     * modifier (unless linear is not allowed at all).
 489     */
 490    if (agx_linear_allowed(pres) &&
 491        pres->base.bind & (PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)) {
 492       return DRM_FORMAT_MOD_LINEAR;
 493    }
 494
 495    if (agx_twiddled_allowed(pres)) {
 496       if (agx_compression_allowed(pres))
 497          return DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED;
 498       else
 499          return DRM_FORMAT_MOD_APPLE_TWIDDLED;
 500    }
 501
 502    assert(agx_linear_allowed(pres));
 503    return DRM_FORMAT_MOD_LINEAR;
 504 }
 505
 506 static struct pipe_resource *
 507 agx_resource_create_with_modifiers(struct pipe_screen *screen,
 508                                    const struct pipe_resource *templ,
 509                                    const uint64_t *modifiers, int count)
 510 {
 511    struct agx_device *dev = agx_device(screen);
 512    struct agx_resource *nresource;
 513
 514    nresource = CALLOC_STRUCT(agx_resource);
 515    if (!nresource)
 516       return NULL;
 517
 518    nresource->base = *templ;
 519    nresource->base.screen = screen;
 520
 521    if (modifiers) {
 522       nresource->modifier =
 523          agx_select_modifier_from_list(nresource, modifiers, count);
 524
 525       /* There may not be a matching modifier, bail if so */
 526       if (nresource->modifier == DRM_FORMAT_MOD_INVALID) {
 527          free(nresource);
 528          return NULL;
 529       }
 530    } else {
 531       nresource->modifier = agx_select_best_modifier(nresource);
 532
 533       assert(nresource->modifier != DRM_FORMAT_MOD_INVALID);
 534    }
 535
 536    /* If there's only 1 layer and there's no compression, there's no harm in
 537     * inferring the shader image flag. Do so to avoid reallocation in case the
 538     * resource is later used as an image.
 539     */
 540    if (nresource->modifier != DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED &&
 541        templ->depth0 == 1) {
 542
 543       nresource->base.bind |= PIPE_BIND_SHADER_IMAGE;
 544    }
 545
 546    nresource->mipmapped = (templ->last_level > 0);
 547
 548    assert(templ->format != PIPE_FORMAT_Z24X8_UNORM &&
 549           templ->format != PIPE_FORMAT_Z24_UNORM_S8_UINT &&
 550           "u_transfer_helper should have lowered");
 551
 552    agx_resource_setup(dev, nresource);
 553
 554    pipe_reference_init(&nresource->base.reference, 1);
 555
 556    ail_make_miptree(&nresource->layout);
 557
 558    if (templ->target == PIPE_BUFFER) {
 559       assert(nresource->layout.tiling == AIL_TILING_LINEAR);
 560       util_range_init(&nresource->valid_buffer_range);
 561    }
 562
 563    /* Guess a label based on the bind */
 564    unsigned bind = templ->bind;
 565
 566    const char *label = (bind & PIPE_BIND_INDEX_BUFFER)     ? "Index buffer"
 567                        : (bind & PIPE_BIND_SCANOUT)        ? "Scanout"
 568                        : (bind & PIPE_BIND_DISPLAY_TARGET) ? "Display target"
 569                        : (bind & PIPE_BIND_SHARED)         ? "Shared resource"
 570                        : (bind & PIPE_BIND_RENDER_TARGET)  ? "Render target"
 571                        : (bind & PIPE_BIND_DEPTH_STENCIL)
 572                           ? "Depth/stencil buffer"
 573                        : (bind & PIPE_BIND_SAMPLER_VIEW)    ? "Texture"
 574                        : (bind & PIPE_BIND_VERTEX_BUFFER)   ? "Vertex buffer"
 575                        : (bind & PIPE_BIND_CONSTANT_BUFFER) ? "Constant buffer"
 576                        : (bind & PIPE_BIND_GLOBAL)          ? "Global memory"
 577                        : (bind & PIPE_BIND_SHADER_BUFFER)   ? "Shader buffer"
 578                        : (bind & PIPE_BIND_SHADER_IMAGE)    ? "Shader image"
 579                                                             : "Other resource";
 580
 581    uint32_t create_flags = 0;
 582
 583    /* Default to write-combine resources, but use writeback if that is expected
 584     * to be beneficial.
 585     */
 586    if (nresource->base.usage == PIPE_USAGE_STAGING ||
 587        (nresource->base.flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
 588
 589       create_flags |= AGX_BO_WRITEBACK;
 590    }
 591
 592    /* Allow disabling write-combine to debug performance issues */
 593    if (dev->debug & AGX_DBG_NOWC) {
 594       create_flags |= AGX_BO_WRITEBACK;
 595    }
 596
 597    /* Create buffers that might be shared with the SHAREABLE flag */
 598    if (bind & (PIPE_BIND_SCANOUT | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SHARED))
 599       create_flags |= AGX_BO_SHAREABLE;
 600
 601    nresource->bo =
 602       agx_bo_create(dev, nresource->layout.size_B, create_flags, label);
 603
 604    if (!nresource->bo) {
 605       FREE(nresource);
 606       return NULL;
 607    }
 608
 609    agx_resource_debug(nresource, "New: ");
 610    return &nresource->base;
 611 }
 612
/* Create a resource without an explicit modifier list. Forwards to
 * agx_resource_create_with_modifiers() with modifiers == NULL, so the driver
 * picks the modifier itself. Returns NULL on failure.
 */
static struct pipe_resource *
agx_resource_create(struct pipe_screen *screen,
                    const struct pipe_resource *templ)
{
   return agx_resource_create_with_modifiers(screen, templ, NULL, 0);
}
 619
 620 static void
 621 agx_resource_destroy(struct pipe_screen *screen, struct pipe_resource *prsrc)
 622 {
 623    struct agx_resource *rsrc = (struct agx_resource *)prsrc;
 624    struct agx_screen *agx_screen = (struct agx_screen *)screen;
 625
 626    agx_resource_debug(rsrc, "Destroy: ");
 627
 628    if (prsrc->target == PIPE_BUFFER)
 629       util_range_destroy(&rsrc->valid_buffer_range);
 630
 631    if (rsrc->scanout)
 632       renderonly_scanout_destroy(rsrc->scanout, agx_screen->dev.ro);
 633
 634    agx_bo_unreference(rsrc->bo);
 635    FREE(rsrc);
 636 }
 637
 638 void
 639 agx_batch_track_image(struct agx_batch *batch, struct pipe_image_view *image)
 640 {
 641    struct agx_resource *rsrc = agx_resource(image->resource);
 642
 643    if (image->shader_access & PIPE_IMAGE_ACCESS_WRITE) {
 644       agx_batch_writes(batch, rsrc);
 645
 646       bool is_buffer = rsrc->base.target == PIPE_BUFFER;
 647       unsigned level = is_buffer ? 0 : image->u.tex.level;
 648       BITSET_SET(rsrc->data_valid, level);
 649
 650       if (is_buffer) {
 651          util_range_add(&rsrc->base, &rsrc->valid_buffer_range, 0,
 652                         rsrc->base.width0);
 653       }
 654    } else {
 655       agx_batch_reads(batch, rsrc);
 656    }
 657 }
 658
 659 /*
 660  * transfer
 661  */
 662
/* Flush a sub-region of a mapped transfer. Intentionally a no-op: this
 * function does nothing with any of its arguments.
 */
static void
agx_transfer_flush_region(struct pipe_context *pipe,
                          struct pipe_transfer *transfer,
                          const struct pipe_box *box)
{
}
 669
 670 /* Reallocate the backing buffer of a resource, returns true if successful */
 671 static bool
 672 agx_shadow(struct agx_context *ctx, struct agx_resource *rsrc, bool needs_copy)
 673 {
 674    struct agx_device *dev = agx_device(ctx->base.screen);
 675    struct agx_bo *old = rsrc->bo;
 676    size_t size = rsrc->layout.size_B;
 677    unsigned flags = old->flags;
 678
 679    if (dev->debug & AGX_DBG_NOSHADOW)
 680       return false;
 681
 682    /* If a resource is (or could be) shared, shadowing would desync across
 683     * processes. (It's also not what this path is for.)
 684     */
 685    if (flags & (AGX_BO_SHARED | AGX_BO_SHAREABLE))
 686       return false;
 687
 688    /* Do not shadow resources that are too large */
 689    if (size > MAX_SHADOW_BYTES)
 690       return false;
 691
 692    /* Do not shadow resources too much */
 693    if (rsrc->shadowed_bytes >= MAX_TOTAL_SHADOW_BYTES)
 694       return false;
 695
 696    rsrc->shadowed_bytes += size;
 697
 698    /* If we need to copy, we reallocate the resource with cached-coherent
 699     * memory. This is a heuristic: it assumes that if the app needs a shadows
 700     * (with a copy) now, it will again need to shadow-and-copy the same resource
 701     * in the future. This accelerates the later copies, since otherwise the copy
 702     * involves reading uncached memory.
 703     */
 704    if (needs_copy)
 705       flags |= AGX_BO_WRITEBACK;
 706
 707    struct agx_bo *new_ = agx_bo_create(dev, size, flags, old->label);
 708
 709    /* If allocation failed, we can fallback on a flush gracefully*/
 710    if (new_ == NULL)
 711       return false;
 712
 713    if (needs_copy) {
 714       perf_debug_ctx(ctx, "Shadowing %zu bytes on the CPU (%s)", size,
 715                      (old->flags & AGX_BO_WRITEBACK) ? "cached" : "uncached");
 716       agx_resource_debug(rsrc, "Shadowed: ");
 717
 718       memcpy(new_->ptr.cpu, old->ptr.cpu, size);
 719    }
 720
 721    /* Swap the pointers, dropping a reference */
 722    agx_bo_unreference(rsrc->bo);
 723    rsrc->bo = new_;
 724
 725    /* Reemit descriptors using this resource */
 726    agx_dirty_all(ctx);
 727    return true;
 728 }
 729
/*
 * Perform the required synchronization before a transfer_map operation can
 * complete. This may require syncing batches.
 *
 * Unsynchronized maps return immediately. Otherwise writers of the resource
 * are always synced; for write maps, readers are additionally either avoided
 * by shadowing the resource (see agx_shadow) or synced. The resource's
 * shadowed_bytes counter is reset whenever no shadow was needed or a real
 * sync took place.
 */
static void
agx_prepare_for_map(struct agx_context *ctx, struct agx_resource *rsrc,
                    unsigned level,
                    unsigned usage, /* a combination of PIPE_MAP_x */
                    const struct pipe_box *box)
{
   /* Upgrade DISCARD_RANGE to WHOLE_RESOURCE if the whole resource is
    * being mapped. Only done for non-persistent, non-mipmapped resources.
    */
   if ((usage & PIPE_MAP_DISCARD_RANGE) &&
       !(rsrc->base.flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) &&
       rsrc->base.last_level == 0 &&
       util_texrange_covers_whole_level(&rsrc->base, 0, box->x, box->y, box->z,
                                        box->width, box->height, box->depth)) {

      usage |= PIPE_MAP_DISCARD_WHOLE_RESOURCE;
   }

   /* Shadowing doesn't work with separate stencil or shared resources */
   if (rsrc->separate_stencil || (rsrc->bo->flags & AGX_BO_SHARED))
      usage &= ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;

   /* If the access is unsynchronized, there's nothing to do */
   if (usage & PIPE_MAP_UNSYNCHRONIZED)
      return;

   /* Everything after this needs the context, which is not safe for
    * unsynchronized transfers when we claim
    * PIPE_CAP_MAP_UNSYNCHRONIZED_THREAD_SAFE.
    */
   assert(!(usage & PIPE_MAP_UNSYNCHRONIZED));

   /* Both writing and reading need writers synced */
   agx_sync_writer(ctx, rsrc, "Unsynchronized transfer");

   /* Additionally, writing needs readers synced */
   if (!(usage & PIPE_MAP_WRITE))
      return;

   /* If the range being written is uninitialized, we do not need to sync.
    * Only applies to non-shared buffers, whose valid range we track.
    */
   if (rsrc->base.target == PIPE_BUFFER && !(rsrc->bo->flags & AGX_BO_SHARED) &&
       !util_ranges_intersect(&rsrc->valid_buffer_range, box->x,
                              box->x + box->width))
      return;

   /* If there are no readers, we're done. We check at the start to
    * avoid expensive shadowing paths or duplicated checks in this happy path.
    */
   if (!agx_any_batch_uses_resource(ctx, rsrc)) {
      rsrc->shadowed_bytes = 0;
      return;
   }

   /* There are readers. Try to shadow the resource to avoid a sync. The old
    * contents only need copying if the map does not discard the whole
    * resource.
    */
   if (!(rsrc->base.flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) &&
       agx_shadow(ctx, rsrc, !(usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE)))
      return;

   /* Otherwise, we need to sync */
   agx_sync_readers(ctx, rsrc, "Unsynchronized write");

   rsrc->shadowed_bytes = 0;
}
 797
 798 /*
 799  * Return a colour-renderable format compatible with a depth/stencil format, to
 800  * be used as an interchange format for depth/stencil blits. For
 801  * non-depth/stencil formats, returns the format itself.
 802  */
 803 static enum pipe_format
 804 agx_staging_color_format_for_zs(enum pipe_format format)
 805 {
 806    switch (format) {
 807    case PIPE_FORMAT_Z16_UNORM:
 808       return PIPE_FORMAT_R16_UNORM;
 809    case PIPE_FORMAT_Z32_FLOAT:
 810       return PIPE_FORMAT_R32_FLOAT;
 811    case PIPE_FORMAT_S8_UINT:
 812       return PIPE_FORMAT_R8_UINT;
 813    default:
 814       /* Z24 and combined Z/S are lowered to one of the above formats by
 815        * u_transfer_helper. The caller needs to pass in the rsrc->layout.format
 816        * and not the rsrc->base.format to get the lowered physical format
 817        * (rather than the API logical format).
 818        */
 819       assert(!util_format_is_depth_or_stencil(format) &&
 820              "no other depth/stencil formats allowed for staging");
 821
 822       return format;
 823    }
 824 }
 825
 826 /* Most of the time we can do CPU-side transfers, but sometimes we need to use
 827  * the 3D pipe for this. Let's wrap u_blitter to blit to/from staging textures.
 828  * Code adapted from panfrost */
 829
 830 static struct agx_resource *
 831 agx_alloc_staging(struct pipe_screen *screen, struct agx_resource *rsc,
 832                   unsigned level, const struct pipe_box *box)
 833 {
 834    struct pipe_resource tmpl = rsc->base;
 835
 836    tmpl.width0 = box->width;
 837    tmpl.height0 = box->height;
 838    tmpl.depth0 = 1;
 839
 840    /* We need a linear staging resource. We have linear 2D arrays, but not
 841     * linear 3D or cube textures. So switch to 2D arrays if needed.
 842     */
 843    switch (tmpl.target) {
 844    case PIPE_TEXTURE_2D_ARRAY:
 845    case PIPE_TEXTURE_CUBE:
 846    case PIPE_TEXTURE_CUBE_ARRAY:
 847    case PIPE_TEXTURE_3D:
 848       tmpl.target = PIPE_TEXTURE_2D_ARRAY;
 849       tmpl.array_size = box->depth;
 850       break;
 851    default:
 852       assert(tmpl.array_size == 1);
 853       assert(box->depth == 1);
 854       break;
 855    }
 856
 857    tmpl.last_level = 0;
 858
 859    /* Linear is incompatible with depth/stencil, so we convert */
 860    tmpl.format = agx_staging_color_format_for_zs(rsc->layout.format);
 861    tmpl.bind &= ~PIPE_BIND_DEPTH_STENCIL;
 862    tmpl.bind |= PIPE_BIND_LINEAR | PIPE_BIND_RENDER_TARGET;
 863
 864    struct pipe_resource *pstaging = screen->resource_create(screen, &tmpl);
 865    if (!pstaging)
 866       return NULL;
 867
 868    return agx_resource(pstaging);
 869 }
 870
 871 static void
 872 agx_blit_from_staging(struct pipe_context *pctx, struct agx_transfer *trans)
 873 {
 874    struct pipe_resource *dst = trans->base.resource;
 875    struct pipe_blit_info blit = {0};
 876
 877    blit.dst.resource = dst;
 878    blit.dst.format =
 879       agx_staging_color_format_for_zs(agx_resource(dst)->layout.format);
 880    blit.dst.level = trans->base.level;
 881    blit.dst.box = trans->base.box;
 882    blit.src.resource = trans->staging.rsrc;
 883    blit.src.format = trans->staging.rsrc->format;
 884    blit.src.level = 0;
 885    blit.src.box = trans->staging.box;
 886    blit.mask = util_format_get_mask(blit.src.format);
 887    blit.filter = PIPE_TEX_FILTER_NEAREST;
 888
 889    agx_blit(pctx, &blit);
 890 }
 891
 892 static void
 893 agx_blit_to_staging(struct pipe_context *pctx, struct agx_transfer *trans)
 894 {
 895    struct pipe_resource *src = trans->base.resource;
 896    struct pipe_blit_info blit = {0};
 897
 898    blit.src.resource = src;
 899    blit.src.format =
 900       agx_staging_color_format_for_zs(agx_resource(src)->layout.format);
 901    blit.src.level = trans->base.level;
 902    blit.src.box = trans->base.box;
 903    blit.dst.resource = trans->staging.rsrc;
 904    blit.dst.format = trans->staging.rsrc->format;
 905    blit.dst.level = 0;
 906    blit.dst.box = trans->staging.box;
 907    blit.mask = util_format_get_mask(blit.dst.format);
 908    blit.filter = PIPE_TEX_FILTER_NEAREST;
 909
 910    agx_blit(pctx, &blit);
 911 }
 912
 913 static void *
 914 agx_transfer_map(struct pipe_context *pctx, struct pipe_resource *resource,
 915                  unsigned level,
 916                  unsigned usage, /* a combination of PIPE_MAP_x */
 917                  const struct pipe_box *box,
 918                  struct pipe_transfer **out_transfer)
 919 {
 920    struct agx_context *ctx = agx_context(pctx);
 921    struct agx_resource *rsrc = agx_resource(resource);
 922
 923    /* Can't map tiled/compressed directly */
 924    if ((usage & PIPE_MAP_DIRECTLY) && rsrc->modifier != DRM_FORMAT_MOD_LINEAR)
 925       return NULL;
 926
 927    /* Can't transfer out of bounds mip levels */
 928    if (level >= rsrc->layout.levels)
 929       return NULL;
 930
 931    agx_prepare_for_map(ctx, rsrc, level, usage, box);
 932
 933    /* Track the written buffer range */
 934    if (resource->target == PIPE_BUFFER) {
 935       /* Note the ordering: DISCARD|WRITE is valid, so clear before adding. */
 936       if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE)
 937          util_range_set_empty(&rsrc->valid_buffer_range);
 938       if (usage & PIPE_MAP_WRITE) {
 939          util_range_add(resource, &rsrc->valid_buffer_range, box->x,
 940                         box->x + box->width);
 941       }
 942    }
 943
 944    struct agx_transfer *transfer = CALLOC_STRUCT(agx_transfer);
 945    transfer->base.level = level;
 946    transfer->base.usage = usage;
 947    transfer->base.box = *box;
 948
 949    pipe_resource_reference(&transfer->base.resource, resource);
 950    *out_transfer = &transfer->base;
 951
 952    /* For compression, we use a staging blit as we do not implement AGX
 953     * compression in software. In some cases, we could use this path for
 954     * twiddled too, but we don't have a use case for that yet.
 955     */
 956    if (rsrc->modifier == DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED) {
 957       /* Should never happen for buffers, and it's not safe */
 958       assert(resource->target != PIPE_BUFFER);
 959
 960       struct agx_resource *staging =
 961          agx_alloc_staging(pctx->screen, rsrc, level, box);
 962       assert(staging);
 963
 964       /* Staging resources have one LOD: level 0. Query the strides
 965        * on this LOD.
 966        */
 967       transfer->base.stride = ail_get_linear_stride_B(&staging->layout, 0);
 968       transfer->base.layer_stride = staging->layout.layer_stride_B;
 969       transfer->staging.rsrc = &staging->base;
 970
 971       transfer->staging.box = *box;
 972       transfer->staging.box.x = 0;
 973       transfer->staging.box.y = 0;
 974       transfer->staging.box.z = 0;
 975
 976       assert(transfer->staging.rsrc != NULL);
 977
 978       if ((usage & PIPE_MAP_READ) && agx_resource_valid(rsrc, level)) {
 979          agx_blit_to_staging(pctx, transfer);
 980          agx_sync_writer(ctx, staging, "GPU read staging blit");
 981       }
 982
 983       agx_bo_mmap(staging->bo);
 984       return staging->bo->ptr.cpu;
 985    }
 986
 987    agx_bo_mmap(rsrc->bo);
 988
 989    if (rsrc->modifier == DRM_FORMAT_MOD_APPLE_TWIDDLED) {
 990       /* Should never happen for buffers, and it's not safe */
 991       assert(resource->target != PIPE_BUFFER);
 992
 993       transfer->base.stride =
 994          util_format_get_stride(rsrc->layout.format, box->width);
 995
 996       transfer->base.layer_stride = util_format_get_2d_size(
 997          rsrc->layout.format, transfer->base.stride, box->height);
 998
 999       transfer->map = calloc(transfer->base.layer_stride, box->depth);
1000
1001       if ((usage & PIPE_MAP_READ) && agx_resource_valid(rsrc, level)) {
1002          for (unsigned z = 0; z < box->depth; ++z) {
1003             uint8_t *map = agx_map_texture_cpu(rsrc, level, box->z + z);
1004             uint8_t *dst =
1005                (uint8_t *)transfer->map + transfer->base.layer_stride * z;
1006
1007             ail_detile(map, dst, &rsrc->layout, level, transfer->base.stride,
1008                        box->x, box->y, box->width, box->height);
1009          }
1010       }
1011
1012       return transfer->map;
1013    } else {
1014       assert(rsrc->modifier == DRM_FORMAT_MOD_LINEAR);
1015
1016       transfer->base.stride = ail_get_linear_stride_B(&rsrc->layout, level);
1017       transfer->base.layer_stride = rsrc->layout.layer_stride_B;
1018
1019       /* Be conservative for direct writes */
1020       if ((usage & PIPE_MAP_WRITE) &&
1021           (usage &
1022            (PIPE_MAP_DIRECTLY | PIPE_MAP_PERSISTENT | PIPE_MAP_COHERENT))) {
1023          BITSET_SET(rsrc->data_valid, level);
1024       }
1025
1026       uint32_t offset =
1027          ail_get_linear_pixel_B(&rsrc->layout, level, box->x, box->y, box->z);
1028
1029       return ((uint8_t *)rsrc->bo->ptr.cpu) + offset;
1030    }
1031 }
1032
1033 static void
1034 agx_transfer_unmap(struct pipe_context *pctx, struct pipe_transfer *transfer)
1035 {
1036    /* Gallium expects writeback here, so we tile */
1037
1038    struct agx_transfer *trans = agx_transfer(transfer);
1039    struct pipe_resource *prsrc = transfer->resource;
1040    struct agx_resource *rsrc = (struct agx_resource *)prsrc;
1041
1042    if (trans->staging.rsrc && (transfer->usage & PIPE_MAP_WRITE)) {
1043       assert(prsrc->target != PIPE_BUFFER);
1044       agx_blit_from_staging(pctx, trans);
1045       agx_flush_readers(agx_context(pctx), agx_resource(trans->staging.rsrc),
1046                         "GPU write staging blit");
1047    } else if (trans->map && (transfer->usage & PIPE_MAP_WRITE)) {
1048       assert(rsrc->modifier == DRM_FORMAT_MOD_APPLE_TWIDDLED);
1049
1050       for (unsigned z = 0; z < transfer->box.depth; ++z) {
1051          uint8_t *map =
1052             agx_map_texture_cpu(rsrc, transfer->level, transfer->box.z + z);
1053          uint8_t *src = (uint8_t *)trans->map + transfer->layer_stride * z;
1054
1055          ail_tile(map, src, &rsrc->layout, transfer->level, transfer->stride,
1056                   transfer->box.x, transfer->box.y, transfer->box.width,
1057                   transfer->box.height);
1058       }
1059    }
1060
1061    /* The level we wrote is now initialized. We do this at the end so
1062     * blit_from_staging can avoid reloading existing contents.
1063     */
1064    if (transfer->usage & PIPE_MAP_WRITE)
1065       BITSET_SET(rsrc->data_valid, transfer->level);
1066
1067    /* Free the transfer */
1068    free(trans->map);
1069    pipe_resource_reference(&trans->staging.rsrc, NULL);
1070    pipe_resource_reference(&transfer->resource, NULL);
1071    FREE(transfer);
1072 }
1073
1074 static bool
1075 agx_generate_mipmap(struct pipe_context *pctx, struct pipe_resource *prsrc,
1076                     enum pipe_format format, unsigned base_level,
1077                     unsigned last_level, unsigned first_layer,
1078                     unsigned last_layer)
1079 {
1080    struct agx_resource *rsrc = agx_resource(prsrc);
1081
1082    /* Generating a mipmap invalidates the written levels. Make that
1083     * explicit so we don't reload the previous contents.
1084     */
1085    for (unsigned l = base_level + 1; l <= last_level; ++l)
1086       BITSET_CLEAR(rsrc->data_valid, l);
1087
1088    /* For now we use util_gen_mipmap, but this has way too much overhead */
1089    perf_debug_ctx(agx_context(pctx), "Unoptimized mipmap generation");
1090
1091    return util_gen_mipmap(pctx, prsrc, format, base_level, last_level,
1092                           first_layer, last_layer, PIPE_TEX_FILTER_LINEAR);
1093 }
1094
1095 /*
1096  * clear/copy
1097  */
1098 static void
1099 agx_clear(struct pipe_context *pctx, unsigned buffers,
1100           const struct pipe_scissor_state *scissor_state,
1101           const union pipe_color_union *color, double depth, unsigned stencil)
1102 {
1103    struct agx_context *ctx = agx_context(pctx);
1104    struct agx_batch *batch = agx_get_batch(ctx);
1105
1106    if (unlikely(!agx_render_condition_check(ctx)))
1107       return;
1108
1109    unsigned fastclear = buffers & ~(batch->draw | batch->load);
1110    unsigned slowclear = buffers & ~fastclear;
1111
1112    assert(scissor_state == NULL && "we don't support PIPE_CAP_CLEAR_SCISSORED");
1113
1114    /* Fast clears configure the batch */
1115    for (unsigned rt = 0; rt < PIPE_MAX_COLOR_BUFS; ++rt) {
1116       if (!(fastclear & (PIPE_CLEAR_COLOR0 << rt)))
1117          continue;
1118
1119       static_assert(sizeof(color->f) == 16, "mismatched structure");
1120
1121       batch->uploaded_clear_color[rt] =
1122          agx_pool_upload_aligned(&batch->pool, color->f, sizeof(color->f), 16);
1123    }
1124
1125    if (fastclear & PIPE_CLEAR_DEPTH)
1126       batch->clear_depth = depth;
1127
1128    if (fastclear & PIPE_CLEAR_STENCIL)
1129       batch->clear_stencil = stencil;
1130
1131    /* Slow clears draw a fullscreen rectangle */
1132    if (slowclear) {
1133       agx_blitter_save(ctx, ctx->blitter, false /* render cond */);
1134       util_blitter_clear(
1135          ctx->blitter, ctx->framebuffer.width, ctx->framebuffer.height,
1136          util_framebuffer_get_num_layers(&ctx->framebuffer), slowclear, color,
1137          depth, stencil,
1138          util_framebuffer_get_num_samples(&ctx->framebuffer) > 1);
1139    }
1140
1141    if (fastclear)
1142       agx_batch_init_state(batch);
1143
1144    batch->clear |= fastclear;
1145    batch->resolve |= buffers;
1146    assert((batch->draw & slowclear) == slowclear);
1147 }
1148
1149 static void
1150 transition_resource(struct pipe_context *pctx, struct agx_resource *rsrc,
1151                     struct pipe_resource *templ)
1152 {
1153    struct agx_resource *new_res =
1154       agx_resource(pctx->screen->resource_create(pctx->screen, templ));
1155
1156    assert(new_res);
1157    assert(!(rsrc->base.bind & PIPE_BIND_SHARED) && "cannot swap BOs if shared");
1158
1159    int level;
1160    BITSET_FOREACH_SET(level, rsrc->data_valid, PIPE_MAX_TEXTURE_LEVELS) {
1161       /* Blit each valid level */
1162       struct pipe_blit_info blit = {0};
1163
1164       u_box_3d(0, 0, 0, rsrc->layout.width_px, rsrc->layout.height_px,
1165                rsrc->layout.depth_px, &blit.dst.box);
1166       blit.src.box = blit.dst.box;
1167
1168       blit.dst.resource = &new_res->base;
1169       blit.dst.format = new_res->base.format;
1170       blit.dst.level = level;
1171       blit.src.resource = &rsrc->base;
1172       blit.src.format = rsrc->base.format;
1173       blit.src.level = level;
1174       blit.mask = util_format_get_mask(blit.src.format);
1175       blit.filter = PIPE_TEX_FILTER_NEAREST;
1176       agx_blit(pctx, &blit);
1177    }
1178
1179    /* Flush the blits out, to make sure the old resource is no longer used */
1180    agx_flush_writer(agx_context(pctx), new_res, "flush_resource");
1181
1182    /* Copy the bind flags and swap the BOs */
1183    struct agx_bo *old = rsrc->bo;
1184    rsrc->base.bind = new_res->base.bind;
1185    rsrc->layout = new_res->layout;
1186    rsrc->modifier = new_res->modifier;
1187    rsrc->bo = new_res->bo;
1188    new_res->bo = old;
1189
1190    /* Free the new resource, which now owns the old BO */
1191    pipe_resource_reference((struct pipe_resource **)&new_res, NULL);
1192 }
1193
1194 void
1195 agx_decompress(struct agx_context *ctx, struct agx_resource *rsrc,
1196                const char *reason)
1197 {
1198    if (rsrc->layout.tiling == AIL_TILING_TWIDDLED_COMPRESSED) {
1199       perf_debug_ctx(ctx, "Decompressing resource due to %s", reason);
1200    } else if (!rsrc->layout.writeable_image) {
1201       perf_debug_ctx(ctx, "Reallocating image due to %s", reason);
1202    }
1203
1204    struct pipe_resource templ = rsrc->base;
1205    assert(!(templ.bind & PIPE_BIND_SHADER_IMAGE) && "currently compressed");
1206    templ.bind |= PIPE_BIND_SHADER_IMAGE /* forces off compression */;
1207    transition_resource(&ctx->base, rsrc, &templ);
1208 }
1209
1210 static void
1211 agx_flush_resource(struct pipe_context *pctx, struct pipe_resource *pres)
1212 {
1213    struct agx_resource *rsrc = agx_resource(pres);
1214
1215    /* flush_resource is used to prepare resources for sharing, so if this is not
1216     * already a shareabe resource, make it so
1217     */
1218    struct agx_bo *old = rsrc->bo;
1219    if (!(old->flags & AGX_BO_SHAREABLE)) {
1220       assert(rsrc->layout.levels == 1 &&
1221              "Shared resources must not be mipmapped");
1222       assert(rsrc->layout.sample_count_sa == 1 &&
1223              "Shared resources must not be multisampled");
1224       assert(rsrc->bo);
1225       assert(!(pres->bind & PIPE_BIND_SHARED));
1226
1227       struct pipe_resource templ = *pres;
1228       templ.bind |= PIPE_BIND_SHARED;
1229       transition_resource(pctx, rsrc, &templ);
1230    } else {
1231       /* Otherwise just claim it's already shared */
1232       pres->bind |= PIPE_BIND_SHARED;
1233       agx_flush_writer(agx_context(pctx), rsrc, "flush_resource");
1234    }
1235 }
1236
1237 /*
1238  * context
1239  */
1240 static void
1241 agx_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
1242           unsigned flags)
1243 {
1244    struct agx_context *ctx = agx_context(pctx);
1245
1246    agx_flush_all(ctx, "Gallium flush");
1247
1248    /* At this point all pending work has been submitted. Since jobs are
1249     * started and completed sequentially from a UAPI perspective, and since
1250     * we submit all jobs with compute+render barriers on the prior job,
1251     * waiting on the last submitted job is sufficient to guarantee completion
1252     * of all GPU work thus far, so we can create a fence out of the latest
1253     * syncobj.
1254     *
1255     * See this page for more info on how the GPU/UAPI queueing works:
1256     * https://github.com/AsahiLinux/docs/wiki/SW:AGX-driver-notes#queues
1257     */
1258
1259    if (fence) {
1260       struct pipe_fence_handle *f = agx_fence_create(ctx);
1261       pctx->screen->fence_reference(pctx->screen, fence, NULL);
1262       *fence = f;
1263    }
1264 }
1265
1266 void
1267 agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
1268 {
1269    struct agx_device *dev = agx_device(ctx->base.screen);
1270
1271    assert(agx_batch_is_active(batch));
1272    assert(!agx_batch_is_submitted(batch));
1273
1274    /* Make sure there's something to submit. */
1275    if (!batch->clear && !batch->any_draws) {
1276       agx_batch_reset(ctx, batch);
1277       return;
1278    }
1279
1280    assert(batch->initialized);
1281
1282    /* Finalize the encoder */
1283    uint8_t stop[5 + 64] = {0x00, 0x00, 0x00, 0xc0, 0x00};
1284    memcpy(batch->encoder_current, stop, sizeof(stop));
1285
1286    uint64_t pipeline_background = agx_build_meta(batch, false, false);
1287    uint64_t pipeline_background_partial = agx_build_meta(batch, false, true);
1288    uint64_t pipeline_store = agx_build_meta(batch, true, false);
1289
1290    bool clear_pipeline_textures =
1291       agx_tilebuffer_spills(&batch->tilebuffer_layout);
1292
1293    for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
1294       struct pipe_surface *surf = batch->key.cbufs[i];
1295
1296       if (surf && surf->texture) {
1297          struct agx_resource *rt = agx_resource(surf->texture);
1298          BITSET_SET(rt->data_valid, surf->u.tex.level);
1299
1300          if (!(batch->clear & (PIPE_CLEAR_COLOR0 << i)))
1301             clear_pipeline_textures = true;
1302       }
1303    }
1304
1305    struct agx_resource *zbuf =
1306       batch->key.zsbuf ? agx_resource(batch->key.zsbuf->texture) : NULL;
1307
1308    if (zbuf) {
1309       unsigned level = batch->key.zsbuf->u.tex.level;
1310       BITSET_SET(zbuf->data_valid, level);
1311
1312       if (zbuf->separate_stencil)
1313          BITSET_SET(zbuf->separate_stencil->data_valid, level);
1314    }
1315
1316    /* Scissor and depth bias arrays are staged to dynamic arrays on the CPU. At
1317     * submit time, they're done growing and are uploaded to GPU memory attached
1318     * to the batch.
1319     */
1320    uint64_t scissor = agx_pool_upload_aligned(&batch->pool, batch->scissor.data,
1321                                               batch->scissor.size, 64);
1322    uint64_t zbias = agx_pool_upload_aligned(
1323       &batch->pool, batch->depth_bias.data, batch->depth_bias.size, 64);
1324
1325    /* BO list for a given batch consists of:
1326     *  - BOs for the batch's pools
1327     *  - BOs for the encoder
1328     *  - BO for internal shaders
1329     *  - BOs added to the batch explicitly
1330     */
1331    agx_batch_add_bo(batch, batch->encoder);
1332
1333    /* Occlusion queries are allocated as a contiguous pool */
1334    unsigned oq_count =
1335       util_dynarray_num_elements(&batch->occlusion_queries, struct agx_query *);
1336    size_t oq_size = oq_count * sizeof(uint64_t);
1337
1338    if (oq_size) {
1339       batch->occlusion_buffer =
1340          agx_pool_alloc_aligned(&batch->pool, oq_size, 64);
1341       memset(batch->occlusion_buffer.cpu, 0, oq_size);
1342    } else {
1343       batch->occlusion_buffer.gpu = 0;
1344    }
1345
1346    unsigned handle_count = agx_batch_num_bo(batch) +
1347                            agx_pool_num_bos(&batch->pool) +
1348                            agx_pool_num_bos(&batch->pipeline_pool);
1349
1350    uint32_t *handles = calloc(sizeof(uint32_t), handle_count);
1351    unsigned handle = 0, handle_i = 0;
1352
1353    AGX_BATCH_FOREACH_BO_HANDLE(batch, handle) {
1354       handles[handle_i++] = handle;
1355    }
1356
1357    agx_pool_get_bo_handles(&batch->pool, handles + handle_i);
1358    handle_i += agx_pool_num_bos(&batch->pool);
1359
1360    agx_pool_get_bo_handles(&batch->pipeline_pool, handles + handle_i);
1361    handle_i += agx_pool_num_bos(&batch->pipeline_pool);
1362
1363    /* Size calculation should've been exact */
1364    assert(handle_i == handle_count);
1365
1366    /* TODO: Linux UAPI submission */
1367    (void)dev;
1368    (void)zbias;
1369    (void)scissor;
1370    (void)clear_pipeline_textures;
1371    (void)pipeline_store;
1372    (void)pipeline_background;
1373    (void)pipeline_background_partial;
1374
1375    unreachable("Linux UAPI not yet upstream");
1376    agx_batch_submit(ctx, batch, 0, 0, NULL);
1377 }
1378
1379 static void
1380 agx_destroy_context(struct pipe_context *pctx)
1381 {
1382    struct agx_device *dev = agx_device(pctx->screen);
1383    struct agx_context *ctx = agx_context(pctx);
1384
1385    /* Batch state needs to be freed on completion, and we don't want to yank
1386     * buffers out from in-progress GPU jobs to avoid faults, so just wait until
1387     * everything in progress is actually done on context destroy. This will
1388     * ensure everything is cleaned up properly.
1389     */
1390    agx_sync_all(ctx, "destroy context");
1391
1392    if (pctx->stream_uploader)
1393       u_upload_destroy(pctx->stream_uploader);
1394
1395    if (ctx->blitter)
1396       util_blitter_destroy(ctx->blitter);
1397
1398    util_unreference_framebuffer_state(&ctx->framebuffer);
1399
1400    agx_meta_cleanup(&ctx->meta);
1401
1402    agx_bo_unreference(ctx->result_buf);
1403
1404    drmSyncobjDestroy(dev->fd, ctx->in_sync_obj);
1405    drmSyncobjDestroy(dev->fd, ctx->dummy_syncobj);
1406    if (ctx->in_sync_fd != -1)
1407       close(ctx->in_sync_fd);
1408
1409    for (unsigned i = 0; i < AGX_MAX_BATCHES; ++i) {
1410       if (ctx->batches.slots[i].syncobj)
1411          drmSyncobjDestroy(dev->fd, ctx->batches.slots[i].syncobj);
1412    }
1413
1414    ralloc_free(ctx);
1415 }
1416
1417 static void
1418 agx_invalidate_resource(struct pipe_context *pctx,
1419                         struct pipe_resource *resource)
1420 {
1421    struct agx_context *ctx = agx_context(pctx);
1422    struct agx_batch *batch = agx_get_batch(ctx);
1423
1424    /* Handle the glInvalidateFramebuffer case */
1425    if (batch->key.zsbuf && batch->key.zsbuf->texture == resource)
1426       batch->resolve &= ~PIPE_CLEAR_DEPTHSTENCIL;
1427
1428    for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
1429       struct pipe_surface *surf = batch->key.cbufs[i];
1430
1431       if (surf && surf->texture == resource)
1432          batch->resolve &= ~(PIPE_CLEAR_COLOR0 << i);
1433    }
1434 }
1435
/*
 * pipe_context::memory_barrier hook. The barrier flags are ignored: every
 * barrier flushes all pending work.
 */
static void
agx_memory_barrier(struct pipe_context *pctx, unsigned flags)
{
   struct agx_context *ctx = agx_context(pctx);

   /* Conservative for now; this can be optimized later */
   agx_flush_all(ctx, "Memory barrier");
}
1442
1443 static struct pipe_context *
1444 agx_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
1445 {
1446    struct agx_context *ctx = rzalloc(NULL, struct agx_context);
1447    struct pipe_context *pctx = &ctx->base;
1448    int ret;
1449
1450    if (!ctx)
1451       return NULL;
1452
1453    pctx->screen = screen;
1454    pctx->priv = priv;
1455
1456    util_dynarray_init(&ctx->writer, ctx);
1457    util_dynarray_init(&ctx->global_buffers, ctx);
1458
1459    pctx->stream_uploader = u_upload_create_default(pctx);
1460    if (!pctx->stream_uploader) {
1461       FREE(pctx);
1462       return NULL;
1463    }
1464    pctx->const_uploader = pctx->stream_uploader;
1465
1466    pctx->destroy = agx_destroy_context;
1467    pctx->flush = agx_flush;
1468    pctx->clear = agx_clear;
1469    pctx->resource_copy_region = util_resource_copy_region;
1470    pctx->blit = agx_blit;
1471    pctx->generate_mipmap = agx_generate_mipmap;
1472    pctx->flush_resource = agx_flush_resource;
1473
1474    pctx->buffer_map = u_transfer_helper_transfer_map;
1475    pctx->buffer_unmap = u_transfer_helper_transfer_unmap;
1476    pctx->texture_map = u_transfer_helper_transfer_map;
1477    pctx->texture_unmap = u_transfer_helper_transfer_unmap;
1478    pctx->transfer_flush_region = u_transfer_helper_transfer_flush_region;
1479
1480    pctx->buffer_subdata = u_default_buffer_subdata;
1481    pctx->clear_buffer = u_default_clear_buffer;
1482    pctx->texture_subdata = u_default_texture_subdata;
1483    pctx->set_debug_callback = u_default_set_debug_callback;
1484    pctx->get_sample_position = u_default_get_sample_position;
1485    pctx->invalidate_resource = agx_invalidate_resource;
1486    pctx->memory_barrier = agx_memory_barrier;
1487
1488    pctx->create_fence_fd = agx_create_fence_fd;
1489    pctx->fence_server_sync = agx_fence_server_sync;
1490
1491    agx_init_state_functions(pctx);
1492    agx_init_query_functions(pctx);
1493    agx_init_streamout_functions(pctx);
1494
1495    agx_meta_init(&ctx->meta, agx_device(screen));
1496
1497    ctx->blitter = util_blitter_create(pctx);
1498
1499    ctx->result_buf = agx_bo_create(
1500       agx_device(screen), sizeof(union agx_batch_result) * AGX_MAX_BATCHES,
1501       AGX_BO_WRITEBACK, "Batch result buffer");
1502    assert(ctx->result_buf);
1503
1504    /* Sync object/FD used for NATIVE_FENCE_FD. */
1505    ctx->in_sync_fd = -1;
1506    ret = drmSyncobjCreate(agx_device(screen)->fd, 0, &ctx->in_sync_obj);
1507    assert(!ret);
1508
1509    /* Dummy sync object used before any work has been submitted. */
1510    ret = drmSyncobjCreate(agx_device(screen)->fd, DRM_SYNCOBJ_CREATE_SIGNALED,
1511                           &ctx->dummy_syncobj);
1512    assert(!ret);
1513    ctx->syncobj = ctx->dummy_syncobj;
1514
1515    /* By default all samples are enabled */
1516    ctx->sample_mask = ~0;
1517
1518    return pctx;
1519 }
1520
/* pipe_screen::get_vendor hook: name of the driver vendor. */
static const char *
agx_get_vendor(struct pipe_screen *pscreen)
{
   const char *driver_vendor = "Mesa";

   return driver_vendor;
}
1526
/* pipe_screen::get_device_vendor hook: name of the hardware vendor. */
static const char *
agx_get_device_vendor(struct pipe_screen *pscreen)
{
   const char *hardware_vendor = "Apple";

   return hardware_vendor;
}
1532
1533 static const char *
1534 agx_get_name(struct pipe_screen *pscreen)
1535 {
1536    struct agx_device *dev = agx_device(pscreen);
1537
1538    return dev->name;
1539 }
1540
1541 static int
1542 agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
1543 {
1544    bool is_deqp = agx_device(pscreen)->debug & AGX_DBG_DEQP;
1545
1546    switch (param) {
1547    case PIPE_CAP_NPOT_TEXTURES:
1548    case PIPE_CAP_SHADER_STENCIL_EXPORT:
1549    case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
1550    case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD:
1551    case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
1552    case PIPE_CAP_DEPTH_CLIP_DISABLE:
1553    case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
1554    case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES:
1555    case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
1556    case PIPE_CAP_SHADER_PACK_HALF_FLOAT:
1557    case PIPE_CAP_FS_FINE_DERIVATIVE:
1558       return 1;
1559
1560    /* We could support ARB_clip_control by toggling the clip control bit for
1561     * the render pass. Because this bit is for the whole render pass,
1562     * switching clip modes necessarily incurs a flush. This should be ok, from
1563     * the ARB_clip_control spec:
1564     *
1565     *         Some implementations may introduce a flush when changing the
1566     *         clip control state.  Hence frequent clip control changes are
1567     *         not recommended.
1568     *
1569     * However, this would require tuning to ensure we don't flush unnecessary
1570     * when using u_blitter clears, for example. As we don't yet have a use case,
1571     * don't expose the feature.
1572     */
1573    case PIPE_CAP_CLIP_HALFZ:
1574       return 0;
1575
1576    case PIPE_CAP_MAX_RENDER_TARGETS:
1577    case PIPE_CAP_FBFETCH:
1578    case PIPE_CAP_FBFETCH_COHERENT:
1579       return 8;
1580    case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
1581       return 1;
1582
1583    case PIPE_CAP_OCCLUSION_QUERY:
1584    case PIPE_CAP_GENERATE_MIPMAP:
1585    case PIPE_CAP_PRIMITIVE_RESTART:
1586    case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX:
1587    case PIPE_CAP_ANISOTROPIC_FILTER:
1588    case PIPE_CAP_NATIVE_FENCE_FD:
1589       return true;
1590
1591    case PIPE_CAP_SAMPLER_VIEW_TARGET:
1592    case PIPE_CAP_TEXTURE_SWIZZLE:
1593    case PIPE_CAP_BLEND_EQUATION_SEPARATE:
1594    case PIPE_CAP_INDEP_BLEND_ENABLE:
1595    case PIPE_CAP_INDEP_BLEND_FUNC:
1596    case PIPE_CAP_ACCELERATED:
1597    case PIPE_CAP_UMA:
1598    case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
1599    case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
1600    case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE:
1601    case PIPE_CAP_SHADER_ARRAY_COMPONENTS:
1602    case PIPE_CAP_PACKED_UNIFORMS:
1603    case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
1604    case PIPE_CAP_VS_INSTANCEID:
1605    case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
1606    case PIPE_CAP_CONDITIONAL_RENDER:
1607    case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
1608    case PIPE_CAP_SEAMLESS_CUBE_MAP:
1609    case PIPE_CAP_LOAD_CONSTBUF:
1610    case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
1611    case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
1612    case PIPE_CAP_NULL_TEXTURES:
1613    case PIPE_CAP_TEXTURE_MULTISAMPLE:
1614    case PIPE_CAP_IMAGE_LOAD_FORMATTED:
1615    case PIPE_CAP_IMAGE_STORE_FORMATTED:
1616    case PIPE_CAP_COMPUTE:
1617    case PIPE_CAP_INT64:
1618    case PIPE_CAP_SAMPLE_SHADING:
1619       return 1;
1620    case PIPE_CAP_SURFACE_SAMPLE_COUNT:
1621       /* TODO: MSRTT */
1622       return 0;
1623
1624    case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
1625       return 0;
1626
1627    case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
1628       return PIPE_MAX_SO_BUFFERS;
1629
1630    case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
1631    case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
1632       return PIPE_MAX_SO_OUTPUTS;
1633
1634    case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
1635    case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
1636       return 1;
1637
1638    case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
1639       return 2048;
1640
1641    case PIPE_CAP_GLSL_FEATURE_LEVEL:
1642    case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
1643       return is_deqp ? 330 : 140;
1644    case PIPE_CAP_ESSL_FEATURE_LEVEL:
1645       return 320;
1646
1647    case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
1648       return 16;
1649
1650    case PIPE_CAP_MAX_TEXEL_BUFFER_ELEMENTS_UINT:
1651       return AGX_TEXTURE_BUFFER_MAX_SIZE;
1652
1653    case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
1654       return 64;
1655
1656    case PIPE_CAP_VERTEX_ATTRIB_ELEMENT_ALIGNED_ONLY:
1657       return 1;
1658
1659    /* We run nir_lower_point_size so we need the GLSL linker to copy
1660     * the original gl_PointSize when captured by transform feedback. We could
1661     * also copy it ourselves but it's easier to set the CAP.
1662     */
1663    case PIPE_CAP_PSIZ_CLAMPED:
1664       return 1;
1665
1666    case PIPE_CAP_MAX_TEXTURE_2D_SIZE:
1667       return 16384;
1668    case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
1669    case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
1670       return 13;
1671
1672    case PIPE_CAP_FS_COORD_ORIGIN_UPPER_LEFT:
1673    case PIPE_CAP_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
1674    case PIPE_CAP_TGSI_TEXCOORD:
1675    case PIPE_CAP_FS_FACE_IS_INTEGER_SYSVAL:
1676    case PIPE_CAP_FS_POSITION_IS_SYSVAL:
1677       return true;
1678    case PIPE_CAP_FS_COORD_ORIGIN_LOWER_LEFT:
1679    case PIPE_CAP_FS_COORD_PIXEL_CENTER_INTEGER:
1680    case PIPE_CAP_FS_POINT_IS_SYSVAL:
1681       return false;
1682
1683    case PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET:
1684       return 0xffff;
1685
1686    case PIPE_CAP_TEXTURE_TRANSFER_MODES:
1687       return PIPE_TEXTURE_TRANSFER_BLIT;
1688
1689    case PIPE_CAP_ENDIANNESS:
1690       return PIPE_ENDIAN_LITTLE;
1691
1692    case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
1693       return 4;
1694    case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
1695       return -8;
1696    case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
1697       return 7;
1698    case PIPE_CAP_DRAW_INDIRECT:
1699       return true;
1700
1701    case PIPE_CAP_VIDEO_MEMORY: {
1702       uint64_t system_memory;
1703
1704       if (!os_get_total_physical_memory(&system_memory))
1705          return 0;
1706
1707       return (int)(system_memory >> 20);
1708    }
1709
1710    case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
1711       return 4;
1712
1713    case PIPE_CAP_MAX_VARYINGS:
1714       return 16;
1715
1716    case PIPE_CAP_FLATSHADE:
1717    case PIPE_CAP_TWO_SIDED_COLOR:
1718    case PIPE_CAP_ALPHA_TEST:
1719    case PIPE_CAP_POINT_SIZE_FIXED:
1720    case PIPE_CAP_CLIP_PLANES:
1721    case PIPE_CAP_NIR_IMAGES_AS_DEREF:
1722       return 0;
1723
1724    case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
1725       return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_FREEDRENO;
1726
1727    case PIPE_CAP_SUPPORTED_PRIM_MODES:
1728    case PIPE_CAP_SUPPORTED_PRIM_MODES_WITH_RESTART:
1729       return BITFIELD_BIT(MESA_PRIM_POINTS) | BITFIELD_BIT(MESA_PRIM_LINES) |
1730              BITFIELD_BIT(MESA_PRIM_LINE_STRIP) |
1731              BITFIELD_BIT(MESA_PRIM_LINE_LOOP) |
1732              BITFIELD_BIT(MESA_PRIM_TRIANGLES) |
1733              BITFIELD_BIT(MESA_PRIM_TRIANGLE_STRIP) |
1734              BITFIELD_BIT(MESA_PRIM_TRIANGLE_FAN) |
1735              BITFIELD_BIT(MESA_PRIM_QUADS) | BITFIELD_BIT(MESA_PRIM_QUAD_STRIP);
1736
1737    case PIPE_CAP_MAP_UNSYNCHRONIZED_THREAD_SAFE:
1738       return 1;
1739
1740    default:
1741       return u_pipe_screen_get_param_defaults(pscreen, param);
1742    }
1743 }
1744
1745 static float
1746 agx_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
1747 {
1748    switch (param) {
1749    case PIPE_CAPF_MIN_LINE_WIDTH:
1750    case PIPE_CAPF_MIN_LINE_WIDTH_AA:
1751    case PIPE_CAPF_MIN_POINT_SIZE:
1752    case PIPE_CAPF_MIN_POINT_SIZE_AA:
1753       return 1;
1754
1755    case PIPE_CAPF_POINT_SIZE_GRANULARITY:
1756    case PIPE_CAPF_LINE_WIDTH_GRANULARITY:
1757       return 0.1;
1758
1759    case PIPE_CAPF_MAX_LINE_WIDTH:
1760    case PIPE_CAPF_MAX_LINE_WIDTH_AA:
1761       return 16.0; /* Off-by-one fixed point 4:4 encoding */
1762
1763    case PIPE_CAPF_MAX_POINT_SIZE:
1764    case PIPE_CAPF_MAX_POINT_SIZE_AA:
1765       return 511.95f;
1766
1767    case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
1768       return 16.0;
1769
1770    case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
1771       return 16.0; /* arbitrary */
1772
1773    case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE:
1774    case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE:
1775    case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY:
1776       return 0.0f;
1777
1778    default:
1779       debug_printf("Unexpected PIPE_CAPF %d query\n", param);
1780       return 0.0;
1781    }
1782 }
1783
1784 static int
1785 agx_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_type shader,
1786                      enum pipe_shader_cap param)
1787 {
1788    bool is_no16 = agx_device(pscreen)->debug & AGX_DBG_NO16;
1789
1790    switch (shader) {
1791    case PIPE_SHADER_VERTEX:
1792    case PIPE_SHADER_FRAGMENT:
1793    case PIPE_SHADER_COMPUTE:
1794       break;
1795    default:
1796       return false;
1797    }
1798
1799    /* Don't allow side effects with vertex processing. The APIs don't require it
1800     * and it may be problematic on our hardware.
1801     */
1802    bool allow_side_effects = (shader != PIPE_SHADER_VERTEX);
1803
1804    /* this is probably not totally correct.. but it's a start: */
1805    switch (param) {
1806    case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
1807    case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
1808    case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
1809    case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
1810       return 16384;
1811
1812    case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
1813       return 1024;
1814
1815    case PIPE_SHADER_CAP_MAX_INPUTS:
1816       return 16;
1817
1818    case PIPE_SHADER_CAP_MAX_OUTPUTS:
1819       return shader == PIPE_SHADER_FRAGMENT ? 8 : 16;
1820
1821    case PIPE_SHADER_CAP_MAX_TEMPS:
1822       return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */
1823
1824    case PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE:
1825       return 16 * 1024 * sizeof(float);
1826
1827    case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
1828       return 16;
1829
1830    case PIPE_SHADER_CAP_CONT_SUPPORTED:
1831       return 0;
1832
1833    case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
1834    case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
1835    case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
1836    case PIPE_SHADER_CAP_SUBROUTINES:
1837    case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
1838       return 0;
1839
1840    case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
1841    case PIPE_SHADER_CAP_INTEGERS:
1842       return true;
1843
1844    case PIPE_SHADER_CAP_FP16:
1845    case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
1846    case PIPE_SHADER_CAP_FP16_DERIVATIVES:
1847    case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
1848       return !is_no16;
1849    case PIPE_SHADER_CAP_INT16:
1850       /* GLSL compiler is broken. Flip this on when Panfrost does. */
1851       return false;
1852
1853    case PIPE_SHADER_CAP_INT64_ATOMICS:
1854    case PIPE_SHADER_CAP_DROUND_SUPPORTED:
1855    case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
1856       return 0;
1857
1858    case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
1859       return 16;
1860
1861    case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
1862       return PIPE_MAX_SHADER_SAMPLER_VIEWS;
1863
1864    case PIPE_SHADER_CAP_SUPPORTED_IRS:
1865       return (1 << PIPE_SHADER_IR_NIR);
1866
1867    case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
1868       return allow_side_effects ? PIPE_MAX_SHADER_BUFFERS : 0;
1869
1870    case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
1871       return allow_side_effects ? PIPE_MAX_SHADER_IMAGES : 0;
1872
1873    case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
1874    case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
1875       return 0;
1876
1877    default:
1878       /* Other params are unknown */
1879       return 0;
1880    }
1881
1882    return 0;
1883 }
1884
1885 static int
1886 agx_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type,
1887                       enum pipe_compute_cap param, void *ret)
1888 {
1889 #define RET(x)                                                                 \
1890    do {                                                                        \
1891       if (ret)                                                                 \
1892          memcpy(ret, x, sizeof(x));                                            \
1893       return sizeof(x);                                                        \
1894    } while (0)
1895
1896    switch (param) {
1897    case PIPE_COMPUTE_CAP_ADDRESS_BITS:
1898       RET((uint32_t[]){64});
1899
1900    case PIPE_COMPUTE_CAP_IR_TARGET:
1901       if (ret)
1902          sprintf(ret, "agx");
1903       return strlen("agx") * sizeof(char);
1904
1905    case PIPE_COMPUTE_CAP_GRID_DIMENSION:
1906       RET((uint64_t[]){3});
1907
1908    case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
1909       RET(((uint64_t[]){65535, 65535, 65535}));
1910
1911    case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
1912       RET(((uint64_t[]){256, 256, 256}));
1913
1914    case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
1915       RET((uint64_t[]){256});
1916
1917    case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
1918    case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: {
1919       uint64_t system_memory;
1920
1921       if (!os_get_total_physical_memory(&system_memory))
1922          return 0;
1923
1924       RET((uint64_t[]){system_memory});
1925    }
1926
1927    case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
1928       RET((uint64_t[]){32768});
1929
1930    case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
1931    case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
1932       RET((uint64_t[]){4096});
1933
1934    case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
1935       RET((uint32_t[]){800 /* MHz -- TODO */});
1936
1937    case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
1938       RET((uint32_t[]){4 /* TODO */});
1939
1940    case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
1941       RET((uint32_t[]){1});
1942
1943    case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
1944       RET((uint32_t[]){32});
1945
1946    case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
1947       RET((uint32_t[]){0 /* TODO */});
1948
1949    case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
1950       RET((uint64_t[]){1024}); // TODO
1951    }
1952
1953    return 0;
1954 }
1955
1956 static bool
1957 agx_is_format_supported(struct pipe_screen *pscreen, enum pipe_format format,
1958                         enum pipe_texture_target target, unsigned sample_count,
1959                         unsigned storage_sample_count, unsigned usage)
1960 {
1961    assert(target == PIPE_BUFFER || target == PIPE_TEXTURE_1D ||
1962           target == PIPE_TEXTURE_1D_ARRAY || target == PIPE_TEXTURE_2D ||
1963           target == PIPE_TEXTURE_2D_ARRAY || target == PIPE_TEXTURE_RECT ||
1964           target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE ||
1965           target == PIPE_TEXTURE_CUBE_ARRAY);
1966
1967    if (sample_count > 1 && sample_count != 4 && sample_count != 2)
1968       return false;
1969
1970    if (sample_count > 1 && agx_device(pscreen)->debug & AGX_DBG_NOMSAA)
1971       return false;
1972
1973    if (MAX2(sample_count, 1) != MAX2(storage_sample_count, 1))
1974       return false;
1975
1976    if ((usage & PIPE_BIND_VERTEX_BUFFER) && !agx_vbo_supports_format(format))
1977       return false;
1978
1979    /* For framebuffer_no_attachments, fake support for "none" images */
1980    if (format == PIPE_FORMAT_NONE)
1981       return true;
1982
1983    if (usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) {
1984       enum pipe_format tex_format = format;
1985
1986       /* Mimic the fixup done in create_sampler_view and u_transfer_helper so we
1987        * advertise GL_OES_texture_stencil8. Alternatively, we could make mesa/st
1988        * less stupid?
1989        */
1990       if (tex_format == PIPE_FORMAT_X24S8_UINT)
1991          tex_format = PIPE_FORMAT_S8_UINT;
1992
1993       struct agx_pixel_format_entry ent = agx_pixel_format[tex_format];
1994
1995       if (!agx_is_valid_pixel_format(tex_format))
1996          return false;
1997
1998       /* RGB32 is emulated for texture buffers only */
1999       if (ent.channels == AGX_CHANNELS_R32G32B32_EMULATED &&
2000           target != PIPE_BUFFER)
2001          return false;
2002
2003       if ((usage & PIPE_BIND_RENDER_TARGET) && !ent.renderable)
2004          return false;
2005    }
2006
2007    if (usage & PIPE_BIND_DEPTH_STENCIL) {
2008       switch (format) {
2009       /* natively supported */
2010       case PIPE_FORMAT_Z16_UNORM:
2011       case PIPE_FORMAT_Z32_FLOAT:
2012       case PIPE_FORMAT_S8_UINT:
2013
2014       /* lowered by u_transfer_helper to one of the above */
2015       case PIPE_FORMAT_Z24X8_UNORM:
2016       case PIPE_FORMAT_Z24_UNORM_S8_UINT:
2017       case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
2018          break;
2019
2020       default:
2021          return false;
2022       }
2023    }
2024
2025    return true;
2026 }
2027
2028 static void
2029 agx_query_dmabuf_modifiers(struct pipe_screen *screen, enum pipe_format format,
2030                            int max, uint64_t *modifiers,
2031                            unsigned int *external_only, int *out_count)
2032 {
2033    int i;
2034
2035    if (max == 0) {
2036       *out_count = ARRAY_SIZE(agx_best_modifiers);
2037       return;
2038    }
2039
2040    for (i = 0; i < ARRAY_SIZE(agx_best_modifiers) && i < max; i++) {
2041       if (external_only)
2042          external_only[i] = 0;
2043
2044       modifiers[i] = agx_best_modifiers[i];
2045    }
2046
2047    /* Return the number of modifiers copied */
2048    *out_count = i;
2049 }
2050
2051 static bool
2052 agx_is_dmabuf_modifier_supported(struct pipe_screen *screen, uint64_t modifier,
2053                                  enum pipe_format format, bool *external_only)
2054 {
2055    if (external_only)
2056       *external_only = false;
2057
2058    for (unsigned i = 0; i < ARRAY_SIZE(agx_best_modifiers); ++i) {
2059       if (agx_best_modifiers[i] == modifier)
2060          return true;
2061    }
2062
2063    return false;
2064 }
2065
2066 static void
2067 agx_destroy_screen(struct pipe_screen *pscreen)
2068 {
2069    struct agx_screen *screen = agx_screen(pscreen);
2070
2071    if (screen->dev.ro)
2072       screen->dev.ro->destroy(screen->dev.ro);
2073
2074    u_transfer_helper_destroy(pscreen->transfer_helper);
2075    agx_close_device(&screen->dev);
2076    disk_cache_destroy(screen->disk_cache);
2077    ralloc_free(screen);
2078 }
2079
2080 static const void *
2081 agx_get_compiler_options(struct pipe_screen *pscreen, enum pipe_shader_ir ir,
2082                          enum pipe_shader_type shader)
2083 {
2084    return &agx_nir_options;
2085 }
2086
2087 static void
2088 agx_resource_set_stencil(struct pipe_resource *prsrc,
2089                          struct pipe_resource *stencil)
2090 {
2091    agx_resource(prsrc)->separate_stencil = agx_resource(stencil);
2092 }
2093
2094 static struct pipe_resource *
2095 agx_resource_get_stencil(struct pipe_resource *prsrc)
2096 {
2097    return (struct pipe_resource *)agx_resource(prsrc)->separate_stencil;
2098 }
2099
2100 static enum pipe_format
2101 agx_resource_get_internal_format(struct pipe_resource *prsrc)
2102 {
2103    return agx_resource(prsrc)->layout.format;
2104 }
2105
2106 static struct disk_cache *
2107 agx_get_disk_shader_cache(struct pipe_screen *pscreen)
2108 {
2109    return agx_screen(pscreen)->disk_cache;
2110 }
2111
2112 static const struct u_transfer_vtbl transfer_vtbl = {
2113    .resource_create = agx_resource_create,
2114    .resource_destroy = agx_resource_destroy,
2115    .transfer_map = agx_transfer_map,
2116    .transfer_unmap = agx_transfer_unmap,
2117    .transfer_flush_region = agx_transfer_flush_region,
2118    .get_internal_format = agx_resource_get_internal_format,
2119    .set_stencil = agx_resource_set_stencil,
2120    .get_stencil = agx_resource_get_stencil,
2121 };
2122
2123 static int
2124 agx_screen_get_fd(struct pipe_screen *pscreen)
2125 {
2126    return agx_device(pscreen)->fd;
2127 }
2128
2129 struct pipe_screen *
2130 agx_screen_create(int fd, struct renderonly *ro)
2131 {
2132    struct agx_screen *agx_screen;
2133    struct pipe_screen *screen;
2134
2135    agx_screen = rzalloc(NULL, struct agx_screen);
2136    if (!agx_screen)
2137       return NULL;
2138
2139    screen = &agx_screen->pscreen;
2140
2141    /* Set debug before opening */
2142    agx_screen->dev.debug =
2143       debug_get_flags_option("ASAHI_MESA_DEBUG", agx_debug_options, 0);
2144
2145    agx_screen->dev.fd = fd;
2146    agx_screen->dev.ro = ro;
2147
2148    /* Try to open an AGX device */
2149    if (!agx_open_device(screen, &agx_screen->dev)) {
2150       ralloc_free(agx_screen);
2151       return NULL;
2152    }
2153
2154    if (agx_screen->dev.debug & AGX_DBG_DEQP) {
2155       /* You're on your own. */
2156       static bool warned_about_hacks = false;
2157
2158       if (!warned_about_hacks) {
2159          agx_msg("\n------------------\n"
2160                  "Unsupported debug parameter set. Expect breakage.\n"
2161                  "Do not report bugs.\n"
2162                  "------------------\n\n");
2163          warned_about_hacks = true;
2164       }
2165    }
2166
2167    screen->destroy = agx_destroy_screen;
2168    screen->get_screen_fd = agx_screen_get_fd;
2169    screen->get_name = agx_get_name;
2170    screen->get_vendor = agx_get_vendor;
2171    screen->get_device_vendor = agx_get_device_vendor;
2172    screen->get_param = agx_get_param;
2173    screen->get_shader_param = agx_get_shader_param;
2174    screen->get_compute_param = agx_get_compute_param;
2175    screen->get_paramf = agx_get_paramf;
2176    screen->is_format_supported = agx_is_format_supported;
2177    screen->query_dmabuf_modifiers = agx_query_dmabuf_modifiers;
2178    screen->is_dmabuf_modifier_supported = agx_is_dmabuf_modifier_supported;
2179    screen->context_create = agx_create_context;
2180    screen->resource_from_handle = agx_resource_from_handle;
2181    screen->resource_get_handle = agx_resource_get_handle;
2182    screen->resource_get_param = agx_resource_get_param;
2183    screen->resource_create_with_modifiers = agx_resource_create_with_modifiers;
2184    screen->get_timestamp = u_default_get_timestamp;
2185    screen->fence_reference = agx_fence_reference;
2186    screen->fence_finish = agx_fence_finish;
2187    screen->fence_get_fd = agx_fence_get_fd;
2188    screen->get_compiler_options = agx_get_compiler_options;
2189    screen->get_disk_shader_cache = agx_get_disk_shader_cache;
2190
2191    screen->resource_create = u_transfer_helper_resource_create;
2192    screen->resource_destroy = u_transfer_helper_resource_destroy;
2193    screen->transfer_helper = u_transfer_helper_create(
2194       &transfer_vtbl,
2195       U_TRANSFER_HELPER_SEPARATE_Z32S8 | U_TRANSFER_HELPER_SEPARATE_STENCIL |
2196          U_TRANSFER_HELPER_MSAA_MAP | U_TRANSFER_HELPER_Z24_IN_Z32F);
2197
2198    agx_disk_cache_init(agx_screen);
2199
2200    return screen;
2201 }