drivers/gpu/drm/i915/gem/i915_gem_context_types.h

   1 /*
   2  * SPDX-License-Identifier: MIT
   3  *
   4  * Copyright © 2019 Intel Corporation
   5  */
   6
   7 #ifndef __I915_GEM_CONTEXT_TYPES_H__
   8 #define __I915_GEM_CONTEXT_TYPES_H__
   9
  10 #include <linux/atomic.h>
  11 #include <linux/list.h>
  12 #include <linux/llist.h>
  13 #include <linux/kref.h>
  14 #include <linux/mutex.h>
  15 #include <linux/radix-tree.h>
  16 #include <linux/rbtree.h>
  17 #include <linux/rcupdate.h>
  18 #include <linux/types.h>
  19
  20 #include "gt/intel_context_types.h"
  21
  22 #include "i915_scheduler.h"
  23 #include "i915_sw_fence.h"
  24
  25 struct pid;
  26
  27 struct drm_i915_private;
  28 struct drm_i915_file_private;
  29 struct i915_address_space;
  30 struct intel_timeline;
  31 struct intel_ring;
  32
  33 /**
  34  * struct i915_gem_engines - A set of engines
  35  */
  36 struct i915_gem_engines {
  37         union {
  38                 /** @link: Link in i915_gem_context::stale::engines */
  39                 struct list_head link;
  40
  41                 /** @rcu: RCU head to use when freeing; shares storage with @link */
  42                 struct rcu_head rcu;
  43         };
  44
  45         /** @fence: Fence used for delayed destruction of engines */
  46         struct i915_sw_fence fence;
  47
  48         /** @ctx: i915_gem_context backpointer */
  49         struct i915_gem_context *ctx;
  50
  51         /** @num_engines: Number of engines in this set */
  52         unsigned int num_engines;
  53
  54         /** @engines: Array of engines; invalid engines appear as NULL entries */
  55         struct intel_context *engines[];
  56 };
  57
  58 /**
  59  * struct i915_gem_engines_iter - Iterator for an i915_gem_engines set
  60  */
  61 struct i915_gem_engines_iter {
  62         /** @idx: Index into i915_gem_engines::engines */
  63         unsigned int idx;
  64
  65         /** @engines: Engine set being iterated (accessed via const pointer) */
  66         const struct i915_gem_engines *engines;
  67 };
  68
  69 /**
  70  * enum i915_gem_engine_type - Describes the type of an i915_gem_proto_engine
  71  */
  72 enum i915_gem_engine_type {
  73         /** @I915_GEM_ENGINE_TYPE_INVALID: An invalid engine (the zero value) */
  74         I915_GEM_ENGINE_TYPE_INVALID = 0,
  75
  76         /** @I915_GEM_ENGINE_TYPE_PHYSICAL: A single physical engine */
  77         I915_GEM_ENGINE_TYPE_PHYSICAL,
  78
  79         /** @I915_GEM_ENGINE_TYPE_BALANCED: A load-balanced engine set */
  80         I915_GEM_ENGINE_TYPE_BALANCED,
  81
  82         /** @I915_GEM_ENGINE_TYPE_PARALLEL: A parallel engine set */
  83         I915_GEM_ENGINE_TYPE_PARALLEL,
  84 };
  85
  86 /**
  87  * struct i915_gem_proto_engine - prototype engine
  88  *
  89  * This struct describes an engine that a context may contain.  Engines
  90  * have four types:
  91  *
  92  *  - I915_GEM_ENGINE_TYPE_INVALID: Invalid engines can be created but they
  93  *    show up as a NULL in i915_gem_engines::engines[i] and any attempt to
  94  *    use them by the user results in -EINVAL.  They are also useful during
  95  *    proto-context construction because the client may create invalid
  96  *    engines and then set them up later as virtual engines.
  97  *
  98  *  - I915_GEM_ENGINE_TYPE_PHYSICAL: A single physical engine, described by
  99  *    i915_gem_proto_engine::engine.
 100  *
 101  *  - I915_GEM_ENGINE_TYPE_BALANCED: A load-balanced engine set, described by
 102  *    i915_gem_proto_engine::num_siblings and i915_gem_proto_engine::siblings.
 103  *
 104  *  - I915_GEM_ENGINE_TYPE_PARALLEL: A parallel submission engine set, described
 105  *    by i915_gem_proto_engine::width, i915_gem_proto_engine::num_siblings, and
 106  *    i915_gem_proto_engine::siblings.
 107  */
 108 struct i915_gem_proto_engine {
 109         /** @type: Type of this engine */
 110         enum i915_gem_engine_type type;
 111
 112         /** @engine: Engine, for I915_GEM_ENGINE_TYPE_PHYSICAL */
 113         struct intel_engine_cs *engine;
 114
 115         /** @num_siblings: Number of balanced or parallel siblings */
 116         unsigned int num_siblings;
 117
 118         /** @width: Width of each sibling, for I915_GEM_ENGINE_TYPE_PARALLEL */
 119         unsigned int width;
 120
 121         /** @siblings: Balanced siblings or num_siblings * width for parallel */
 122         struct intel_engine_cs **siblings;
 123
 124         /** @sseu: Client-set SSEU parameters */
 125         struct intel_sseu sseu;
 126 };
 127
 128 /**
 129  * struct i915_gem_proto_context - prototype context
 130  *
 131  * The struct i915_gem_proto_context represents the creation parameters for
 132  * a struct i915_gem_context.  This is used to gather parameters provided
 133  * either through creation flags or via SET_CONTEXT_PARAM so that, when we
 134  * create the final i915_gem_context, those parameters can be immutable.
 135  *
 136  * The context uAPI allows for two methods of setting context parameters:
 137  * SET_CONTEXT_PARAM and CONTEXT_CREATE_EXT_SETPARAM.  The former is
 138  * allowed to be called at any time while the latter happens as part of
 139  * GEM_CONTEXT_CREATE.  Currently, everything settable via one is
 140  * settable via the other.  While some params are fairly simple and
 141  * setting them on a live context is harmless, such as the context
 142  * priority, others are far trickier, such as the VM or the
 143  * set of engines.  To avoid some truly nasty race conditions, we don't
 144  * allow setting the VM or the set of engines on live contexts.
 145  *
 146  * The way we dealt with this without breaking older userspace that sets
 147  * the VM or engine set via SET_CONTEXT_PARAM is to delay the creation of
 148  * the actual context until after the client is done configuring it with
 149  * SET_CONTEXT_PARAM.  From the perspective of the client, it has the same
 150  * u32 context ID the whole time.  From the perspective of i915, however,
 151  * it's an i915_gem_proto_context right up until the point where we attempt
 152  * to do something which the proto-context can't handle at which point the
 153  * real context gets created.
 154  *
 155  * This is accomplished via a little xarray dance.  When GEM_CONTEXT_CREATE
 156  * is called, we create a proto-context, reserve a slot in context_xa but
 157  * leave it NULL, and store the proto-context in the corresponding slot in
 158  * proto_context_xa.  Then, whenever we go to look up a context, we first
 159  * check context_xa.  If it's there, we return the i915_gem_context and
 160  * we're done.  If it's not, we look in proto_context_xa and, if we find it
 161  * there, we create the actual context and kill the proto-context.
 162  *
 163  * At the time we made this change (April, 2021), we did a fairly complete
 164  * audit of existing userspace to ensure this wouldn't break anything:
 165  *
 166  *  - Mesa/i965 didn't use the engines or VM APIs at all
 167  *
 168  *  - Mesa/ANV used the engines API but via CONTEXT_CREATE_EXT_SETPARAM and
 169  *    didn't use the VM API.
 170  *
 171  *  - Mesa/iris didn't use the engines or VM APIs at all
 172  *
 173  *  - The open-source compute-runtime didn't yet use the engines API but
 174  *    did use the VM API via SET_CONTEXT_PARAM.  However, CONTEXT_SETPARAM
 175  *    was always the second ioctl on that context, immediately following
 176  *    GEM_CONTEXT_CREATE.
 177  *
 178  *  - The media driver sets engines and bonding/balancing via
 179  *    SET_CONTEXT_PARAM.  However, CONTEXT_SETPARAM to set the VM was
 180  *    always the second ioctl on that context, immediately following
 181  *    GEM_CONTEXT_CREATE and setting engines immediately followed that.
 182  *
 183  * In order for this dance to work properly, any modification to an
 184  * i915_gem_proto_context that is exposed to the client via
 185  * drm_i915_file_private::proto_context_xa must be guarded by
 186  * drm_i915_file_private::proto_context_lock.  The exception is when a
 187  * proto-context has not yet been exposed such as when handling
 188  * CONTEXT_CREATE_EXT_SETPARAM during GEM_CONTEXT_CREATE.
 189  */
 190 struct i915_gem_proto_context {
 191         /** @vm: See &i915_gem_context.vm */
 192         struct i915_address_space *vm;
 193
 194         /** @user_flags: See &i915_gem_context.user_flags */
 195         unsigned long user_flags;
 196
 197         /** @sched: See &i915_gem_context.sched */
 198         struct i915_sched_attr sched;
 199
 200         /** @num_user_engines: Number of user-specified engines or -1 */
 201         int num_user_engines;
 202
 203         /** @user_engines: User-specified engines */
 204         struct i915_gem_proto_engine *user_engines;
 205
 206         /** @legacy_rcs_sseu: Client-set SSEU parameters for the legacy RCS */
 207         struct intel_sseu legacy_rcs_sseu;
 208
 209         /** @single_timeline: See &i915_gem_context.syncobj */
 210         bool single_timeline;
 211
 212         /** @uses_protected_content: See &i915_gem_context.uses_protected_content */
 213         bool uses_protected_content;
 214
 215         /** @pxp_wakeref: See &i915_gem_context.pxp_wakeref */
 216         intel_wakeref_t pxp_wakeref;
 217 };
 218
 219 /**
 220  * struct i915_gem_context - client state
 221  *
 222  * The struct i915_gem_context represents the combined view of the driver and
 223  * logical hardware state for a particular client.
 224  */
 225 struct i915_gem_context {
 226         /** @i915: i915 device backpointer */
 227         struct drm_i915_private *i915;
 228
 229         /** @file_priv: owning file descriptor */
 230         struct drm_i915_file_private *file_priv;
 231
 232         /**
 233          * @engines: User defined engines for this context
 234          *
 235          * Various uAPI offer the ability to look up an
 236          * index from this array to select an engine to operate on.
 237          *
 238          * Multiple logically distinct instances of the same engine
 239          * may be defined in the array, as well as composite virtual
 240          * engines.
 241          *
 242          * Execbuf uses the I915_EXEC_RING_MASK as an index into this
 243          * array to select which HW context + engine to execute on. For
 244          * the default array, the user_ring_map[] is used to translate
 245          * the legacy uABI onto the appropriate index (e.g. both
 246          * I915_EXEC_DEFAULT and I915_EXEC_RENDER select the same
 247          * context, and I915_EXEC_BSD is weird). For a user defined
 248          * array, execbuf uses I915_EXEC_RING_MASK as a plain index.
 249          *
 250          * User defined by I915_CONTEXT_PARAM_ENGINES (when the
 251          * CONTEXT_USER_ENGINES flag is set).
 252          */
 253         struct i915_gem_engines __rcu *engines;
 254
 255         /** @engines_mutex: guards writes to engines */
 256         struct mutex engines_mutex;
 257
 258         /**
 259          * @syncobj: Shared timeline syncobj
 260          *
 261          * When the SINGLE_TIMELINE flag is set on context creation, we
 262          * emulate a single timeline across all engines using this syncobj.
 263          * For every execbuffer2 call, this syncobj is used as both an in-
 264          * and out-fence.  Unlike the real intel_timeline, this doesn't
 265          * provide perfect atomic in-order guarantees if the client races
 266          * with itself by calling execbuffer2 twice concurrently.  However,
 267          * if userspace races with itself, that's not likely to yield well-
 268          * defined results anyway so we choose to not care.
 269          */
 270         struct drm_syncobj *syncobj;
 271
 272         /**
 273          * @vm: unique address space (GTT)
 274          *
 275          * In full-ppgtt mode, each context has its own address space ensuring
 276          * complete separation of one client from all others.
 277          *
 278          * In other modes, this is a NULL pointer with the expectation that
 279          * the caller uses the shared global GTT.
 280          */
 281         struct i915_address_space *vm;
 282
 283         /**
 284          * @pid: process id of creator
 285          *
 286          * Note that who created the context may not be the principal user,
 287          * as the context may be shared across a local socket. However,
 288          * that should only affect the default context, all contexts created
 289          * explicitly by the client are expected to be isolated.
 290          */
 291         struct pid *pid;
 292
 293         /** @link: place with &drm_i915_private.context_list */
 294         struct list_head link;
 295
 296         /**
 297          * @ref: reference count
 298          *
 299          * A reference to a context is held by both the client who created it
 300          * and on each request submitted to the hardware using the request
 301          * (to ensure the hardware has access to the state until it has
 302          * finished all pending writes). See i915_gem_context_get() and
 303          * i915_gem_context_put() for access.
 304          */
 305         struct kref ref;
 306
 307         /**
 308          * @release_work:
 309          *
 310          * Work item for deferred cleanup, since i915_gem_context_put() tends to
 311          * be called from hardirq context.
 312          *
 313          * FIXME: The only real reason for this is &i915_gem_engines.fence, all
 314          * other callers are from process context and need at most some mild
 315          * shuffling to pull the i915_gem_context_put() call out of a spinlock.
 316          */
 317         struct work_struct release_work;
 318
 319         /**
 320          * @rcu: rcu_head for deferred freeing.
 321          */
 322         struct rcu_head rcu;
 323
 324         /**
 325          * @user_flags: small set of booleans controlled by the user
 326          */
 327         unsigned long user_flags;
 328 #define UCONTEXT_NO_ERROR_CAPTURE       1
 329 #define UCONTEXT_BANNABLE               2
 330 #define UCONTEXT_RECOVERABLE            3
 331 #define UCONTEXT_PERSISTENCE            4
 332
 333         /**
 334          * @flags: small set of booleans
 335          */
 336         unsigned long flags;
 337 #define CONTEXT_CLOSED                  0
 338 #define CONTEXT_USER_ENGINES            1
 339
 340         /**
 341          * @uses_protected_content: context uses PXP-encrypted objects.
 342          *
 343          * This flag can only be set at ctx creation time and it's immutable for
 344          * the lifetime of the context. See I915_CONTEXT_PARAM_PROTECTED_CONTENT
 345          * in uapi/drm/i915_drm.h for more info on setting restrictions and
 346          * expected behaviour of marked contexts.
 347          */
 348         bool uses_protected_content;
 349
 350         /**
 351          * @pxp_wakeref: wakeref to keep the device awake when PXP is in use
 352          *
 353          * PXP sessions are invalidated when the device is suspended, which in
 354          * turn invalidates all contexts and objects using it. To keep the
 355          * flow simple, we keep the device awake when contexts using PXP objects
 356          * are in use. It is expected that the userspace application only uses
 357          * PXP when the display is on, so taking a wakeref here shouldn't worsen
 358          * our power metrics.
 359          */
 360         intel_wakeref_t pxp_wakeref;
 361
 362         /** @mutex: guards everything that isn't engines or handles_vma */
 363         struct mutex mutex;
 364
 365         /** @sched: scheduler parameters */
 366         struct i915_sched_attr sched;
 367
 368         /** @guilty_count: How many times this context has caused a GPU hang. */
 369         atomic_t guilty_count;
 370         /**
 371          * @active_count: How many times this context was active during a GPU
 372          * hang, but did not cause it.
 373          */
 374         atomic_t active_count;
 375
 376         /**
 377          * @hang_timestamp: The last time(s) this context caused a GPU hang
 378          */
 379         unsigned long hang_timestamp[2];
 380 #define CONTEXT_FAST_HANG_JIFFIES (120 * HZ) /* 3 hangs within 120s? Banned! */
 381
 382         /** @remap_slice: Bitmask of cache lines that need remapping */
 383         u8 remap_slice;
 384
 385         /**
 386          * @handles_vma: radix tree to look up our context specific obj/vma for
 387          * the user handle. (user handles are per fd, but the binding is
 388          * per vm, which may be one per context or shared with the global GTT)
 389          */
 390         struct radix_tree_root handles_vma;
 391
 392         /** @lut_mutex: Locks handles_vma */
 393         struct mutex lut_mutex;
 394
 395         /**
 396          * @name: arbitrary name, used for user debug
 397          *
 398          * A name is constructed for the context from the creator's process
 399          * name, pid and user handle in order to uniquely identify the
 400          * context in messages.
 401          */
 402         char name[TASK_COMM_LEN + 8];
 403
 404         /** @stale: tracks stale engines to be destroyed */
 405         struct {
 406                 /** @lock: guards engines */
 407                 spinlock_t lock;
 408                 /** @engines: list of stale engines */
 409                 struct list_head engines;
 410         } stale;
 411 };
 412
 413 #endif /* __I915_GEM_CONTEXT_TYPES_H__ */