long jiffies;
pid_t pid;
u32 context;
+ int ban_score;
u32 seqno;
u32 head;
u32 tail;
/* This context had batch active when hang was declared */
unsigned batch_active;
- /* Time when this context was last blamed for a GPU reset */
- unsigned long guilty_ts;
-
- /* If the contexts causes a second GPU hang within this time,
- * it is permanently banned from submitting any more work.
- */
- unsigned long ban_period_seconds;
+ bool bannable:1;
/* This context is banned to submit more work */
- bool banned;
+ bool banned:1;
#define CONTEXT_SCORE_GUILTY 10
#define CONTEXT_SCORE_BAN_THRESHOLD 40
/* For hangcheck timer */
#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)
- /* Hang gpu twice in this window and your context gets banned */
-#define DRM_I915_CTX_BAN_PERIOD DIV_ROUND_UP(8*DRM_I915_HANGCHECK_PERIOD, 1000)
struct delayed_work hangcheck_work;
static bool i915_context_is_banned(const struct i915_gem_context *ctx)
{
const struct i915_ctx_hang_stats *hs = &ctx->hang_stats;
- unsigned long elapsed;
if (hs->banned)
return true;
- if (!hs->ban_period_seconds)
+ if (!hs->bannable)
return false;
- elapsed = get_seconds() - hs->guilty_ts;
- if (elapsed <= hs->ban_period_seconds) {
- DRM_DEBUG("context hanging too fast, banning!\n");
- return true;
- }
-
if (hs->ban_score >= CONTEXT_SCORE_BAN_THRESHOLD) {
DRM_DEBUG("context hanging too often, banning!\n");
return true;
hs->banned = i915_context_is_banned(ctx);
hs->batch_active++;
- hs->guilty_ts = get_seconds();
}
static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
* is no remap info, it will be a NOP. */
ctx->remap_slice = ALL_L3_SLICES(dev_priv);
- ctx->hang_stats.ban_period_seconds = DRM_I915_CTX_BAN_PERIOD;
+ ctx->hang_stats.bannable = true;
ctx->ring_size = 4 * PAGE_SIZE;
ctx->desc_template = GEN8_CTX_ADDRESSING_MODE(dev_priv) <<
GEN8_CTX_ADDRESSING_MODE_SHIFT;
args->size = 0;
switch (args->param) {
case I915_CONTEXT_PARAM_BAN_PERIOD:
- args->value = ctx->hang_stats.ban_period_seconds;
+ ret = -EINVAL;
break;
case I915_CONTEXT_PARAM_NO_ZEROMAP:
args->value = ctx->flags & CONTEXT_NO_ZEROMAP;
case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
args->value = !!(ctx->flags & CONTEXT_NO_ERROR_CAPTURE);
break;
+ case I915_CONTEXT_PARAM_BANNABLE:
+ args->value = ctx->hang_stats.bannable;
+ break;
default:
ret = -EINVAL;
break;
switch (args->param) {
case I915_CONTEXT_PARAM_BAN_PERIOD:
- if (args->size)
- ret = -EINVAL;
- else if (args->value < ctx->hang_stats.ban_period_seconds &&
- !capable(CAP_SYS_ADMIN))
- ret = -EPERM;
- else
- ctx->hang_stats.ban_period_seconds = args->value;
+ ret = -EINVAL;
break;
case I915_CONTEXT_PARAM_NO_ZEROMAP:
if (args->size) {
ctx->flags &= ~CONTEXT_NO_ERROR_CAPTURE;
}
break;
+ case I915_CONTEXT_PARAM_BANNABLE:
+ if (args->size)
+ ret = -EINVAL;
+ else if (!capable(CAP_SYS_ADMIN) && !args->value)
+ ret = -EPERM;
+ else
+ ctx->hang_stats.bannable = args->value;
+ break;
default:
ret = -EINVAL;
break;
if (!erq->seqno)
return;
- err_printf(m, "%s pid %d, seqno %8x:%08x, emitted %dms ago, head %08x, tail %08x\n",
- prefix, erq->pid,
+ err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, emitted %dms ago, head %08x, tail %08x\n",
+ prefix, erq->pid, erq->ban_score,
erq->context, erq->seqno,
jiffies_to_msecs(jiffies - erq->jiffies),
erq->head, erq->tail);
struct drm_i915_error_request *erq)
{
erq->context = request->ctx->hw_id;
+ erq->ban_score = request->ctx->hang_stats.ban_score;
erq->seqno = request->global_seqno;
erq->jiffies = request->emitted_jiffies;
erq->head = request->head;
#define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2
#define I915_CONTEXT_PARAM_GTT_SIZE 0x3
#define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4
+#define I915_CONTEXT_PARAM_BANNABLE 0x5
__u64 value;
};