drm/nouveau/fifo/gk104: fix race condition when updating engine runlists
author Ben Skeggs <bskeggs@redhat.com>
Tue, 10 Nov 2015 23:37:05 +0000 (09:37 +1000)
committer Ben Skeggs <bskeggs@redhat.com>
Mon, 11 Jan 2016 01:17:40 +0000 (11:17 +1000)
The CPU-side tracking of engine runlists was not protected by a lock,
leading to list corruption, eventually causing runlist_update() to
overrun the GPU-side runlist, triggering an OOPS.

Fixes some of the issues noticed during parallel piglit runs.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h
drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c

index 98970a0b7a66867b0c804c87bbadd3810f8a2af1..8d5db6d5a5587ab13c3aa34f814f9af92b4bf01e 100644 (file)
@@ -47,7 +47,7 @@ gk104_fifo_uevent_init(struct nvkm_fifo *fifo)
 }
 
 void
-gk104_fifo_runlist_update(struct gk104_fifo *fifo, u32 engine)
+gk104_fifo_runlist_commit(struct gk104_fifo *fifo, u32 engine)
 {
        struct gk104_fifo_engn *engn = &fifo->engine[engine];
        struct gk104_fifo_chan *chan;
@@ -78,6 +78,22 @@ gk104_fifo_runlist_update(struct gk104_fifo *fifo, u32 engine)
        mutex_unlock(&subdev->mutex);
 }
 
+void
+gk104_fifo_runlist_remove(struct gk104_fifo *fifo, struct gk104_fifo_chan *chan)
+{
+       mutex_lock(&fifo->base.engine.subdev.mutex);
+       list_del_init(&chan->head);
+       mutex_unlock(&fifo->base.engine.subdev.mutex);
+}
+
+void
+gk104_fifo_runlist_insert(struct gk104_fifo *fifo, struct gk104_fifo_chan *chan)
+{
+       mutex_lock(&fifo->base.engine.subdev.mutex);
+       list_add_tail(&chan->head, &fifo->engine[chan->engine].chan);
+       mutex_unlock(&fifo->base.engine.subdev.mutex);
+}
+
 static inline struct nvkm_engine *
 gk104_fifo_engine(struct gk104_fifo *fifo, u32 engn)
 {
@@ -112,7 +128,7 @@ gk104_fifo_recover_work(struct work_struct *work)
                        nvkm_subdev_fini(&engine->subdev, false);
                        WARN_ON(nvkm_subdev_init(&engine->subdev));
                }
-               gk104_fifo_runlist_update(fifo, gk104_fifo_subdev_engine(engn));
+               gk104_fifo_runlist_commit(fifo, gk104_fifo_subdev_engine(engn));
        }
 
        nvkm_wr32(device, 0x00262c, engm);
index 5afd9b5ec5d1895bc9ec0a21c396b42e93bf09a7..bec519d8f91ebe4ca792a011c38f4fb86ef56d99 100644 (file)
@@ -5,6 +5,7 @@
 
 #include <subdev/mmu.h>
 
+struct gk104_fifo_chan;
 struct gk104_fifo_engn {
        struct nvkm_memory *runlist[2];
        int cur_runlist;
@@ -35,7 +36,9 @@ void gk104_fifo_fini(struct nvkm_fifo *);
 void gk104_fifo_intr(struct nvkm_fifo *);
 void gk104_fifo_uevent_init(struct nvkm_fifo *);
 void gk104_fifo_uevent_fini(struct nvkm_fifo *);
-void gk104_fifo_runlist_update(struct gk104_fifo *, u32 engine);
+void gk104_fifo_runlist_insert(struct gk104_fifo *, struct gk104_fifo_chan *);
+void gk104_fifo_runlist_remove(struct gk104_fifo *, struct gk104_fifo_chan *);
+void gk104_fifo_runlist_commit(struct gk104_fifo *, u32 engine);
 
 static inline u64
 gk104_fifo_engine_subdev(int engine)
index 0b817540a9e4f7d62a50aacf0a38d0d06c447abe..500e7d2f2df79cdba3b9ff1d6f39dd49cf1e110d 100644 (file)
@@ -151,9 +151,9 @@ gk104_fifo_gpfifo_fini(struct nvkm_fifo_chan *base)
        u32 coff = chan->base.chid * 8;
 
        if (!list_empty(&chan->head)) {
-               list_del_init(&chan->head);
+               gk104_fifo_runlist_remove(fifo, chan);
                nvkm_mask(device, 0x800004 + coff, 0x00000800, 0x00000800);
-               gk104_fifo_runlist_update(fifo, chan->engine);
+               gk104_fifo_runlist_commit(fifo, chan->engine);
        }
 
        nvkm_wr32(device, 0x800000 + coff, 0x00000000);
@@ -172,9 +172,9 @@ gk104_fifo_gpfifo_init(struct nvkm_fifo_chan *base)
        nvkm_wr32(device, 0x800000 + coff, 0x80000000 | addr);
 
        if (list_empty(&chan->head) && !chan->killed) {
-               list_add_tail(&chan->head, &fifo->engine[chan->engine].chan);
+               gk104_fifo_runlist_insert(fifo, chan);
                nvkm_mask(device, 0x800004 + coff, 0x00000400, 0x00000400);
-               gk104_fifo_runlist_update(fifo, chan->engine);
+               gk104_fifo_runlist_commit(fifo, chan->engine);
                nvkm_mask(device, 0x800004 + coff, 0x00000400, 0x00000400);
        }
 }