drm: Get ref on CRTC commit object when waiting for flip_done
authorLeo Li <sunpeng.li@amd.com>
Mon, 15 Oct 2018 13:46:40 +0000 (09:46 -0400)
committerHarry Wentland <harry.wentland@amd.com>
Thu, 18 Oct 2018 18:23:13 +0000 (14:23 -0400)
This fixes a general protection fault, caused by accessing the contents
of a flip_done completion object that has already been freed. It occurs
due to the preemption of a non-blocking commit worker thread W by
another commit thread X. X continues to clear its atomic state at the
end, destroying the CRTC commit object that W still needs. Switching
back to W and accessing the commit objects then leads to bad results.

Worker W becomes preemptable when waiting for flip_done to complete. At
this point, a frequently occurring commit thread X can take over. Here's
an example where W is a worker thread that flips on both CRTCs, and X
does a legacy cursor update on both CRTCs:

        ...
     1. W does flip work
     2. W runs commit_hw_done()
     3. W waits for flip_done on CRTC 1
     4. > flip_done for CRTC 1 completes
     5. W finishes waiting for CRTC 1
     6. W waits for flip_done on CRTC 2

     7. > Preempted by X
     8. > flip_done for CRTC 2 completes
     9. X atomic_check: hw_done and flip_done are complete on all CRTCs
    10. X updates cursor on both CRTCs
    11. X destroys atomic state
    12. X done

    13. > Switch back to W
    14. W waits for flip_done on CRTC 2
    15. W raises general protection fault

The error looks like so:

    general protection fault: 0000 [#1] PREEMPT SMP PTI
    **snip**
    Call Trace:
     lock_acquire+0xa2/0x1b0
     _raw_spin_lock_irq+0x39/0x70
     wait_for_completion_timeout+0x31/0x130
     drm_atomic_helper_wait_for_flip_done+0x64/0x90 [drm_kms_helper]
     amdgpu_dm_atomic_commit_tail+0xcae/0xdd0 [amdgpu]
     commit_tail+0x3d/0x70 [drm_kms_helper]
     process_one_work+0x212/0x650
     worker_thread+0x49/0x420
     kthread+0xfb/0x130
     ret_from_fork+0x3a/0x50
    Modules linked in: x86_pkg_temp_thermal amdgpu(O) chash(O)
    gpu_sched(O) drm_kms_helper(O) syscopyarea sysfillrect sysimgblt
    fb_sys_fops ttm(O) drm(O)

Note that i915 has this issue masked, since hw_done is signaled after
waiting for flip_done. Doing so will block the cursor update from
happening until hw_done is signaled, preventing the cursor commit from
destroying the state.

v2: The reference on the commit object needs to be obtained before
    hw_done() is signaled, since that's the point where another commit
    is allowed to modify the state. Assuming that the
    new_crtc_state->commit object still exists within flip_done() is
    incorrect.

    Fix by getting a reference in setup_commit(), and releasing it
    during default_clear().

Signed-off-by: Leo Li <sunpeng.li@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Harry Wentland <harry.wentland@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1539611200-6184-1-git-send-email-sunpeng.li@amd.com
drivers/gpu/drm/drm_atomic.c
drivers/gpu/drm/drm_atomic_helper.c
include/drm/drm_atomic.h

index 018fcdb..281cf9c 100644 (file)
@@ -174,6 +174,11 @@ void drm_atomic_state_default_clear(struct drm_atomic_state *state)
                state->crtcs[i].state = NULL;
                state->crtcs[i].old_state = NULL;
                state->crtcs[i].new_state = NULL;
+
+               if (state->crtcs[i].commit) {
+                       drm_crtc_commit_put(state->crtcs[i].commit);
+                       state->crtcs[i].commit = NULL;
+               }
        }
 
        for (i = 0; i < config->num_total_plane; i++) {
index 80be74d..1bb4c31 100644 (file)
@@ -1408,15 +1408,16 @@ EXPORT_SYMBOL(drm_atomic_helper_wait_for_vblanks);
 void drm_atomic_helper_wait_for_flip_done(struct drm_device *dev,
                                          struct drm_atomic_state *old_state)
 {
-       struct drm_crtc_state *new_crtc_state;
        struct drm_crtc *crtc;
        int i;
 
-       for_each_new_crtc_in_state(old_state, crtc, new_crtc_state, i) {
-               struct drm_crtc_commit *commit = new_crtc_state->commit;
+       for (i = 0; i < dev->mode_config.num_crtc; i++) {
+               struct drm_crtc_commit *commit = old_state->crtcs[i].commit;
                int ret;
 
-               if (!commit)
+               crtc = old_state->crtcs[i].ptr;
+
+               if (!crtc || !commit)
                        continue;
 
                ret = wait_for_completion_timeout(&commit->flip_done, 10 * HZ);
@@ -1934,6 +1935,9 @@ int drm_atomic_helper_setup_commit(struct drm_atomic_state *state,
                drm_crtc_commit_get(commit);
 
                commit->abort_completion = true;
+
+               state->crtcs[i].commit = commit;
+               drm_crtc_commit_get(commit);
        }
 
        for_each_oldnew_connector_in_state(state, conn, old_conn_state, new_conn_state, i) {
index da9d95a..1e71315 100644 (file)
@@ -153,6 +153,17 @@ struct __drm_planes_state {
 struct __drm_crtcs_state {
        struct drm_crtc *ptr;
        struct drm_crtc_state *state, *old_state, *new_state;
+
+       /**
+        * @commit:
+        *
+        * A reference to the CRTC commit object that is kept for use by
+        * drm_atomic_helper_wait_for_flip_done() after
+        * drm_atomic_helper_commit_hw_done() is called. This ensures that a
+        * concurrent commit won't free a commit object that is still in use.
+        */
+       struct drm_crtc_commit *commit;
+
        s32 __user *out_fence_ptr;
        u64 last_vblank_count;
 };