Backmerge tag 'v4.16-rc7' into drm-next
author	Dave Airlie <airlied@redhat.com>
	Wed, 28 Mar 2018 04:30:41 +0000 (14:30 +1000)
committer	Dave Airlie <airlied@redhat.com>
	Wed, 28 Mar 2018 04:30:41 +0000 (14:30 +1000)
Linux 4.16-rc7

This was requested by Daniel, and things were getting a bit hard
to reconcile; most of the conflicts were trivial, though.

36 files changed:
MAINTAINERS
drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
drivers/gpu/drm/drm_edid.c
drivers/gpu/drm/drm_framebuffer.c
drivers/gpu/drm/drm_mm.c
drivers/gpu/drm/drm_probe_helper.c
drivers/gpu/drm/i915/gvt/kvmgt.c
drivers/gpu/drm/i915/gvt/mmio_context.c
drivers/gpu/drm/i915/gvt/scheduler.c
drivers/gpu/drm/i915/gvt/scheduler.h
drivers/gpu/drm/i915/gvt/trace.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/intel_ddi.c
drivers/gpu/drm/i915/intel_hangcheck.c
drivers/gpu/drm/imx/ipuv3-plane.c
drivers/gpu/drm/meson/meson_plane.c
drivers/gpu/drm/nouveau/nv50_display.c
drivers/gpu/drm/sun4i/sun4i_crtc.c
drivers/gpu/drm/sun4i/sun4i_drv.c
drivers/gpu/drm/sun4i/sun4i_rgb.c
drivers/gpu/drm/sun4i/sun4i_tcon.c
drivers/gpu/drm/sun4i/sun4i_tcon.h
drivers/gpu/drm/tegra/dc.c
drivers/gpu/drm/tegra/drm.c
drivers/gpu/drm/tegra/plane.c
drivers/gpu/drm/virtio/virtgpu_ioctl.c
drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
drivers/pci/quirks.c
sound/pci/hda/hda_intel.c

diff --combined MAINTAINERS
@@@ -766,8 -766,6 +766,8 @@@ F: drivers/gpu/drm/amd/amdgpu/amdgpu_am
  F:    drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
  F:    drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
  F:    drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
 +F:    drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
 +F:    drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
  F:    drivers/gpu/drm/amd/amdkfd/
  F:    drivers/gpu/drm/amd/include/cik_structs.h
  F:    drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@@ -1240,7 -1238,7 +1240,7 @@@ F:      drivers/clk/at9
  
  ARM/ATMEL AT91RM9200, AT91SAM9 AND SAMA5 SOC SUPPORT
  M:    Nicolas Ferre <nicolas.ferre@microchip.com>
- M:    Alexandre Belloni <alexandre.belloni@free-electrons.com>
+ M:    Alexandre Belloni <alexandre.belloni@bootlin.com>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  W:    http://www.linux4sam.org
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/nferre/linux-at91.git
@@@ -1592,7 -1590,7 +1592,7 @@@ ARM/Marvell Dove/MV78xx0/Orion SOC supp
  M:    Jason Cooper <jason@lakedaemon.net>
  M:    Andrew Lunn <andrew@lunn.ch>
  M:    Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
- M:    Gregory Clement <gregory.clement@free-electrons.com>
+ M:    Gregory Clement <gregory.clement@bootlin.com>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:    Maintained
  F:    Documentation/devicetree/bindings/soc/dove/
@@@ -1606,7 -1604,7 +1606,7 @@@ F:      arch/arm/boot/dts/orion5x
  ARM/Marvell Kirkwood and Armada 370, 375, 38x, 39x, XP, 3700, 7K/8K SOC support
  M:    Jason Cooper <jason@lakedaemon.net>
  M:    Andrew Lunn <andrew@lunn.ch>
- M:    Gregory Clement <gregory.clement@free-electrons.com>
+ M:    Gregory Clement <gregory.clement@bootlin.com>
  M:    Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:    Maintained
@@@ -2001,8 -1999,10 +2001,10 @@@ M:    Maxime Coquelin <mcoquelin.stm32@gma
  M:    Alexandre Torgue <alexandre.torgue@st.com>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:    Maintained
- T:    git git://git.kernel.org/pub/scm/linux/kernel/git/mcoquelin/stm32.git
+ T:    git git://git.kernel.org/pub/scm/linux/kernel/git/atorgue/stm32.git stm32-next
  N:    stm32
+ F:    arch/arm/boot/dts/stm32*
+ F:    arch/arm/mach-stm32/
  F:    drivers/clocksource/armv7m_systick.c
  
  ARM/TANGO ARCHITECTURE
@@@ -4458,13 -4458,6 +4460,13 @@@ T:    git git://anongit.freedesktop.org/dr
  S:    Supported
  F:    drivers/gpu/drm/pl111/
  
 +DRM DRIVER FOR ARM VERSATILE TFT PANELS
 +M:    Linus Walleij <linus.walleij@linaro.org>
 +T:    git git://anongit.freedesktop.org/drm/drm-misc
 +S:    Maintained
 +F:    drivers/gpu/drm/panel/panel-arm-versatile.c
 +F:    Documentation/devicetree/bindings/display/panel/arm,versatile-tft-panel.txt
 +
  DRM DRIVER FOR AST SERVER GRAPHICS CHIPS
  M:    Dave Airlie <airlied@redhat.com>
  S:    Odd Fixes
@@@ -4619,8 -4612,8 +4621,8 @@@ F:      include/uapi/drm
  F:    include/linux/vga*
  
  DRM DRIVERS AND MISC GPU PATCHES
 -M:    Daniel Vetter <daniel.vetter@intel.com>
  M:    Gustavo Padovan <gustavo@padovan.org>
 +M:    Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
  M:    Sean Paul <seanpaul@chromium.org>
  W:    https://01.org/linuxgraphics/gfx-docs/maintainer-tools/drm-misc.html
  S:    Maintained
@@@ -4746,7 -4739,6 +4748,7 @@@ F:      drivers/gpu/drm/rcar-du
  F:    drivers/gpu/drm/shmobile/
  F:    include/linux/platform_data/shmob_drm.h
  F:    Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt
 +F:    Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt
  F:    Documentation/devicetree/bindings/display/renesas,du.txt
  
  DRM DRIVERS FOR ROCKCHIP
@@@ -7610,8 -7602,10 +7612,10 @@@ F:    mm/kasan
  F:    scripts/Makefile.kasan
  
  KCONFIG
+ M:    Masahiro Yamada <yamada.masahiro@socionext.com>
+ T:    git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git kconfig
  L:    linux-kbuild@vger.kernel.org
- S:    Orphan
+ S:    Maintained
  F:    Documentation/kbuild/kconfig-language.txt
  F:    scripts/kconfig/
  
@@@ -7919,7 -7913,6 +7923,6 @@@ S:      Maintaine
  F:    scripts/leaking_addresses.pl
  
  LED SUBSYSTEM
- M:    Richard Purdie <rpurdie@rpsys.net>
  M:    Jacek Anaszewski <jacek.anaszewski@gmail.com>
  M:    Pavel Machek <pavel@ucw.cz>
  L:    linux-leds@vger.kernel.org
@@@ -9216,6 -9209,7 +9219,7 @@@ MIPS GENERIC PLATFOR
  M:    Paul Burton <paul.burton@mips.com>
  L:    linux-mips@linux-mips.org
  S:    Supported
+ F:    Documentation/devicetree/bindings/power/mti,mips-cpc.txt
  F:    arch/mips/generic/
  F:    arch/mips/tools/generic-board-config.sh
  
@@@ -9931,6 -9925,13 +9935,13 @@@ F:    Documentation/ABI/stable/sysfs-bus-n
  F:    include/linux/nvmem-consumer.h
  F:    include/linux/nvmem-provider.h
  
+ NXP SGTL5000 DRIVER
+ M:    Fabio Estevam <fabio.estevam@nxp.com>
+ L:    alsa-devel@alsa-project.org (moderated for non-subscribers)
+ S:    Maintained
+ F:    Documentation/devicetree/bindings/sound/sgtl5000.txt
+ F:    sound/soc/codecs/sgtl5000*
  NXP TDA998X DRM DRIVER
  M:    Russell King <linux@armlinux.org.uk>
  S:    Supported
@@@ -9955,6 -9956,7 +9966,7 @@@ F:      drivers/nfc/nxp-nc
  
  OBJTOOL
  M:    Josh Poimboeuf <jpoimboe@redhat.com>
+ M:    Peter Zijlstra <peterz@infradead.org>
  S:    Supported
  F:    tools/objtool/
  
@@@ -10332,7 -10334,7 +10344,7 @@@ F:   drivers/oprofile
  F:    include/linux/oprofile.h
  
  ORACLE CLUSTER FILESYSTEM 2 (OCFS2)
- M:    Mark Fasheh <mfasheh@versity.com>
+ M:    Mark Fasheh <mark@fasheh.com>
  M:    Joel Becker <jlbec@evilplan.org>
  L:    ocfs2-devel@oss.oracle.com (moderated for non-subscribers)
  W:    http://ocfs2.wiki.kernel.org
@@@ -10842,6 -10844,7 +10854,7 @@@ F:   drivers/platform/x86/peaq-wmi.
  PER-CPU MEMORY ALLOCATOR
  M:    Tejun Heo <tj@kernel.org>
  M:    Christoph Lameter <cl@linux.com>
+ M:    Dennis Zhou <dennisszhou@gmail.com>
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu.git
  S:    Maintained
  F:    include/linux/percpu*.h
@@@ -10935,6 -10938,17 +10948,17 @@@ L: linux-gpio@vger.kernel.or
  S:    Supported
  F:    drivers/pinctrl/pinctrl-at91-pio4.*
  
+ PIN CONTROLLER - FREESCALE
+ M:    Dong Aisheng <aisheng.dong@nxp.com>
+ M:    Fabio Estevam <festevam@gmail.com>
+ M:    Shawn Guo <shawnguo@kernel.org>
+ M:    Stefan Agner <stefan@agner.ch>
+ R:    Pengutronix Kernel Team <kernel@pengutronix.de>
+ L:    linux-gpio@vger.kernel.org
+ S:    Maintained
+ F:    drivers/pinctrl/freescale/
+ F:    Documentation/devicetree/bindings/pinctrl/fsl,*
  PIN CONTROLLER - INTEL
  M:    Mika Westerberg <mika.westerberg@linux.intel.com>
  M:    Heikki Krogerus <heikki.krogerus@linux.intel.com>
@@@ -12101,6 -12115,7 +12125,7 @@@ M:   Sylwester Nawrocki <s.nawrocki@samsu
  L:    alsa-devel@alsa-project.org (moderated for non-subscribers)
  S:    Supported
  F:    sound/soc/samsung/
+ F:    Documentation/devicetree/bindings/sound/samsung*
  
  SAMSUNG EXYNOS PSEUDO RANDOM NUMBER GENERATOR (RNG) DRIVER
  M:    Krzysztof Kozlowski <krzk@kernel.org>

diff --combined drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@@ -729,9 -729,11 +729,11 @@@ amdgpu_connector_lvds_detect(struct drm
        enum drm_connector_status ret = connector_status_disconnected;
        int r;
  
-       r = pm_runtime_get_sync(connector->dev->dev);
-       if (r < 0)
-               return connector_status_disconnected;
+       if (!drm_kms_helper_is_poll_worker()) {
+               r = pm_runtime_get_sync(connector->dev->dev);
+               if (r < 0)
+                       return connector_status_disconnected;
+       }
  
        if (encoder) {
                struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
        /* check acpi lid status ??? */
  
        amdgpu_connector_update_scratch_regs(connector, ret);
-       pm_runtime_mark_last_busy(connector->dev->dev);
-       pm_runtime_put_autosuspend(connector->dev->dev);
+       if (!drm_kms_helper_is_poll_worker()) {
+               pm_runtime_mark_last_busy(connector->dev->dev);
+               pm_runtime_put_autosuspend(connector->dev->dev);
+       }
        return ret;
  }
  
@@@ -861,16 -867,18 +867,18 @@@ amdgpu_connector_vga_detect(struct drm_
        enum drm_connector_status ret = connector_status_disconnected;
        int r;
  
-       r = pm_runtime_get_sync(connector->dev->dev);
-       if (r < 0)
-               return connector_status_disconnected;
+       if (!drm_kms_helper_is_poll_worker()) {
+               r = pm_runtime_get_sync(connector->dev->dev);
+               if (r < 0)
+                       return connector_status_disconnected;
+       }
  
        encoder = amdgpu_connector_best_single_encoder(connector);
        if (!encoder)
                ret = connector_status_disconnected;
  
        if (amdgpu_connector->ddc_bus)
 -              dret = amdgpu_ddc_probe(amdgpu_connector, false);
 +              dret = amdgpu_display_ddc_probe(amdgpu_connector, false);
        if (dret) {
                amdgpu_connector->detected_by_load = false;
                amdgpu_connector_free_edid(connector);
        amdgpu_connector_update_scratch_regs(connector, ret);
  
  out:
-       pm_runtime_mark_last_busy(connector->dev->dev);
-       pm_runtime_put_autosuspend(connector->dev->dev);
+       if (!drm_kms_helper_is_poll_worker()) {
+               pm_runtime_mark_last_busy(connector->dev->dev);
+               pm_runtime_put_autosuspend(connector->dev->dev);
+       }
  
        return ret;
  }
@@@ -981,9 -991,11 +991,11 @@@ amdgpu_connector_dvi_detect(struct drm_
        enum drm_connector_status ret = connector_status_disconnected;
        bool dret = false, broken_edid = false;
  
-       r = pm_runtime_get_sync(connector->dev->dev);
-       if (r < 0)
-               return connector_status_disconnected;
+       if (!drm_kms_helper_is_poll_worker()) {
+               r = pm_runtime_get_sync(connector->dev->dev);
+               if (r < 0)
+                       return connector_status_disconnected;
+       }
  
        if (!force && amdgpu_connector_check_hpd_status_unchanged(connector)) {
                ret = connector->status;
        }
  
        if (amdgpu_connector->ddc_bus)
 -              dret = amdgpu_ddc_probe(amdgpu_connector, false);
 +              dret = amdgpu_display_ddc_probe(amdgpu_connector, false);
        if (dret) {
                amdgpu_connector->detected_by_load = false;
                amdgpu_connector_free_edid(connector);
        amdgpu_connector_update_scratch_regs(connector, ret);
  
  exit:
-       pm_runtime_mark_last_busy(connector->dev->dev);
-       pm_runtime_put_autosuspend(connector->dev->dev);
+       if (!drm_kms_helper_is_poll_worker()) {
+               pm_runtime_mark_last_busy(connector->dev->dev);
+               pm_runtime_put_autosuspend(connector->dev->dev);
+       }
  
        return ret;
  }
@@@ -1352,9 -1366,11 +1366,11 @@@ amdgpu_connector_dp_detect(struct drm_c
        struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector);
        int r;
  
-       r = pm_runtime_get_sync(connector->dev->dev);
-       if (r < 0)
-               return connector_status_disconnected;
+       if (!drm_kms_helper_is_poll_worker()) {
+               r = pm_runtime_get_sync(connector->dev->dev);
+               if (r < 0)
+                       return connector_status_disconnected;
+       }
  
        if (!force && amdgpu_connector_check_hpd_status_unchanged(connector)) {
                ret = connector->status;
                        /* setup ddc on the bridge */
                        amdgpu_atombios_encoder_setup_ext_encoder_ddc(encoder);
                        /* bridge chips are always aux */
 -                      if (amdgpu_ddc_probe(amdgpu_connector, true)) /* try DDC */
 +                      /* try DDC */
 +                      if (amdgpu_display_ddc_probe(amdgpu_connector, true))
                                ret = connector_status_connected;
                        else if (amdgpu_connector->dac_load_detect) { /* try load detection */
                                const struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
                                        ret = connector_status_connected;
                        } else {
                                /* try non-aux ddc (DP to DVI/HDMI/etc. adapter) */
 -                              if (amdgpu_ddc_probe(amdgpu_connector, false))
 +                              if (amdgpu_display_ddc_probe(amdgpu_connector,
 +                                                           false))
                                        ret = connector_status_connected;
                        }
                }
  
        amdgpu_connector_update_scratch_regs(connector, ret);
  out:
-       pm_runtime_mark_last_busy(connector->dev->dev);
-       pm_runtime_put_autosuspend(connector->dev->dev);
+       if (!drm_kms_helper_is_poll_worker()) {
+               pm_runtime_mark_last_busy(connector->dev->dev);
+               pm_runtime_put_autosuspend(connector->dev->dev);
+       }
  
        return ret;
  }

diff --combined drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@@ -61,8 -61,7 +61,8 @@@
  
  #include "dcn/dcn_1_0_offset.h"
  #include "dcn/dcn_1_0_sh_mask.h"
 -#include "soc15ip.h"
 +#include "soc15_hw_ip.h"
 +#include "vega10_ip_offset.h"
  
  #include "soc15_common.h"
  #endif
@@@ -320,7 -319,6 +320,7 @@@ static void dm_crtc_high_irq(void *inte
                crtc_index = acrtc->crtc_id;
  
        drm_handle_vblank(adev->ddev, crtc_index);
 +      amdgpu_dm_crtc_handle_crc_irq(&acrtc->base);
  }
  
  static int dm_set_clockgating_state(void *handle,
@@@ -347,43 -345,23 +347,43 @@@ static void hotplug_notify_work_func(st
  }
  
  #if defined(CONFIG_DRM_AMD_DC_FBC)
 -#include "dal_asic_id.h"
  /* Allocate memory for FBC compressed data  */
 -/* TODO: Dynamic allocation */
 -#define AMDGPU_FBC_SIZE    (3840 * 2160 * 4)
 -
 -static void amdgpu_dm_initialize_fbc(struct amdgpu_device *adev)
 +static void amdgpu_dm_fbc_init(struct drm_connector *connector)
  {
 -      int r;
 +      struct drm_device *dev = connector->dev;
 +      struct amdgpu_device *adev = dev->dev_private;
        struct dm_comressor_info *compressor = &adev->dm.compressor;
 +      struct amdgpu_dm_connector *aconn = to_amdgpu_dm_connector(connector);
 +      struct drm_display_mode *mode;
 +      unsigned long max_size = 0;
 +
 +      if (adev->dm.dc->fbc_compressor == NULL)
 +              return;
  
 -      if (!compressor->bo_ptr) {
 -              r = amdgpu_bo_create_kernel(adev, AMDGPU_FBC_SIZE, PAGE_SIZE,
 -                              AMDGPU_GEM_DOMAIN_VRAM, &compressor->bo_ptr,
 -                              &compressor->gpu_addr, &compressor->cpu_addr);
 +      if (aconn->dc_link->connector_signal != SIGNAL_TYPE_EDP)
 +              return;
 +
 +      if (compressor->bo_ptr)
 +              return;
 +
 +
 +      list_for_each_entry(mode, &connector->modes, head) {
 +              if (max_size < mode->htotal * mode->vtotal)
 +                      max_size = mode->htotal * mode->vtotal;
 +      }
 +
 +      if (max_size) {
 +              int r = amdgpu_bo_create_kernel(adev, max_size * 4, PAGE_SIZE,
 +                          AMDGPU_GEM_DOMAIN_GTT, &compressor->bo_ptr,
 +                          &compressor->gpu_addr, &compressor->cpu_addr);
  
                if (r)
 -                      DRM_ERROR("DM: Failed to initialize fbc\n");
 +                      DRM_ERROR("DM: Failed to initialize FBC\n");
 +              else {
 +                      adev->dm.dc->ctx->fbc_gpu_addr = compressor->gpu_addr;
 +                      DRM_INFO("DM: FBC alloc %lu\n", max_size*4);
 +              }
 +
        }
  
  }
@@@ -403,6 -381,12 +403,6 @@@ static int amdgpu_dm_init(struct amdgpu
        /* Zero all the fields */
        memset(&init_data, 0, sizeof(init_data));
  
 -      /* initialize DAL's lock (for SYNC context use) */
 -      spin_lock_init(&adev->dm.dal_lock);
 -
 -      /* initialize DAL's mutex */
 -      mutex_init(&adev->dm.dal_mutex);
 -
        if(amdgpu_dm_irq_init(adev)) {
                DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n");
                goto error;
        init_data.asic_id.pci_revision_id = adev->rev_id;
        init_data.asic_id.hw_internal_rev = adev->external_rev_id;
  
 -      init_data.asic_id.vram_width = adev->mc.vram_width;
 +      init_data.asic_id.vram_width = adev->gmc.vram_width;
        /* TODO: initialize init_data.asic_id.vram_type here!!!! */
        init_data.asic_id.atombios_base_address =
                adev->mode_info.atom_context->bios;
        else
                init_data.log_mask = DC_MIN_LOG_MASK;
  
 -#if defined(CONFIG_DRM_AMD_DC_FBC)
 -      if (adev->family == FAMILY_CZ)
 -              amdgpu_dm_initialize_fbc(adev);
 -      init_data.fbc_gpu_addr = adev->dm.compressor.gpu_addr;
 -#endif
 +      /*
 +       * TODO debug why this doesn't work on Raven
 +       */
 +      if (adev->flags & AMD_IS_APU &&
 +          adev->asic_type >= CHIP_CARRIZO &&
 +          adev->asic_type < CHIP_RAVEN)
 +              init_data.flags.gpu_vm_support = true;
 +
        /* Display Core create. */
        adev->dm.dc = dc_create(&init_data);
  
                DRM_DEBUG_DRIVER("amdgpu: freesync_module init done %p.\n",
                                adev->dm.freesync_module);
  
 +      amdgpu_dm_init_color_mod();
 +
        if (amdgpu_dm_initialize_drm_device(adev)) {
                DRM_ERROR(
                "amdgpu: failed to initialize sw for display support.\n");
@@@ -561,9 -540,9 +561,9 @@@ static int detect_mst_link_for_all_conn
  
  static int dm_late_init(void *handle)
  {
 -      struct drm_device *dev = ((struct amdgpu_device *)handle)->ddev;
 +      struct amdgpu_device *adev = (struct amdgpu_device *)handle;
  
 -      return detect_mst_link_for_all_connectors(dev);
 +      return detect_mst_link_for_all_connectors(adev->ddev);
  }
  
  static void s3_handle_mst(struct drm_device *dev, bool suspend)
@@@ -814,7 -793,7 +814,7 @@@ dm_atomic_state_alloc_free(struct drm_a
  }
  
  static const struct drm_mode_config_funcs amdgpu_dm_mode_funcs = {
 -      .fb_create = amdgpu_user_framebuffer_create,
 +      .fb_create = amdgpu_display_user_framebuffer_create,
        .output_poll_changed = drm_fb_helper_output_poll_changed,
        .atomic_check = amdgpu_dm_atomic_check,
        .atomic_commit = amdgpu_dm_atomic_commit,
@@@ -1130,9 -1109,8 +1130,9 @@@ static int dce110_register_irq_handlers
        unsigned client_id = AMDGPU_IH_CLIENTID_LEGACY;
  
        if (adev->asic_type == CHIP_VEGA10 ||
 +          adev->asic_type == CHIP_VEGA12 ||
            adev->asic_type == CHIP_RAVEN)
 -              client_id = AMDGPU_IH_CLIENTID_DCE;
 +              client_id = SOC15_IH_CLIENTID_DCE;
  
        int_params.requested_polarity = INTERRUPT_POLARITY_DEFAULT;
        int_params.current_polarity = INTERRUPT_POLARITY_DEFAULT;
@@@ -1232,7 -1210,7 +1232,7 @@@ static int dcn10_register_irq_handlers(
        for (i = DCN_1_0__SRCID__DC_D1_OTG_VSTARTUP;
                        i <= DCN_1_0__SRCID__DC_D1_OTG_VSTARTUP + adev->mode_info.num_crtc - 1;
                        i++) {
 -              r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_DCE, i, &adev->crtc_irq);
 +              r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->crtc_irq);
  
                if (r) {
                        DRM_ERROR("Failed to add crtc irq id!\n");
        for (i = DCN_1_0__SRCID__HUBP0_FLIP_INTERRUPT;
                        i <= DCN_1_0__SRCID__HUBP0_FLIP_INTERRUPT + adev->mode_info.num_crtc - 1;
                        i++) {
 -              r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_DCE, i, &adev->pageflip_irq);
 +              r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->pageflip_irq);
                if (r) {
                        DRM_ERROR("Failed to add page flip irq id!\n");
                        return r;
        }
  
        /* HPD */
 -      r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_DCE, DCN_1_0__SRCID__DC_HPD1_INT,
 +      r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, DCN_1_0__SRCID__DC_HPD1_INT,
                        &adev->hpd_irq);
        if (r) {
                DRM_ERROR("Failed to add hpd irq id!\n");
@@@ -1307,9 -1285,9 +1307,9 @@@ static int amdgpu_dm_mode_config_init(s
        /* indicate support of immediate flip */
        adev->ddev->mode_config.async_page_flip = true;
  
 -      adev->ddev->mode_config.fb_base = adev->mc.aper_base;
 +      adev->ddev->mode_config.fb_base = adev->gmc.aper_base;
  
 -      r = amdgpu_modeset_create_props(adev);
 +      r = amdgpu_display_modeset_create_props(adev);
        if (r)
                return r;
  
@@@ -1366,43 -1344,6 +1366,43 @@@ amdgpu_dm_register_backlight_device(str
  
  #endif
  
 +static int initialize_plane(struct amdgpu_display_manager *dm,
 +                           struct amdgpu_mode_info *mode_info,
 +                           int plane_id)
 +{
 +      struct amdgpu_plane *plane;
 +      unsigned long possible_crtcs;
 +      int ret = 0;
 +
 +      plane = kzalloc(sizeof(struct amdgpu_plane), GFP_KERNEL);
 +      mode_info->planes[plane_id] = plane;
 +
 +      if (!plane) {
 +              DRM_ERROR("KMS: Failed to allocate plane\n");
 +              return -ENOMEM;
 +      }
 +      plane->base.type = mode_info->plane_type[plane_id];
 +
 +      /*
 +       * HACK: IGT tests expect that each plane can only have one
 +       * one possible CRTC. For now, set one CRTC for each
 +       * plane that is not an underlay, but still allow multiple
 +       * CRTCs for underlay planes.
 +       */
 +      possible_crtcs = 1 << plane_id;
 +      if (plane_id >= dm->dc->caps.max_streams)
 +              possible_crtcs = 0xff;
 +
 +      ret = amdgpu_dm_plane_init(dm, mode_info->planes[plane_id], possible_crtcs);
 +
 +      if (ret) {
 +              DRM_ERROR("KMS: Failed to initialize plane\n");
 +              return ret;
 +      }
 +
 +      return ret;
 +}
 +
  /* In this architecture, the association
   * connector -> encoder -> crtc
   * id not really requried. The crtc and connector will hold the
  static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
  {
        struct amdgpu_display_manager *dm = &adev->dm;
 -      uint32_t i;
 +      int32_t i;
        struct amdgpu_dm_connector *aconnector = NULL;
        struct amdgpu_encoder *aencoder = NULL;
        struct amdgpu_mode_info *mode_info = &adev->mode_info;
        uint32_t link_cnt;
 -      unsigned long possible_crtcs;
 +      int32_t total_overlay_planes, total_primary_planes;
  
        link_cnt = dm->dc->caps.max_links;
        if (amdgpu_dm_mode_config_init(dm->adev)) {
                return -1;
        }
  
 -      for (i = 0; i < dm->dc->caps.max_planes; i++) {
 -              struct amdgpu_plane *plane;
 -
 -              plane = kzalloc(sizeof(struct amdgpu_plane), GFP_KERNEL);
 -              mode_info->planes[i] = plane;
 +      /* Identify the number of planes to be initialized */
 +      total_overlay_planes = dm->dc->caps.max_slave_planes;
 +      total_primary_planes = dm->dc->caps.max_planes - dm->dc->caps.max_slave_planes;
  
 -              if (!plane) {
 -                      DRM_ERROR("KMS: Failed to allocate plane\n");
 +      /* First initialize overlay planes, index starting after primary planes */
 +      for (i = (total_overlay_planes - 1); i >= 0; i--) {
 +              if (initialize_plane(dm, mode_info, (total_primary_planes + i))) {
 +                      DRM_ERROR("KMS: Failed to initialize overlay plane\n");
                        goto fail;
                }
 -              plane->base.type = mode_info->plane_type[i];
 -
 -              /*
 -               * HACK: IGT tests expect that each plane can only have one
 -               * one possible CRTC. For now, set one CRTC for each
 -               * plane that is not an underlay, but still allow multiple
 -               * CRTCs for underlay planes.
 -               */
 -              possible_crtcs = 1 << i;
 -              if (i >= dm->dc->caps.max_streams)
 -                      possible_crtcs = 0xff;
 +      }
  
 -              if (amdgpu_dm_plane_init(dm, mode_info->planes[i], possible_crtcs)) {
 -                      DRM_ERROR("KMS: Failed to initialize plane\n");
 +      /* Initialize primary planes */
 +      for (i = (total_primary_planes - 1); i >= 0; i--) {
 +              if (initialize_plane(dm, mode_info, i)) {
 +                      DRM_ERROR("KMS: Failed to initialize primary plane\n");
                        goto fail;
                }
        }
        case CHIP_POLARIS10:
        case CHIP_POLARIS12:
        case CHIP_VEGA10:
 +      case CHIP_VEGA12:
                if (dce110_register_irq_handlers(dm->adev)) {
                        DRM_ERROR("DM: Failed to initialize IRQ\n");
                        goto fail;
@@@ -1596,6 -1544,7 +1596,6 @@@ static int amdgpu_notify_freesync(struc
  static const struct amdgpu_display_funcs dm_display_funcs = {
        .bandwidth_update = dm_bandwidth_update, /* called unconditionally */
        .vblank_get_counter = dm_vblank_get_counter,/* called unconditionally */
 -      .vblank_wait = NULL,
        .backlight_set_level =
                dm_set_backlight_level,/* called unconditionally */
        .backlight_get_level =
@@@ -1646,6 -1595,8 +1646,6 @@@ static int dm_early_init(void *handle
  {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
  
 -      adev->ddev->driver->driver_features |= DRIVER_ATOMIC;
 -
        switch (adev->asic_type) {
        case CHIP_BONAIRE:
        case CHIP_HAWAII:
                adev->mode_info.plane_type = dm_plane_type_default;
                break;
        case CHIP_VEGA10:
 +      case CHIP_VEGA12:
                adev->mode_info.num_crtc = 6;
                adev->mode_info.num_hpd = 6;
                adev->mode_info.num_dig = 6;
@@@ -1948,7 -1898,6 +1948,7 @@@ static int fill_plane_attributes_from_f
                        AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
  
        if (adev->asic_type == CHIP_VEGA10 ||
 +          adev->asic_type == CHIP_VEGA12 ||
            adev->asic_type == CHIP_RAVEN) {
                /* Fill GFX9 params */
                plane_state->tiling_info.gfx9.num_pipes =
  
  }
  
 -static void fill_gamma_from_crtc_state(const struct drm_crtc_state *crtc_state,
 -                                     struct dc_plane_state *plane_state)
 -{
 -      int i;
 -      struct dc_gamma *gamma;
 -      struct drm_color_lut *lut =
 -                      (struct drm_color_lut *) crtc_state->gamma_lut->data;
 -
 -      gamma = dc_create_gamma();
 -
 -      if (gamma == NULL) {
 -              WARN_ON(1);
 -              return;
 -      }
 -
 -      gamma->type = GAMMA_RGB_256;
 -      gamma->num_entries = GAMMA_RGB_256_ENTRIES;
 -      for (i = 0; i < GAMMA_RGB_256_ENTRIES; i++) {
 -              gamma->entries.red[i] = dal_fixed31_32_from_int(lut[i].red);
 -              gamma->entries.green[i] = dal_fixed31_32_from_int(lut[i].green);
 -              gamma->entries.blue[i] = dal_fixed31_32_from_int(lut[i].blue);
 -      }
 -
 -      plane_state->gamma_correction = gamma;
 -}
 -
  static int fill_plane_attributes(struct amdgpu_device *adev,
                                 struct dc_plane_state *dc_plane_state,
                                 struct drm_plane_state *plane_state,
        if (input_tf == NULL)
                return -ENOMEM;
  
 -      input_tf->type = TF_TYPE_PREDEFINED;
 -      input_tf->tf = TRANSFER_FUNCTION_SRGB;
 -
        dc_plane_state->in_transfer_func = input_tf;
  
 -      /* In case of gamma set, update gamma value */
 -      if (crtc_state->gamma_lut)
 -              fill_gamma_from_crtc_state(crtc_state, dc_plane_state);
 +      /*
 +       * Always set input transfer function, since plane state is refreshed
 +       * every time.
 +       */
 +      ret = amdgpu_dm_set_degamma_lut(crtc_state, dc_plane_state);
 +      if (ret) {
 +              dc_transfer_func_release(dc_plane_state->in_transfer_func);
 +              dc_plane_state->in_transfer_func = NULL;
 +      }
  
        return ret;
  }
@@@ -2358,7 -2330,7 +2358,7 @@@ static void set_master_stream(struct dc
                }
        }
        for (j = 0;  j < stream_count; j++) {
 -              if (stream_set[j] && j != master_stream)
 +              if (stream_set[j])
                        stream_set[j]->triggered_crtc_reset.event_source = stream_set[master_stream];
        }
  }
@@@ -2557,7 -2529,6 +2557,7 @@@ static const struct drm_crtc_funcs amdg
        .page_flip = drm_atomic_helper_page_flip,
        .atomic_duplicate_state = dm_crtc_duplicate_state,
        .atomic_destroy_state = dm_crtc_destroy_state,
 +      .set_crc_source = amdgpu_dm_crtc_set_crc_source,
        .enable_vblank = dm_enable_vblank,
        .disable_vblank = dm_disable_vblank,
  };
@@@ -2835,7 -2806,6 +2835,7 @@@ int amdgpu_dm_connector_mode_valid(stru
        /* TODO: Unhardcode stream count */
        struct dc_stream_state *stream;
        struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
 +      enum dc_status dc_result = DC_OK;
  
        if ((mode->flags & DRM_MODE_FLAG_INTERLACE) ||
                        (mode->flags & DRM_MODE_FLAG_DBLSCAN))
                goto fail;
        }
  
 -      drm_mode_set_crtcinfo(mode, 0);
 -      fill_stream_properties_from_drm_display_mode(stream, mode, connector);
 +      dc_result = dc_validate_stream(adev->dm.dc, stream);
  
 -      stream->src.width = mode->hdisplay;
 -      stream->src.height = mode->vdisplay;
 -      stream->dst = stream->src;
 -
 -      if (dc_validate_stream(adev->dm.dc, stream) == DC_OK)
 +      if (dc_result == DC_OK)
                result = MODE_OK;
 +      else
 +              DRM_DEBUG_KMS("Mode %dx%d (clk %d) failed DC validation with error %d\n",
 +                            mode->vdisplay,
 +                            mode->hdisplay,
 +                            mode->clock,
 +                            dc_result);
  
        dc_stream_release(stream);
  
@@@ -3012,13 -2981,11 +3012,13 @@@ static int dm_plane_helper_prepare_fb(s
  {
        struct amdgpu_framebuffer *afb;
        struct drm_gem_object *obj;
 +      struct amdgpu_device *adev;
        struct amdgpu_bo *rbo;
        uint64_t chroma_addr = 0;
 -      int r;
        struct dm_plane_state *dm_plane_state_new, *dm_plane_state_old;
        unsigned int awidth;
 +      uint32_t domain;
 +      int r;
  
        dm_plane_state_old = to_dm_plane_state(plane->state);
        dm_plane_state_new = to_dm_plane_state(new_state);
  
        obj = afb->obj;
        rbo = gem_to_amdgpu_bo(obj);
 +      adev = amdgpu_ttm_adev(rbo->tbo.bdev);
        r = amdgpu_bo_reserve(rbo, false);
        if (unlikely(r != 0))
                return r;
  
 -      r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_VRAM, &afb->address);
 +      if (plane->type != DRM_PLANE_TYPE_CURSOR)
 +              domain = amdgpu_display_framebuffer_domains(adev);
 +      else
 +              domain = AMDGPU_GEM_DOMAIN_VRAM;
  
 +      r = amdgpu_bo_pin(rbo, domain, &afb->address);
  
        amdgpu_bo_unreserve(rbo);
  
@@@ -3172,8 -3134,6 +3172,6 @@@ static int amdgpu_dm_plane_init(struct 
  
        switch (aplane->base.type) {
        case DRM_PLANE_TYPE_PRIMARY:
-               aplane->base.format_default = true;
                res = drm_universal_plane_init(
                                dm->adev->ddev,
                                &aplane->base,
@@@ -3258,9 -3218,7 +3256,9 @@@ static int amdgpu_dm_crtc_init(struct a
        acrtc->base.enabled = false;
  
        dm->adev->mode_info.crtcs[crtc_index] = acrtc;
 -      drm_mode_crtc_set_gamma_size(&acrtc->base, 256);
 +      drm_crtc_enable_color_mgmt(&acrtc->base, MAX_COLOR_LUT_ENTRIES,
 +                                 true, MAX_COLOR_LUT_ENTRIES);
 +      drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES);
  
        return 0;
  
@@@ -3436,12 -3394,9 +3434,12 @@@ static int amdgpu_dm_connector_get_mode
        struct edid *edid = amdgpu_dm_connector->edid;
  
        encoder = helper->best_encoder(connector);
 -
        amdgpu_dm_connector_ddc_get_modes(connector, edid);
        amdgpu_dm_connector_add_common_modes(encoder, connector);
 +
 +#if defined(CONFIG_DRM_AMD_DC_FBC)
 +      amdgpu_dm_fbc_init(connector);
 +#endif
        return amdgpu_dm_connector->num_modes;
  }
  
@@@ -3714,7 -3669,7 +3712,7 @@@ static void manage_dm_interrupts(struc
         * constant is the same as PFLIP
         */
        int irq_type =
 -              amdgpu_crtc_idx_to_irq_type(
 +              amdgpu_display_crtc_idx_to_irq_type(
                        adev,
                        acrtc->crtc_id);
  
@@@ -3909,7 -3864,7 +3907,7 @@@ static void amdgpu_dm_do_flip(struct dr
  
  
        /* Prepare wait for target vblank early - before the fence-waits */
 -      target_vblank = target - drm_crtc_vblank_count(crtc) +
 +      target_vblank = target - (uint32_t)drm_crtc_vblank_count(crtc) +
                        amdgpu_get_vblank_counter_kms(crtc->dev, acrtc->crtc_id);
  
        /* TODO This might fail and hence better not used, wait
         * targeted by the flip
         */
        while ((acrtc->enabled &&
 -              (amdgpu_get_crtc_scanoutpos(adev->ddev, acrtc->crtc_id, 0,
 -                                      &vpos, &hpos, NULL, NULL,
 -                                      &crtc->hwmode)
 +              (amdgpu_display_get_crtc_scanoutpos(adev->ddev, acrtc->crtc_id,
 +                                                  0, &vpos, &hpos, NULL,
 +                                                  NULL, &crtc->hwmode)
                 & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK)) ==
                (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK) &&
                (int)(target_vblank -
@@@ -4055,7 -4010,7 +4053,7 @@@ static void amdgpu_dm_commit_planes(str
                        amdgpu_dm_do_flip(
                                crtc,
                                fb,
 -                              drm_crtc_vblank_count(crtc) + *wait_for_vblank,
 +                              (uint32_t)drm_crtc_vblank_count(crtc) + *wait_for_vblank,
                                dm_state->context);
                }
  
@@@ -4676,30 -4631,6 +4674,30 @@@ next_crtc
                /* Release extra reference */
                if (new_stream)
                         dc_stream_release(new_stream);
 +
 +              /*
 +               * We want to do dc stream updates that do not require a
 +               * full modeset below.
 +               */
 +              if (!enable || !aconnector || modereset_required(new_crtc_state))
 +                      continue;
 +              /*
 +               * Given above conditions, the dc state cannot be NULL because:
 +               * 1. We're attempting to enable a CRTC. Which has a...
 +               * 2. Valid connector attached, and
 +               * 3. User does not want to reset it (disable or mark inactive,
 +               *    which can happen on a CRTC that's already disabled).
 +               * => It currently exists.
 +               */
 +              BUG_ON(dm_new_crtc_state->stream == NULL);
 +
 +              /* Color managment settings */
 +              if (dm_new_crtc_state->base.color_mgmt_changed) {
 +                      ret = amdgpu_dm_set_regamma_lut(dm_new_crtc_state);
 +                      if (ret)
 +                              goto fail;
 +                      amdgpu_dm_set_ctm(dm_new_crtc_state);
 +              }
        }
  
        return ret;
@@@ -4728,8 -4659,8 +4726,8 @@@ static int dm_update_planes_state(struc
        int ret = 0;
  
  
 -      /* Add new planes */
 -      for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i) {
 +      /* Add new planes, in reverse order as DC expectation */
 +      for_each_oldnew_plane_in_state_reverse(state, plane, old_plane_state, new_plane_state, i) {
                new_plane_crtc = new_plane_state->crtc;
                old_plane_crtc = old_plane_state->crtc;
                dm_new_plane_state = to_dm_plane_state(new_plane_state);
                        WARN_ON(dm_new_plane_state->dc_state);
  
                        dc_new_plane_state = dc_create_plane_state(dc);
 -                      if (!dc_new_plane_state) {
 -                              ret = -EINVAL;
 -                              return ret;
 -                      }
 +                      if (!dc_new_plane_state)
 +                              return -ENOMEM;
  
                        DRM_DEBUG_DRIVER("Enabling DRM plane: %d on DRM crtc %d\n",
                                        plane->base.id, new_plane_crtc->base.id);
                                        dm_state->context)) {
  
                                dc_plane_state_release(dc_new_plane_state);
 -                              ret = -EINVAL;
 -                              return ret;
 +                              return -EINVAL;
                        }
  
                        dm_new_plane_state->dc_state = dc_new_plane_state;

diff --combined drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@@ -109,7 -109,7 +109,7 @@@ enum dc_edid_status dm_helpers_parse_ed
                struct cea_sad *sad = &sads[i];
  
                edid_caps->audio_modes[i].format_code = sad->format;
-               edid_caps->audio_modes[i].channel_count = sad->channels;
+               edid_caps->audio_modes[i].channel_count = sad->channels + 1;
                edid_caps->audio_modes[i].sample_rate = sad->freq;
                edid_caps->audio_modes[i].sample_size = sad->byte2;
        }
@@@ -258,15 -258,6 +258,15 @@@ bool dm_helpers_dp_mst_write_payload_al
        return true;
  }
  
 +
 +/*
 + * Clear payload allocation table before enable MST DP link.
 + */
 +void dm_helpers_dp_mst_clear_payload_allocation_table(
 +      struct dc_context *ctx,
 +      const struct dc_link *link)
 +{}
 +
  /*
   * Polls for ACT (allocation change trigger) handled and sends
   * ALLOCATE_PAYLOAD message.
@@@ -505,8 -496,3 +505,8 @@@ enum dc_edid_status dm_helpers_read_loc
  
        return edid_status;
  }
 +
 +void dm_set_dcn_clocks(struct dc_context *ctx, struct dc_clocks *clks)
 +{
 +      /* TODO: something */
 +}

diff --combined drivers/gpu/drm/drm_edid.c
@@@ -113,6 -113,9 +113,9 @@@ static const struct edid_quirk 
        /* AEO model 0 reports 8 bpc, but is a 6 bpc panel */
        { "AEO", 0, EDID_QUIRK_FORCE_6BPC },
  
+       /* CPT panel of Asus UX303LA reports 8 bpc, but is a 6 bpc panel */
+       { "CPT", 0x17df, EDID_QUIRK_FORCE_6BPC },
        /* Belinea 10 15 55 */
        { "MAX", 1516, EDID_QUIRK_PREFER_LARGE_60 },
        { "MAX", 0x77e, EDID_QUIRK_PREFER_LARGE_60 },
  
        /* HTC Vive VR Headset */
        { "HVR", 0xaa01, EDID_QUIRK_NON_DESKTOP },
+       /* Oculus Rift DK1, DK2, and CV1 VR Headsets */
+       { "OVR", 0x0001, EDID_QUIRK_NON_DESKTOP },
+       { "OVR", 0x0003, EDID_QUIRK_NON_DESKTOP },
+       { "OVR", 0x0004, EDID_QUIRK_NON_DESKTOP },
+       /* Windows Mixed Reality Headsets */
+       { "ACR", 0x7fce, EDID_QUIRK_NON_DESKTOP },
+       { "HPN", 0x3515, EDID_QUIRK_NON_DESKTOP },
+       { "LEN", 0x0408, EDID_QUIRK_NON_DESKTOP },
+       { "LEN", 0xb800, EDID_QUIRK_NON_DESKTOP },
+       { "FUJ", 0x1970, EDID_QUIRK_NON_DESKTOP },
+       { "DEL", 0x7fce, EDID_QUIRK_NON_DESKTOP },
+       { "SEC", 0x144a, EDID_QUIRK_NON_DESKTOP },
+       { "AUS", 0xc102, EDID_QUIRK_NON_DESKTOP },
+       /* Sony PlayStation VR Headset */
+       { "SNY", 0x0704, EDID_QUIRK_NON_DESKTOP },
  };
  
  /*
@@@ -1554,7 -1575,8 +1575,7 @@@ struct edid *drm_do_get_edid(struct drm
        struct edid *override = NULL;
  
        if (connector->override_edid)
 -              override = drm_edid_duplicate((const struct edid *)
 -                                            connector->edid_blob_ptr->data);
 +              override = drm_edid_duplicate(connector->edid_blob_ptr->data);
  
        if (!override)
                override = drm_load_edid_firmware(connector);
@@@ -2082,8 -2104,6 +2103,8 @@@ drm_mode_std(struct drm_connector *conn
        if (hsize == 1366 && vsize == 768 && vrefresh_rate == 60) {
                mode = drm_cvt_mode(dev, 1366, 768, vrefresh_rate, 0, 0,
                                    false);
 +              if (!mode)
 +                      return NULL;
                mode->hdisplay = 1366;
                mode->hsync_start = mode->hsync_start - 1;
                mode->hsync_end = mode->hsync_end - 1;
@@@ -2768,7 -2788,7 +2789,7 @@@ do_detailed_mode(struct detailed_timin
  
                drm_mode_probed_add(closure->connector, newmode);
                closure->modes++;
 -              closure->preferred = 0;
 +              closure->preferred = false;
        }
  }
  
@@@ -2785,7 -2805,7 +2806,7 @@@ add_detailed_modes(struct drm_connecto
        struct detailed_mode_closure closure = {
                .connector = connector,
                .edid = edid,
 -              .preferred = 1,
 +              .preferred = true,
                .quirks = quirks,
        };
  

diff --combined drivers/gpu/drm/drm_framebuffer.c
@@@ -121,6 -121,10 +121,10 @@@ int drm_mode_addfb(struct drm_device *d
        r.pixel_format = drm_mode_legacy_fb_format(or->bpp, or->depth);
        r.handles[0] = or->handle;
  
+       if (r.pixel_format == DRM_FORMAT_XRGB2101010 &&
+           dev->driver->driver_features & DRIVER_PREFER_XBGR_30BPP)
+               r.pixel_format = DRM_FORMAT_XBGR2101010;
        ret = drm_mode_addfb2(dev, &r, file_priv);
        if (ret)
                return ret;
@@@ -158,10 -162,9 +162,10 @@@ static int framebuffer_check(struct drm
        info = __drm_format_info(r->pixel_format & ~DRM_FORMAT_BIG_ENDIAN);
        if (!info) {
                struct drm_format_name_buf format_name;
 +
                DRM_DEBUG_KMS("bad framebuffer format %s\n",
 -                            drm_get_format_name(r->pixel_format,
 -                                                &format_name));
 +                            drm_get_format_name(r->pixel_format,
 +                                                &format_name));
                return -EINVAL;
        }
  
@@@ -458,6 -461,12 +462,12 @@@ int drm_mode_getfb(struct drm_device *d
        if (!fb)
                return -ENOENT;
  
+       /* Multi-planar framebuffers need getfb2. */
+       if (fb->format->num_planes > 1) {
+               ret = -EINVAL;
+               goto out;
+       }
        r->height = fb->height;
        r->width = fb->width;
        r->depth = fb->format->depth;
                ret = -ENODEV;
        }
  
+ out:
        drm_framebuffer_put(fb);
  
        return ret;
diff --combined drivers/gpu/drm/drm_mm.c
@@@ -180,7 -180,7 +180,7 @@@ static void drm_mm_interval_tree_add_no
        struct drm_mm *mm = hole_node->mm;
        struct rb_node **link, *rb;
        struct drm_mm_node *parent;
 -      bool leftmost = true;
 +      bool leftmost;
  
        node->__subtree_last = LAST(node);
  
        } else {
                rb = NULL;
                link = &mm->interval_tree.rb_root.rb_node;
 +              leftmost = true;
        }
  
        while (*link) {
                parent = rb_entry(rb, struct drm_mm_node, rb);
                if (parent->__subtree_last < node->__subtree_last)
                        parent->__subtree_last = node->__subtree_last;
 -              if (node->start < parent->start)
 +              if (node->start < parent->start) {
                        link = &parent->rb.rb_left;
 -              else {
 +              } else {
                        link = &parent->rb.rb_right;
 -                      leftmost = true;
 +                      leftmost = false;
                }
        }
  
@@@ -837,9 -836,24 +837,24 @@@ struct drm_mm_node *drm_mm_scan_color_e
        if (!mm->color_adjust)
                return NULL;
  
-       hole = list_first_entry(&mm->hole_stack, typeof(*hole), hole_stack);
-       hole_start = __drm_mm_hole_node_start(hole);
-       hole_end = hole_start + hole->hole_size;
+       /*
+        * The hole found during scanning should ideally be the first element
+        * in the hole_stack list, but due to side-effects in the driver it
+        * may not be.
+        */
+       list_for_each_entry(hole, &mm->hole_stack, hole_stack) {
+               hole_start = __drm_mm_hole_node_start(hole);
+               hole_end = hole_start + hole->hole_size;
+               if (hole_start <= scan->hit_start &&
+                   hole_end >= scan->hit_end)
+                       break;
+       }
+       /* We should only be called after we found the hole previously */
+       DRM_MM_BUG_ON(&hole->hole_stack == &mm->hole_stack);
+       if (unlikely(&hole->hole_stack == &mm->hole_stack))
+               return NULL;
  
        DRM_MM_BUG_ON(hole_start > scan->hit_start);
        DRM_MM_BUG_ON(hole_end < scan->hit_end);

diff --combined drivers/gpu/drm/drm_probe_helper.c
@@@ -499,7 -499,7 +499,7 @@@ retry
  
        list_for_each_entry(mode, &connector->modes, head) {
                if (mode->status == MODE_OK)
 -                      mode->status = drm_mode_validate_basic(mode);
 +                      mode->status = drm_mode_validate_driver(dev, mode);
  
                if (mode->status == MODE_OK)
                        mode->status = drm_mode_validate_size(mode, maxX, maxY);
@@@ -654,6 -654,26 +654,26 @@@ out
  }
  
  /**
+  * drm_kms_helper_is_poll_worker - is %current task an output poll worker?
+  *
+  * Determine if %current task is an output poll worker.  This can be used
+  * to select distinct code paths for output polling versus other contexts.
+  *
+  * One use case is to avoid a deadlock between the output poll worker and
+  * the autosuspend worker wherein the latter waits for polling to finish
+  * upon calling drm_kms_helper_poll_disable(), while the former waits for
+  * runtime suspend to finish upon calling pm_runtime_get_sync() in a
+  * connector ->detect hook.
+  */
+ bool drm_kms_helper_is_poll_worker(void)
+ {
+       struct work_struct *work = current_work();
+       return work && work->func == output_poll_execute;
+ }
+ EXPORT_SYMBOL(drm_kms_helper_is_poll_worker);
+ /**
   * drm_kms_helper_poll_disable - disable output polling
   * @dev: drm_device
   *

diff --combined drivers/gpu/drm/i915/gvt/kvmgt.c
@@@ -41,7 -41,6 +41,7 @@@
  #include <linux/kvm_host.h>
  #include <linux/vfio.h>
  #include <linux/mdev.h>
 +#include <linux/debugfs.h>
  
  #include "i915_drv.h"
  #include "gvt.h"
@@@ -85,16 -84,12 +85,16 @@@ struct kvmgt_guest_info 
  #define NR_BKT (1 << 18)
        struct hlist_head ptable[NR_BKT];
  #undef NR_BKT
 +      struct dentry *debugfs_cache_entries;
  };
  
  struct gvt_dma {
 -      struct rb_node node;
 +      struct intel_vgpu *vgpu;
 +      struct rb_node gfn_node;
 +      struct rb_node dma_addr_node;
        gfn_t gfn;
 -      unsigned long iova;
 +      dma_addr_t dma_addr;
 +      struct kref ref;
  };
  
  static inline bool handle_valid(unsigned long handle)
@@@ -106,168 -101,165 +106,168 @@@ static int kvmgt_guest_init(struct mdev
  static void intel_vgpu_release_work(struct work_struct *work);
  static bool kvmgt_guest_exit(struct kvmgt_guest_info *info);
  
 -static int gvt_dma_map_iova(struct intel_vgpu *vgpu, kvm_pfn_t pfn,
 -              unsigned long *iova)
 +static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn,
 +              dma_addr_t *dma_addr)
  {
 -      struct page *page;
        struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
 -      dma_addr_t daddr;
 +      struct page *page;
 +      unsigned long pfn;
 +      int ret;
  
 -      if (unlikely(!pfn_valid(pfn)))
 -              return -EFAULT;
 +      /* Pin the page first. */
 +      ret = vfio_pin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1,
 +                           IOMMU_READ | IOMMU_WRITE, &pfn);
 +      if (ret != 1) {
 +              gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n",
 +                           gfn, ret);
 +              return -EINVAL;
 +      }
  
 +      /* Setup DMA mapping. */
        page = pfn_to_page(pfn);
 -      daddr = dma_map_page(dev, page, 0, PAGE_SIZE,
 -                      PCI_DMA_BIDIRECTIONAL);
 -      if (dma_mapping_error(dev, daddr))
 +      *dma_addr = dma_map_page(dev, page, 0, PAGE_SIZE,
 +                               PCI_DMA_BIDIRECTIONAL);
 +      if (dma_mapping_error(dev, *dma_addr)) {
 +              gvt_vgpu_err("DMA mapping failed for gfn 0x%lx\n", gfn);
 +              vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1);
                return -ENOMEM;
 +      }
  
 -      *iova = (unsigned long)(daddr >> PAGE_SHIFT);
        return 0;
  }
  
 -static void gvt_dma_unmap_iova(struct intel_vgpu *vgpu, unsigned long iova)
 +static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn,
 +              dma_addr_t dma_addr)
  {
        struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
 -      dma_addr_t daddr;
 +      int ret;
  
 -      daddr = (dma_addr_t)(iova << PAGE_SHIFT);
 -      dma_unmap_page(dev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
 +      dma_unmap_page(dev, dma_addr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
 +      ret = vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1);
 +      WARN_ON(ret != 1);
  }
  
 -static struct gvt_dma *__gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
 +static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu,
 +              dma_addr_t dma_addr)
  {
 -      struct rb_node *node = vgpu->vdev.cache.rb_node;
 -      struct gvt_dma *ret = NULL;
 +      struct rb_node *node = vgpu->vdev.dma_addr_cache.rb_node;
 +      struct gvt_dma *itr;
  
        while (node) {
 -              struct gvt_dma *itr = rb_entry(node, struct gvt_dma, node);
 +              itr = rb_entry(node, struct gvt_dma, dma_addr_node);
  
 -              if (gfn < itr->gfn)
 +              if (dma_addr < itr->dma_addr)
                        node = node->rb_left;
 -              else if (gfn > itr->gfn)
 +              else if (dma_addr > itr->dma_addr)
                        node = node->rb_right;
 -              else {
 -                      ret = itr;
 -                      goto out;
 -              }
 +              else
 +                      return itr;
        }
 -
 -out:
 -      return ret;
 +      return NULL;
  }
  
 -static unsigned long gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
 +static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn)
  {
 -      struct gvt_dma *entry;
 -      unsigned long iova;
 -
 -      mutex_lock(&vgpu->vdev.cache_lock);
 +      struct rb_node *node = vgpu->vdev.gfn_cache.rb_node;
 +      struct gvt_dma *itr;
  
 -      entry = __gvt_cache_find(vgpu, gfn);
 -      iova = (entry == NULL) ? INTEL_GVT_INVALID_ADDR : entry->iova;
 +      while (node) {
 +              itr = rb_entry(node, struct gvt_dma, gfn_node);
  
 -      mutex_unlock(&vgpu->vdev.cache_lock);
 -      return iova;
 +              if (gfn < itr->gfn)
 +                      node = node->rb_left;
 +              else if (gfn > itr->gfn)
 +                      node = node->rb_right;
 +              else
 +                      return itr;
 +      }
 +      return NULL;
  }
  
 -static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
 -              unsigned long iova)
 +static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
 +              dma_addr_t dma_addr)
  {
        struct gvt_dma *new, *itr;
 -      struct rb_node **link = &vgpu->vdev.cache.rb_node, *parent = NULL;
 +      struct rb_node **link, *parent = NULL;
  
        new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
        if (!new)
 -              return;
 +              return -ENOMEM;
  
 +      new->vgpu = vgpu;
        new->gfn = gfn;
 -      new->iova = iova;
 +      new->dma_addr = dma_addr;
 +      kref_init(&new->ref);
  
 -      mutex_lock(&vgpu->vdev.cache_lock);
 +      /* gfn_cache maps gfn to struct gvt_dma. */
 +      link = &vgpu->vdev.gfn_cache.rb_node;
        while (*link) {
                parent = *link;
 -              itr = rb_entry(parent, struct gvt_dma, node);
 +              itr = rb_entry(parent, struct gvt_dma, gfn_node);
  
 -              if (gfn == itr->gfn)
 -                      goto out;
 -              else if (gfn < itr->gfn)
 +              if (gfn < itr->gfn)
                        link = &parent->rb_left;
                else
                        link = &parent->rb_right;
        }
 +      rb_link_node(&new->gfn_node, parent, link);
 +      rb_insert_color(&new->gfn_node, &vgpu->vdev.gfn_cache);
  
 -      rb_link_node(&new->node, parent, link);
 -      rb_insert_color(&new->node, &vgpu->vdev.cache);
 -      mutex_unlock(&vgpu->vdev.cache_lock);
 -      return;
 +      /* dma_addr_cache maps dma addr to struct gvt_dma. */
 +      parent = NULL;
 +      link = &vgpu->vdev.dma_addr_cache.rb_node;
 +      while (*link) {
 +              parent = *link;
 +              itr = rb_entry(parent, struct gvt_dma, dma_addr_node);
  
 -out:
 -      mutex_unlock(&vgpu->vdev.cache_lock);
 -      kfree(new);
 +              if (dma_addr < itr->dma_addr)
 +                      link = &parent->rb_left;
 +              else
 +                      link = &parent->rb_right;
 +      }
 +      rb_link_node(&new->dma_addr_node, parent, link);
 +      rb_insert_color(&new->dma_addr_node, &vgpu->vdev.dma_addr_cache);
 +
 +      vgpu->vdev.nr_cache_entries++;
 +      return 0;
  }
  
  static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
                                struct gvt_dma *entry)
  {
 -      rb_erase(&entry->node, &vgpu->vdev.cache);
 +      rb_erase(&entry->gfn_node, &vgpu->vdev.gfn_cache);
 +      rb_erase(&entry->dma_addr_node, &vgpu->vdev.dma_addr_cache);
        kfree(entry);
 -}
 -
 -static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn)
 -{
 -      struct device *dev = mdev_dev(vgpu->vdev.mdev);
 -      struct gvt_dma *this;
 -      unsigned long g1;
 -      int rc;
 -
 -      mutex_lock(&vgpu->vdev.cache_lock);
 -      this  = __gvt_cache_find(vgpu, gfn);
 -      if (!this) {
 -              mutex_unlock(&vgpu->vdev.cache_lock);
 -              return;
 -      }
 -
 -      g1 = gfn;
 -      gvt_dma_unmap_iova(vgpu, this->iova);
 -      rc = vfio_unpin_pages(dev, &g1, 1);
 -      WARN_ON(rc != 1);
 -      __gvt_cache_remove_entry(vgpu, this);
 -      mutex_unlock(&vgpu->vdev.cache_lock);
 -}
 -
 -static void gvt_cache_init(struct intel_vgpu *vgpu)
 -{
 -      vgpu->vdev.cache = RB_ROOT;
 -      mutex_init(&vgpu->vdev.cache_lock);
 +      vgpu->vdev.nr_cache_entries--;
  }
  
  static void gvt_cache_destroy(struct intel_vgpu *vgpu)
  {
        struct gvt_dma *dma;
        struct rb_node *node = NULL;
 -      struct device *dev = mdev_dev(vgpu->vdev.mdev);
 -      unsigned long gfn;
  
        for (;;) {
                mutex_lock(&vgpu->vdev.cache_lock);
 -              node = rb_first(&vgpu->vdev.cache);
 +              node = rb_first(&vgpu->vdev.gfn_cache);
                if (!node) {
                        mutex_unlock(&vgpu->vdev.cache_lock);
                        break;
                }
 -              dma = rb_entry(node, struct gvt_dma, node);
 -              gvt_dma_unmap_iova(vgpu, dma->iova);
 -              gfn = dma->gfn;
 +              dma = rb_entry(node, struct gvt_dma, gfn_node);
 +              gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr);
                __gvt_cache_remove_entry(vgpu, dma);
                mutex_unlock(&vgpu->vdev.cache_lock);
 -              vfio_unpin_pages(dev, &gfn, 1);
        }
  }
  
 +static void gvt_cache_init(struct intel_vgpu *vgpu)
 +{
 +      vgpu->vdev.gfn_cache = RB_ROOT;
 +      vgpu->vdev.dma_addr_cache = RB_ROOT;
 +      vgpu->vdev.nr_cache_entries = 0;
 +      mutex_init(&vgpu->vdev.cache_lock);
 +}
 +
  static void kvmgt_protect_table_init(struct kvmgt_guest_info *info)
  {
        hash_init(info->ptable);
@@@ -460,7 -452,7 +460,7 @@@ static int intel_vgpu_create(struct kob
        vgpu = intel_gvt_ops->vgpu_create(gvt, type);
        if (IS_ERR_OR_NULL(vgpu)) {
                ret = vgpu == NULL ? -EFAULT : PTR_ERR(vgpu);
 -              gvt_vgpu_err("failed to create intel vgpu: %d\n", ret);
 +              gvt_err("failed to create intel vgpu: %d\n", ret);
                goto out;
        }
  
@@@ -497,22 -489,13 +497,22 @@@ static int intel_vgpu_iommu_notifier(st
  
        if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
                struct vfio_iommu_type1_dma_unmap *unmap = data;
 -              unsigned long gfn, end_gfn;
 +              struct gvt_dma *entry;
 +              unsigned long iov_pfn, end_iov_pfn;
 +
 +              iov_pfn = unmap->iova >> PAGE_SHIFT;
 +              end_iov_pfn = iov_pfn + unmap->size / PAGE_SIZE;
  
 -              gfn = unmap->iova >> PAGE_SHIFT;
 -              end_gfn = gfn + unmap->size / PAGE_SIZE;
 +              mutex_lock(&vgpu->vdev.cache_lock);
 +              for (; iov_pfn < end_iov_pfn; iov_pfn++) {
 +                      entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
 +                      if (!entry)
 +                              continue;
  
 -              while (gfn < end_gfn)
 -                      gvt_cache_remove(vgpu, gfn++);
 +                      gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr);
 +                      __gvt_cache_remove_entry(vgpu, entry);
 +              }
 +              mutex_unlock(&vgpu->vdev.cache_lock);
        }
  
        return NOTIFY_OK;
@@@ -750,6 -733,25 +750,25 @@@ static ssize_t intel_vgpu_rw(struct mde
        return ret == 0 ? count : ret;
  }
  
+ static bool gtt_entry(struct mdev_device *mdev, loff_t *ppos)
+ {
+       struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
+       unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+       struct intel_gvt *gvt = vgpu->gvt;
+       int offset;
+       /* Only allow MMIO GGTT entry access */
+       if (index != PCI_BASE_ADDRESS_0)
+               return false;
+       offset = (u64)(*ppos & VFIO_PCI_OFFSET_MASK) -
+               intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_0);
+       return (offset >= gvt->device_info.gtt_start_offset &&
+               offset < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt)) ?
+                       true : false;
+ }
  static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
                        size_t count, loff_t *ppos)
  {
        while (count) {
                size_t filled;
  
-               if (count >= 4 && !(*ppos % 4)) {
+               /* Only support 8-byte GGTT entry reads */
+               if (count >= 8 && !(*ppos % 8) &&
+                       gtt_entry(mdev, ppos)) {
+                       u64 val;
+                       ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
+                                       ppos, false);
+                       if (ret <= 0)
+                               goto read_err;
+                       if (copy_to_user(buf, &val, sizeof(val)))
+                               goto read_err;
+                       filled = 8;
+               } else if (count >= 4 && !(*ppos % 4)) {
                        u32 val;
  
                        ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
@@@ -819,7 -835,21 +852,21 @@@ static ssize_t intel_vgpu_write(struct 
        while (count) {
                size_t filled;
  
-               if (count >= 4 && !(*ppos % 4)) {
+               /* Only support 8-byte GGTT entry writes */
+               if (count >= 8 && !(*ppos % 8) &&
+                       gtt_entry(mdev, ppos)) {
+                       u64 val;
+                       if (copy_from_user(&val, buf, sizeof(val)))
+                               goto write_err;
+                       ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
+                                       ppos, true);
+                       if (ret <= 0)
+                               goto write_err;
+                       filled = 8;
+               } else if (count >= 4 && !(*ppos % 4)) {
                        u32 val;
  
                        if (copy_from_user(&val, buf, sizeof(val)))
@@@ -1338,7 -1368,7 +1385,7 @@@ static void kvmgt_host_exit(struct devi
        mdev_unregister_device(dev);
  }
  
 -static int kvmgt_write_protect_add(unsigned long handle, u64 gfn)
 +static int kvmgt_page_track_add(unsigned long handle, u64 gfn)
  {
        struct kvmgt_guest_info *info;
        struct kvm *kvm;
@@@ -1372,7 -1402,7 +1419,7 @@@ out
        return 0;
  }
  
 -static int kvmgt_write_protect_remove(unsigned long handle, u64 gfn)
 +static int kvmgt_page_track_remove(unsigned long handle, u64 gfn)
  {
        struct kvmgt_guest_info *info;
        struct kvm *kvm;
@@@ -1500,20 -1530,11 +1547,20 @@@ static int kvmgt_guest_init(struct mdev
        info->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
        kvm_page_track_register_notifier(kvm, &info->track_node);
  
 +      info->debugfs_cache_entries = debugfs_create_ulong(
 +                                              "kvmgt_nr_cache_entries",
 +                                              0444, vgpu->debugfs,
 +                                              &vgpu->vdev.nr_cache_entries);
 +      if (!info->debugfs_cache_entries)
 +              gvt_vgpu_err("Cannot create kvmgt debugfs entry\n");
 +
        return 0;
  }
  
  static bool kvmgt_guest_exit(struct kvmgt_guest_info *info)
  {
 +      debugfs_remove(info->debugfs_cache_entries);
 +
        kvm_page_track_unregister_notifier(info->kvm, &info->track_node);
        kvm_put_kvm(info->kvm);
        kvmgt_protect_table_destroy(info);
@@@ -1553,84 -1574,39 +1600,84 @@@ static int kvmgt_inject_msi(unsigned lo
  
  static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
  {
 -      unsigned long iova, pfn;
        struct kvmgt_guest_info *info;
 -      struct device *dev;
 -      struct intel_vgpu *vgpu;
 -      int rc;
 +      kvm_pfn_t pfn;
  
        if (!handle_valid(handle))
                return INTEL_GVT_INVALID_ADDR;
  
        info = (struct kvmgt_guest_info *)handle;
 -      vgpu = info->vgpu;
 -      iova = gvt_cache_find(info->vgpu, gfn);
 -      if (iova != INTEL_GVT_INVALID_ADDR)
 -              return iova;
 -
 -      pfn = INTEL_GVT_INVALID_ADDR;
 -      dev = mdev_dev(info->vgpu->vdev.mdev);
 -      rc = vfio_pin_pages(dev, &gfn, 1, IOMMU_READ | IOMMU_WRITE, &pfn);
 -      if (rc != 1) {
 -              gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n",
 -                      gfn, rc);
 -              return INTEL_GVT_INVALID_ADDR;
 -      }
 -      /* transfer to host iova for GFX to use DMA */
 -      rc = gvt_dma_map_iova(info->vgpu, pfn, &iova);
 -      if (rc) {
 -              gvt_vgpu_err("gvt_dma_map_iova failed for gfn: 0x%lx\n", gfn);
 -              vfio_unpin_pages(dev, &gfn, 1);
 +
 +      pfn = gfn_to_pfn(info->kvm, gfn);
 +      if (is_error_noslot_pfn(pfn))
                return INTEL_GVT_INVALID_ADDR;
 +
 +      return pfn;
 +}
 +
 +int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn,
 +              dma_addr_t *dma_addr)
 +{
 +      struct kvmgt_guest_info *info;
 +      struct intel_vgpu *vgpu;
 +      struct gvt_dma *entry;
 +      int ret;
 +
 +      if (!handle_valid(handle))
 +              return -EINVAL;
 +
 +      info = (struct kvmgt_guest_info *)handle;
 +      vgpu = info->vgpu;
 +
 +      mutex_lock(&info->vgpu->vdev.cache_lock);
 +
 +      entry = __gvt_cache_find_gfn(info->vgpu, gfn);
 +      if (!entry) {
 +              ret = gvt_dma_map_page(vgpu, gfn, dma_addr);
 +              if (ret)
 +                      goto err_unlock;
 +
 +              ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr);
 +              if (ret)
 +                      goto err_unmap;
 +      } else {
 +              kref_get(&entry->ref);
 +              *dma_addr = entry->dma_addr;
        }
  
 -      gvt_cache_add(info->vgpu, gfn, iova);
 -      return iova;
 +      mutex_unlock(&info->vgpu->vdev.cache_lock);
 +      return 0;
 +
 +err_unmap:
 +      gvt_dma_unmap_page(vgpu, gfn, *dma_addr);
 +err_unlock:
 +      mutex_unlock(&info->vgpu->vdev.cache_lock);
 +      return ret;
 +}
 +
 +static void __gvt_dma_release(struct kref *ref)
 +{
 +      struct gvt_dma *entry = container_of(ref, typeof(*entry), ref);
 +
 +      gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr);
 +      __gvt_cache_remove_entry(entry->vgpu, entry);
 +}
 +
 +void kvmgt_dma_unmap_guest_page(unsigned long handle, dma_addr_t dma_addr)
 +{
 +      struct kvmgt_guest_info *info;
 +      struct gvt_dma *entry;
 +
 +      if (!handle_valid(handle))
 +              return;
 +
 +      info = (struct kvmgt_guest_info *)handle;
 +
 +      mutex_lock(&info->vgpu->vdev.cache_lock);
 +      entry = __gvt_cache_find_dma_addr(info->vgpu, dma_addr);
 +      if (entry)
 +              kref_put(&entry->ref, __gvt_dma_release);
 +      mutex_unlock(&info->vgpu->vdev.cache_lock);
  }
  
  static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
@@@ -1700,13 -1676,11 +1747,13 @@@ struct intel_gvt_mpt kvmgt_mpt = 
        .detach_vgpu = kvmgt_detach_vgpu,
        .inject_msi = kvmgt_inject_msi,
        .from_virt_to_mfn = kvmgt_virt_to_pfn,
 -      .set_wp_page = kvmgt_write_protect_add,
 -      .unset_wp_page = kvmgt_write_protect_remove,
 +      .enable_page_track = kvmgt_page_track_add,
 +      .disable_page_track = kvmgt_page_track_remove,
        .read_gpa = kvmgt_read_gpa,
        .write_gpa = kvmgt_write_gpa,
        .gfn_to_mfn = kvmgt_gfn_to_pfn,
 +      .dma_map_guest_page = kvmgt_dma_map_guest_page,
 +      .dma_unmap_guest_page = kvmgt_dma_unmap_guest_page,
        .set_opregion = kvmgt_set_opregion,
        .get_vfio_device = kvmgt_get_vfio_device,
        .put_vfio_device = kvmgt_put_vfio_device,
@@@ -50,8 -50,6 +50,8 @@@
  #define RING_GFX_MODE(base)   _MMIO((base) + 0x29c)
  #define VF_GUARDBAND          _MMIO(0x83a4)
  
 +#define GEN9_MOCS_SIZE                64
 +
  /* Raw offset is appended to each line for convenience. */
  static struct engine_mmio gen8_engine_mmio_list[] __cacheline_aligned = {
        {RCS, GFX_MODE_GEN7, 0xffff, false}, /* 0x229c */
@@@ -120,6 -118,7 +120,7 @@@ static struct engine_mmio gen9_engine_m
        {RCS, HALF_SLICE_CHICKEN3, 0xffff, true}, /* 0xe184 */
        {RCS, GEN9_HALF_SLICE_CHICKEN5, 0xffff, true}, /* 0xe188 */
        {RCS, GEN9_HALF_SLICE_CHICKEN7, 0xffff, true}, /* 0xe194 */
+       {RCS, GEN8_ROW_CHICKEN, 0xffff, true}, /* 0xe4f0 */
        {RCS, TRVATTL3PTRDW(0), 0, false}, /* 0x4de0 */
        {RCS, TRVATTL3PTRDW(1), 0, false}, /* 0x4de4 */
        {RCS, TRNULLDETCT, 0, false}, /* 0x4de8 */
  
  static struct {
        bool initialized;
 -      u32 control_table[I915_NUM_ENGINES][64];
 -      u32 l3cc_table[32];
 +      u32 control_table[I915_NUM_ENGINES][GEN9_MOCS_SIZE];
 +      u32 l3cc_table[GEN9_MOCS_SIZE / 2];
  } gen9_render_mocs;
  
  static void load_render_mocs(struct drm_i915_private *dev_priv)
  
        for (ring_id = 0; ring_id < ARRAY_SIZE(regs); ring_id++) {
                offset.reg = regs[ring_id];
 -              for (i = 0; i < 64; i++) {
 +              for (i = 0; i < GEN9_MOCS_SIZE; i++) {
                        gen9_render_mocs.control_table[ring_id][i] =
                                I915_READ_FW(offset);
                        offset.reg += 4;
        }
  
        offset.reg = 0xb020;
 -      for (i = 0; i < 32; i++) {
 +      for (i = 0; i < GEN9_MOCS_SIZE / 2; i++) {
                gen9_render_mocs.l3cc_table[i] =
                        I915_READ_FW(offset);
                offset.reg += 4;
        gen9_render_mocs.initialized = true;
  }
  
 +static int
 +restore_context_mmio_for_inhibit(struct intel_vgpu *vgpu,
 +                               struct i915_request *req)
 +{
 +      u32 *cs;
 +      int ret;
 +      struct engine_mmio *mmio;
 +      struct intel_gvt *gvt = vgpu->gvt;
 +      int ring_id = req->engine->id;
 +      int count = gvt->engine_mmio_list.ctx_mmio_count[ring_id];
 +
 +      if (count == 0)
 +              return 0;
 +
 +      ret = req->engine->emit_flush(req, EMIT_BARRIER);
 +      if (ret)
 +              return ret;
 +
 +      cs = intel_ring_begin(req, count * 2 + 2);
 +      if (IS_ERR(cs))
 +              return PTR_ERR(cs);
 +
 +      *cs++ = MI_LOAD_REGISTER_IMM(count);
 +      for (mmio = gvt->engine_mmio_list.mmio;
 +           i915_mmio_reg_valid(mmio->reg); mmio++) {
 +              if (mmio->ring_id != ring_id ||
 +                  !mmio->in_context)
 +                      continue;
 +
 +              *cs++ = i915_mmio_reg_offset(mmio->reg);
 +              *cs++ = vgpu_vreg_t(vgpu, mmio->reg) |
 +                              (mmio->mask << 16);
 +                      gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, ring_id:%d\n",
 +                            *(cs-2), *(cs-1), vgpu->id, ring_id);
 +      }
 +
 +      *cs++ = MI_NOOP;
 +      intel_ring_advance(req, cs);
 +
 +      ret = req->engine->emit_flush(req, EMIT_BARRIER);
 +      if (ret)
 +              return ret;
 +
 +      return 0;
 +}
 +
 +static int
 +restore_render_mocs_control_for_inhibit(struct intel_vgpu *vgpu,
 +                                      struct i915_request *req)
 +{
 +      unsigned int index;
 +      u32 *cs;
 +
 +      cs = intel_ring_begin(req, 2 * GEN9_MOCS_SIZE + 2);
 +      if (IS_ERR(cs))
 +              return PTR_ERR(cs);
 +
 +      *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
 +
 +      for (index = 0; index < GEN9_MOCS_SIZE; index++) {
 +              *cs++ = i915_mmio_reg_offset(GEN9_GFX_MOCS(index));
 +              *cs++ = vgpu_vreg_t(vgpu, GEN9_GFX_MOCS(index));
 +                      gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, ring_id:%d\n",
 +                            *(cs-2), *(cs-1), vgpu->id, req->engine->id);
 +
 +      }
 +
 +      *cs++ = MI_NOOP;
 +      intel_ring_advance(req, cs);
 +
 +      return 0;
 +}
 +
 +static int
 +restore_render_mocs_l3cc_for_inhibit(struct intel_vgpu *vgpu,
 +                                   struct i915_request *req)
 +{
 +      unsigned int index;
 +      u32 *cs;
 +
 +      cs = intel_ring_begin(req, 2 * GEN9_MOCS_SIZE / 2 + 2);
 +      if (IS_ERR(cs))
 +              return PTR_ERR(cs);
 +
 +      *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
 +
 +      for (index = 0; index < GEN9_MOCS_SIZE / 2; index++) {
 +              *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(index));
 +              *cs++ = vgpu_vreg_t(vgpu, GEN9_LNCFCMOCS(index));
 +                      gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, ring_id:%d\n",
 +                            *(cs-2), *(cs-1), vgpu->id, req->engine->id);
 +
 +      }
 +
 +      *cs++ = MI_NOOP;
 +      intel_ring_advance(req, cs);
 +
 +      return 0;
 +}
 +
 +/*
 + * Use the LRI command to initialize the MMIO registers that are in the
 + * context state image of an inhibit context; this covers the tracked
 + * in-context engine MMIO, render_mocs and render_mocs_l3cc.
 + */
 +int intel_vgpu_restore_inhibit_context(struct intel_vgpu *vgpu,
 +                                     struct i915_request *req)
 +{
 +      int ret;
 +      u32 *cs;
 +
 +      cs = intel_ring_begin(req, 2);
 +      if (IS_ERR(cs))
 +              return PTR_ERR(cs);
 +
 +      *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
 +      *cs++ = MI_NOOP;
 +      intel_ring_advance(req, cs);
 +
 +      ret = restore_context_mmio_for_inhibit(vgpu, req);
 +      if (ret)
 +              goto out;
 +
 +      /* no MOCS register in context except render engine */
 +      if (req->engine->id != RCS)
 +              goto out;
 +
 +      ret = restore_render_mocs_control_for_inhibit(vgpu, req);
 +      if (ret)
 +              goto out;
 +
 +      ret = restore_render_mocs_l3cc_for_inhibit(vgpu, req);
 +      if (ret)
 +              goto out;
 +
 +out:
 +      cs = intel_ring_begin(req, 2);
 +      if (IS_ERR(cs))
 +              return PTR_ERR(cs);
 +
 +      *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
 +      *cs++ = MI_NOOP;
 +      intel_ring_advance(req, cs);
 +
 +      return ret;
 +}
 +
  static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id)
  {
        struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
@@@ -400,14 -252,11 +401,14 @@@ static void switch_mocs(struct intel_vg
        if (WARN_ON(ring_id >= ARRAY_SIZE(regs)))
                return;
  
 +      if (IS_KABYLAKE(dev_priv) && ring_id == RCS)
 +              return;
 +
        if (!pre && !gen9_render_mocs.initialized)
                load_render_mocs(dev_priv);
  
        offset.reg = regs[ring_id];
 -      for (i = 0; i < 64; i++) {
 +      for (i = 0; i < GEN9_MOCS_SIZE; i++) {
                if (pre)
                        old_v = vgpu_vreg_t(pre, offset);
                else
  
        if (ring_id == RCS) {
                l3_offset.reg = 0xb020;
 -              for (i = 0; i < 32; i++) {
 +              for (i = 0; i < GEN9_MOCS_SIZE / 2; i++) {
                        if (pre)
                                old_v = vgpu_vreg_t(pre, l3_offset);
                        else
  
  #define CTX_CONTEXT_CONTROL_VAL       0x03
  
 +bool is_inhibit_context(struct i915_gem_context *ctx, int ring_id)
 +{
 +      u32 *reg_state = ctx->engine[ring_id].lrc_reg_state;
 +      u32 inhibit_mask =
 +              _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
 +
 +      return inhibit_mask ==
 +              (reg_state[CTX_CONTEXT_CONTROL_VAL] & inhibit_mask);
 +}
 +
  /* Switch ring mmio values (context). */
  static void switch_mmio(struct intel_vgpu *pre,
                        struct intel_vgpu *next,
  {
        struct drm_i915_private *dev_priv;
        struct intel_vgpu_submission *s;
 -      u32 *reg_state, ctx_ctrl;
 -      u32 inhibit_mask =
 -              _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
        struct engine_mmio *mmio;
        u32 old_v, new_v;
  
        if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
                switch_mocs(pre, next, ring_id);
  
 -      for (mmio = dev_priv->gvt->engine_mmio_list;
 +      for (mmio = dev_priv->gvt->engine_mmio_list.mmio;
             i915_mmio_reg_valid(mmio->reg); mmio++) {
                if (mmio->ring_id != ring_id)
                        continue;
 +              /*
 +               * No need to save or restore the mmio which is in the context
 +               * state image on Kabylake; it is initialized by the LRI command
 +               * and saved/restored together with the context.
 +               */
 +              if (IS_KABYLAKE(dev_priv) && mmio->in_context)
 +                      continue;
 +
                // save
                if (pre) {
                        vgpu_vreg_t(pre, mmio->reg) = I915_READ_FW(mmio->reg);
                // restore
                if (next) {
                        s = &next->submission;
 -                      reg_state =
 -                              s->shadow_ctx->engine[ring_id].lrc_reg_state;
 -                      ctx_ctrl = reg_state[CTX_CONTEXT_CONTROL_VAL];
                        /*
 -                       * if it is an inhibit context, load in_context mmio
 -                       * into HW by mmio write. If it is not, skip this mmio
 -                       * write.
 +                       * No need to restore the mmio which is in the context
 +                       * state image if this is not an inhibit context; the
 +                       * context itself will restore it.
                         */
                        if (mmio->in_context &&
 -                          (ctx_ctrl & inhibit_mask) != inhibit_mask)
 +                          !is_inhibit_context(s->shadow_ctx, ring_id))
                                continue;
  
                        if (mmio->mask)
@@@ -557,9 -394,11 +558,11 @@@ void intel_gvt_switch_mmio(struct intel
         * performance for batch mmio read/write, so we need to
         * handle forcewake manually.
         */
+       intel_runtime_pm_get(dev_priv);
        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
        switch_mmio(pre, next, ring_id);
        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+       intel_runtime_pm_put(dev_priv);
  }
  
  /**
   */
  void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt)
  {
 +      struct engine_mmio *mmio;
 +
        if (IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv))
 -              gvt->engine_mmio_list = gen9_engine_mmio_list;
 +              gvt->engine_mmio_list.mmio = gen9_engine_mmio_list;
        else
 -              gvt->engine_mmio_list = gen8_engine_mmio_list;
 +              gvt->engine_mmio_list.mmio = gen8_engine_mmio_list;
 +
 +      for (mmio = gvt->engine_mmio_list.mmio;
 +           i915_mmio_reg_valid(mmio->reg); mmio++) {
 +              if (mmio->in_context)
 +                      gvt->engine_mmio_list.ctx_mmio_count[mmio->ring_id]++;
 +      }
  }
@@@ -52,29 -52,54 +52,77 @@@ static void set_context_pdp_root_pointe
                pdp_pair[i].val = pdp[7 - i];
  }
  
 +static void update_shadow_pdps(struct intel_vgpu_workload *workload)
 +{
 +      struct intel_vgpu *vgpu = workload->vgpu;
 +      int ring_id = workload->ring_id;
 +      struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx;
 +      struct drm_i915_gem_object *ctx_obj =
 +              shadow_ctx->engine[ring_id].state->obj;
 +      struct execlist_ring_context *shadow_ring_context;
 +      struct page *page;
 +
 +      if (WARN_ON(!workload->shadow_mm))
 +              return;
 +
 +      if (WARN_ON(!atomic_read(&workload->shadow_mm->pincount)))
 +              return;
 +
 +      page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
 +      shadow_ring_context = kmap(page);
 +      set_context_pdp_root_pointer(shadow_ring_context,
 +                      (void *)workload->shadow_mm->ppgtt_mm.shadow_pdps);
 +      kunmap(page);
 +}
 +
+ /*
+  * When populating the shadow ctx from the guest, do not override OA-related
+  * registers, so that they are not overwritten by guest OA configs. This makes
+  * it possible to capture OA data from the host for both host and guests.
+  */
+ static void sr_oa_regs(struct intel_vgpu_workload *workload,
+               u32 *reg_state, bool save)
+ {
+       struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
+       u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset;
+       u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset;
+       int i = 0;
+       u32 flex_mmio[] = {
+               i915_mmio_reg_offset(EU_PERF_CNTL0),
+               i915_mmio_reg_offset(EU_PERF_CNTL1),
+               i915_mmio_reg_offset(EU_PERF_CNTL2),
+               i915_mmio_reg_offset(EU_PERF_CNTL3),
+               i915_mmio_reg_offset(EU_PERF_CNTL4),
+               i915_mmio_reg_offset(EU_PERF_CNTL5),
+               i915_mmio_reg_offset(EU_PERF_CNTL6),
+       };
+       if (!workload || !reg_state || workload->ring_id != RCS)
+               return;
+       if (save) {
+               workload->oactxctrl = reg_state[ctx_oactxctrl + 1];
+               for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) {
+                       u32 state_offset = ctx_flexeu0 + i * 2;
+                       workload->flex_mmio[i] = reg_state[state_offset + 1];
+               }
+       } else {
+               reg_state[ctx_oactxctrl] =
+                       i915_mmio_reg_offset(GEN8_OACTXCONTROL);
+               reg_state[ctx_oactxctrl + 1] = workload->oactxctrl;
+               for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) {
+                       u32 state_offset = ctx_flexeu0 + i * 2;
+                       u32 mmio = flex_mmio[i];
+                       reg_state[state_offset] = mmio;
+                       reg_state[state_offset + 1] = workload->flex_mmio[i];
+               }
+       }
+ }
  static int populate_shadow_context(struct intel_vgpu_workload *workload)
  {
        struct intel_vgpu *vgpu = workload->vgpu;
        page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
        shadow_ring_context = kmap(page);
  
+       sr_oa_regs(workload, (u32 *)shadow_ring_context, true);
  #define COPY_REG(name) \
        intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \
                + RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4)
 +#define COPY_REG_MASKED(name) {\
 +              intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \
 +                                            + RING_CTX_OFF(name.val),\
 +                                            &shadow_ring_context->name.val, 4);\
 +              shadow_ring_context->name.val |= 0xffff << 16;\
 +      }
  
 -      COPY_REG(ctx_ctrl);
 +      COPY_REG_MASKED(ctx_ctrl);
        COPY_REG(ctx_timestamp);
  
        if (ring_id == RCS) {
                COPY_REG(rcs_indirect_ctx_offset);
        }
  #undef COPY_REG
 -
 -      set_context_pdp_root_pointer(shadow_ring_context,
 -                                   workload->shadow_mm->shadow_page_table);
 +#undef COPY_REG_MASKED
  
        intel_gvt_hypervisor_read_gpa(vgpu,
                        workload->ring_context_gpa +
                        sizeof(*shadow_ring_context),
                        I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
  
+       sr_oa_regs(workload, (u32 *)shadow_ring_context, false);
        kunmap(page);
        return 0;
  }
  
 -static inline bool is_gvt_request(struct drm_i915_gem_request *req)
 +static inline bool is_gvt_request(struct i915_request *req)
  {
        return i915_gem_context_force_single_submission(req->ctx);
  }
@@@ -175,7 -198,7 +225,7 @@@ static void save_ring_hw_state(struct i
  static int shadow_context_status_change(struct notifier_block *nb,
                unsigned long action, void *data)
  {
 -      struct drm_i915_gem_request *req = (struct drm_i915_gem_request *)data;
 +      struct i915_request *req = data;
        struct intel_gvt *gvt = container_of(nb, struct intel_gvt,
                                shadow_ctx_notifier_block[req->engine->id]);
        struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
@@@ -252,11 -275,6 +302,11 @@@ static int copy_workload_to_ring_buffer
        struct intel_vgpu *vgpu = workload->vgpu;
        void *shadow_ring_buffer_va;
        u32 *cs;
 +      struct i915_request *req = workload->req;
 +
 +      if (IS_KABYLAKE(req->i915) &&
 +          is_inhibit_context(req->ctx, req->engine->id))
 +              intel_vgpu_restore_inhibit_context(vgpu, req);
  
        /* allocate shadow ring buffer */
        cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
@@@ -365,13 -383,13 +415,13 @@@ static int intel_gvt_generate_request(s
        int ring_id = workload->ring_id;
        struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
        struct intel_engine_cs *engine = dev_priv->engine[ring_id];
 -      struct drm_i915_gem_request *rq;
 +      struct i915_request *rq;
        struct intel_vgpu *vgpu = workload->vgpu;
        struct intel_vgpu_submission *s = &vgpu->submission;
        struct i915_gem_context *shadow_ctx = s->shadow_ctx;
        int ret;
  
 -      rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx);
 +      rq = i915_request_alloc(dev_priv->engine[ring_id], shadow_ctx);
        if (IS_ERR(rq)) {
                gvt_vgpu_err("fail to allocate gem request\n");
                ret = PTR_ERR(rq);
  
        gvt_dbg_sched("ring id %d get i915 gem request %p\n", ring_id, rq);
  
 -      workload->req = i915_gem_request_get(rq);
 +      workload->req = i915_request_get(rq);
        ret = copy_workload_to_ring_buffer(workload);
        if (ret)
                goto err_unpin;
@@@ -408,6 -426,17 +458,17 @@@ static int prepare_shadow_batch_buffer(
                        goto err;
                }
  
+               /* For a privileged batch buffer that is not the wa_ctx, the
+                * bb_start_cmd_va is only updated into ring_scan_buffer, not the
+                * real ring address allocated later in copy_workload_to_ring_buffer.
+                * Note that shadow_ring_buffer_va then points to the real ring
+                * buffer va in copy_workload_to_ring_buffer.
+                */
+               if (bb->bb_offset)
+                       bb->bb_start_cmd_va = workload->shadow_ring_buffer_va
+                               + bb->bb_offset;
                /* relocate shadow batch buffer */
                bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma);
                if (gmadr_bytes == 8)
@@@ -536,8 -565,6 +597,8 @@@ static int prepare_workload(struct inte
                return ret;
        }
  
 +      update_shadow_pdps(workload);
 +
        ret = intel_vgpu_sync_oos_pages(workload->vgpu);
        if (ret) {
                gvt_vgpu_err("fail to vgpu sync oos pages\n");
@@@ -616,7 -643,7 +677,7 @@@ out
        if (!IS_ERR_OR_NULL(workload->req)) {
                gvt_dbg_sched("ring id %d submit workload to i915 %p\n",
                                ring_id, workload->req);
 -              i915_add_request(workload->req);
 +              i915_request_add(workload->req);
                workload->dispatched = true;
        }
  
@@@ -803,7 -830,7 +864,7 @@@ static void complete_current_workload(s
                                workload->status = 0;
                }
  
 -              i915_gem_request_put(fetch_and_zero(&workload->req));
 +              i915_request_put(fetch_and_zero(&workload->req));
  
                if (!workload->status && !(vgpu->resetting_eng &
                                           ENGINE_MASK(ring_id))) {
@@@ -920,7 -947,7 +981,7 @@@ static int workload_thread(void *priv
  
                gvt_dbg_sched("ring id %d wait workload %p\n",
                                workload->ring_id, workload);
 -              i915_wait_request(workload->req, 0, MAX_SCHEDULE_TIMEOUT);
 +              i915_request_wait(workload->req, 0, MAX_SCHEDULE_TIMEOUT);
  
  complete:
                gvt_dbg_sched("will complete workload %p, status: %d\n",
@@@ -1078,10 -1105,12 +1139,12 @@@ int intel_vgpu_setup_submission(struct 
  
        bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES);
  
-       s->workloads = kmem_cache_create("gvt-g_vgpu_workload",
-                       sizeof(struct intel_vgpu_workload), 0,
-                       SLAB_HWCACHE_ALIGN,
-                       NULL);
+       s->workloads = kmem_cache_create_usercopy("gvt-g_vgpu_workload",
+                                                 sizeof(struct intel_vgpu_workload), 0,
+                                                 SLAB_HWCACHE_ALIGN,
+                                                 offsetof(struct intel_vgpu_workload, rb_tail),
+                                                 sizeof_field(struct intel_vgpu_workload, rb_tail),
+                                                 NULL);
  
        if (!s->workloads) {
                ret = -ENOMEM;
@@@ -1166,7 -1195,7 +1229,7 @@@ void intel_vgpu_destroy_workload(struc
        struct intel_vgpu_submission *s = &workload->vgpu->submission;
  
        if (workload->shadow_mm)
 -              intel_gvt_mm_unreference(workload->shadow_mm);
 +              intel_vgpu_mm_put(workload->shadow_mm);
  
        kmem_cache_free(s->workloads, workload);
  }
@@@ -1215,27 -1244,32 +1278,27 @@@ static int prepare_mm(struct intel_vgpu
        struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc;
        struct intel_vgpu_mm *mm;
        struct intel_vgpu *vgpu = workload->vgpu;
 -      int page_table_level;
 -      u32 pdp[8];
 +      intel_gvt_gtt_type_t root_entry_type;
 +      u64 pdps[GVT_RING_CTX_NR_PDPS];
  
 -      if (desc->addressing_mode == 1) { /* legacy 32-bit */
 -              page_table_level = 3;
 -      } else if (desc->addressing_mode == 3) { /* legacy 64 bit */
 -              page_table_level = 4;
 -      } else {
 +      switch (desc->addressing_mode) {
 +      case 1: /* legacy 32-bit */
 +              root_entry_type = GTT_TYPE_PPGTT_ROOT_L3_ENTRY;
 +              break;
 +      case 3: /* legacy 64-bit */
 +              root_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY;
 +              break;
 +      default:
                gvt_vgpu_err("Advanced Context mode(SVM) is not supported!\n");
                return -EINVAL;
        }
  
 -      read_guest_pdps(workload->vgpu, workload->ring_context_gpa, pdp);
 +      read_guest_pdps(workload->vgpu, workload->ring_context_gpa, (void *)pdps);
  
 -      mm = intel_vgpu_find_ppgtt_mm(workload->vgpu, page_table_level, pdp);
 -      if (mm) {
 -              intel_gvt_mm_reference(mm);
 -      } else {
 +      mm = intel_vgpu_get_ppgtt_mm(workload->vgpu, root_entry_type, pdps);
 +      if (IS_ERR(mm))
 +              return PTR_ERR(mm);
  
 -              mm = intel_vgpu_create_mm(workload->vgpu, INTEL_GVT_MM_PPGTT,
 -                              pdp, page_table_level, 0);
 -              if (IS_ERR(mm)) {
 -                      gvt_vgpu_err("fail to create mm object.\n");
 -                      return PTR_ERR(mm);
 -              }
 -      }
        workload->shadow_mm = mm;
        return 0;
  }
@@@ -80,7 -80,7 +80,7 @@@ struct intel_shadow_wa_ctx 
  struct intel_vgpu_workload {
        struct intel_vgpu *vgpu;
        int ring_id;
 -      struct drm_i915_gem_request *req;
 +      struct i915_request *req;
        /* if this workload has been dispatched to i915? */
        bool dispatched;
        bool shadowed;
        /* shadow batch buffer */
        struct list_head shadow_bb;
        struct intel_shadow_wa_ctx wa_ctx;
+       /* oa registers */
+       u32 oactxctrl;
+       u32 flex_mmio[7];
  };
  
  struct intel_vgpu_shadow_bb {
        u32 *bb_start_cmd_va;
        unsigned int clflush;
        bool accessing;
+       unsigned long bb_offset;
  };
  
  #define workload_q_head(vgpu, ring_id) \
@@@ -113,10 -113,10 +113,10 @@@ TRACE_EVENT(gma_index
  );
  
  TRACE_EVENT(gma_translate,
 -      TP_PROTO(int id, char *type, int ring_id, int pt_level,
 +      TP_PROTO(int id, char *type, int ring_id, int root_entry_type,
                unsigned long gma, unsigned long gpa),
  
 -      TP_ARGS(id, type, ring_id, pt_level, gma, gpa),
 +      TP_ARGS(id, type, ring_id, root_entry_type, gma, gpa),
  
        TP_STRUCT__entry(
                __array(char, buf, MAX_BUF_LEN)
  
        TP_fast_assign(
                snprintf(__entry->buf, MAX_BUF_LEN,
 -                      "VM%d %s ring %d pt_level %d gma 0x%lx -> gpa 0x%lx\n",
 -                              id, type, ring_id, pt_level, gma, gpa);
 +                      "VM%d %s ring %d root_entry_type %d gma 0x%lx -> gpa 0x%lx\n",
 +                      id, type, ring_id, root_entry_type, gma, gpa);
        ),
  
        TP_printk("%s", __entry->buf)
@@@ -168,7 -168,7 +168,7 @@@ TRACE_EVENT(spt_change
        TP_printk("%s", __entry->buf)
  );
  
 -TRACE_EVENT(gpt_change,
 +TRACE_EVENT(spt_guest_change,
        TP_PROTO(int id, const char *tag, void *spt, int type, u64 v,
                unsigned long index),
  
@@@ -333,7 -333,7 +333,7 @@@ TRACE_EVENT(render_mmio
        TP_PROTO(int old_id, int new_id, char *action, unsigned int reg,
                 unsigned int old_val, unsigned int new_val),
  
-       TP_ARGS(old_id, new_id, action, reg, new_val, old_val),
+       TP_ARGS(old_id, new_id, action, reg, old_val, new_val),
  
        TP_STRUCT__entry(
                __field(int, old_id)
@@@ -240,8 -240,8 +240,8 @@@ err_phys
  
  static void __start_cpu_write(struct drm_i915_gem_object *obj)
  {
 -      obj->base.read_domains = I915_GEM_DOMAIN_CPU;
 -      obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 +      obj->read_domains = I915_GEM_DOMAIN_CPU;
 +      obj->write_domain = I915_GEM_DOMAIN_CPU;
        if (cpu_write_needs_clflush(obj))
                obj->cache_dirty = true;
  }
@@@ -257,7 -257,7 +257,7 @@@ __i915_gem_object_release_shmem(struct 
                obj->mm.dirty = false;
  
        if (needs_clflush &&
 -          (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
 +          (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
            !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
                drm_clflush_sg(pages);
  
@@@ -353,7 -353,7 +353,7 @@@ i915_gem_object_wait_fence(struct dma_f
                           long timeout,
                           struct intel_rps_client *rps_client)
  {
 -      struct drm_i915_gem_request *rq;
 +      struct i915_request *rq;
  
        BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
  
                                              timeout);
  
        rq = to_request(fence);
 -      if (i915_gem_request_completed(rq))
 +      if (i915_request_completed(rq))
                goto out;
  
 -      /* This client is about to stall waiting for the GPU. In many cases
 +      /*
 +       * This client is about to stall waiting for the GPU. In many cases
         * this is undesirable and limits the throughput of the system, as
         * many clients cannot continue processing user input/output whilst
         * blocked. RPS autotuning may take tens of milliseconds to respond
         * forcing the clocks too high for the whole system, we only allow
         * each client to waitboost once in a busy period.
         */
 -      if (rps_client) {
 +      if (rps_client && !i915_request_started(rq)) {
                if (INTEL_GEN(rq->i915) >= 6)
                        gen6_rps_boost(rq, rps_client);
 -              else
 -                      rps_client = NULL;
        }
  
 -      timeout = i915_wait_request(rq, flags, timeout);
 +      timeout = i915_request_wait(rq, flags, timeout);
  
  out:
 -      if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq))
 -              i915_gem_request_retire_upto(rq);
 +      if (flags & I915_WAIT_LOCKED && i915_request_completed(rq))
 +              i915_request_retire_upto(rq);
  
        return timeout;
  }
@@@ -433,20 -434,28 +433,28 @@@ i915_gem_object_wait_reservation(struc
                        dma_fence_put(shared[i]);
                kfree(shared);
  
+               /*
+                * If both shared fences and an exclusive fence exist,
+                * then by construction the shared fences must be later
+                * than the exclusive fence. If we successfully wait for
+                * all the shared fences, we know that the exclusive fence
+                * must also be signaled. If all the shared fences are
+                * signaled, we can prune the array and recover the
+                * floating references on the fences/requests.
+                */
                prune_fences = count && timeout >= 0;
        } else {
                excl = reservation_object_get_excl_rcu(resv);
        }
  
-       if (excl && timeout >= 0) {
+       if (excl && timeout >= 0)
                timeout = i915_gem_object_wait_fence(excl, flags, timeout,
                                                     rps_client);
-               prune_fences = timeout >= 0;
-       }
  
        dma_fence_put(excl);
  
-       /* Oportunistically prune the fences iff we know they have *all* been
+       /*
+        * Opportunistically prune the fences iff we know they have *all* been
         * signaled and that the reservation object has not been changed (i.e.
         * no new fences have been added).
         */
  
  static void __fence_set_priority(struct dma_fence *fence, int prio)
  {
 -      struct drm_i915_gem_request *rq;
 +      struct i915_request *rq;
        struct intel_engine_cs *engine;
  
        if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
  
        rq = to_request(fence);
        engine = rq->engine;
 -      if (!engine->schedule)
 -              return;
  
 -      engine->schedule(rq, prio);
 +      rcu_read_lock();
 +      if (engine->schedule)
 +              engine->schedule(rq, prio);
 +      rcu_read_unlock();
  }
  
  static void fence_set_priority(struct dma_fence *fence, int prio)
@@@ -704,10 -712,10 +712,10 @@@ flush_write_domain(struct drm_i915_gem_
        struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
        struct i915_vma *vma;
  
 -      if (!(obj->base.write_domain & flush_domains))
 +      if (!(obj->write_domain & flush_domains))
                return;
  
 -      switch (obj->base.write_domain) {
 +      switch (obj->write_domain) {
        case I915_GEM_DOMAIN_GTT:
                i915_gem_flush_ggtt_writes(dev_priv);
  
                break;
        }
  
 -      obj->base.write_domain = 0;
 +      obj->write_domain = 0;
  }
  
  static inline int
@@@ -832,7 -840,7 +840,7 @@@ int i915_gem_obj_prepare_shmem_read(str
         * anyway again before the next pread happens.
         */
        if (!obj->cache_dirty &&
 -          !(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
 +          !(obj->read_domains & I915_GEM_DOMAIN_CPU))
                *needs_clflush = CLFLUSH_BEFORE;
  
  out:
@@@ -891,7 -899,7 +899,7 @@@ int i915_gem_obj_prepare_shmem_write(st
                 * Same trick applies to invalidate partially written
                 * cachelines read before writing.
                 */
 -              if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
 +              if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
                        *needs_clflush |= CLFLUSH_BEFORE;
        }
  
@@@ -2392,8 -2400,8 +2400,8 @@@ static int i915_gem_object_get_pages_gt
         * wasn't in the GTT, there shouldn't be any way it could have been in
         * a GPU cache
         */
 -      GEM_BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
 -      GEM_BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
 +      GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
 +      GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
  
        st = kmalloc(sizeof(*st), GFP_KERNEL);
        if (st == NULL)
@@@ -2824,23 -2832,24 +2832,23 @@@ i915_gem_object_pwrite_gtt(struct drm_i
        return 0;
  }
  
 -static bool ban_context(const struct i915_gem_context *ctx,
 -                      unsigned int score)
 -{
 -      return (i915_gem_context_is_bannable(ctx) &&
 -              score >= CONTEXT_SCORE_BAN_THRESHOLD);
 -}
 -
  static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx)
  {
 -      unsigned int score;
        bool banned;
  
        atomic_inc(&ctx->guilty_count);
  
 -      score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score);
 -      banned = ban_context(ctx, score);
 -      DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? %s\n",
 -                       ctx->name, score, yesno(banned));
 +      banned = false;
 +      if (i915_gem_context_is_bannable(ctx)) {
 +              unsigned int score;
 +
 +              score = atomic_add_return(CONTEXT_SCORE_GUILTY,
 +                                        &ctx->ban_score);
 +              banned = score >= CONTEXT_SCORE_BAN_THRESHOLD;
 +
 +              DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? %s\n",
 +                               ctx->name, score, yesno(banned));
 +      }
        if (!banned)
                return;
  
@@@ -2857,10 -2866,10 +2865,10 @@@ static void i915_gem_context_mark_innoc
        atomic_inc(&ctx->active_count);
  }
  
 -struct drm_i915_gem_request *
 +struct i915_request *
  i915_gem_find_active_request(struct intel_engine_cs *engine)
  {
 -      struct drm_i915_gem_request *request, *active = NULL;
 +      struct i915_request *request, *active = NULL;
        unsigned long flags;
  
        /* We are called by the error capture and reset at a random
         */
        spin_lock_irqsave(&engine->timeline->lock, flags);
        list_for_each_entry(request, &engine->timeline->requests, link) {
 -              if (__i915_gem_request_completed(request,
 -                                               request->global_seqno))
 +              if (__i915_request_completed(request, request->global_seqno))
                        continue;
  
                GEM_BUG_ON(request->engine != engine);
@@@ -2906,10 -2916,10 +2914,10 @@@ static bool engine_stalled(struct intel
   * Ensure irq handler finishes, and not run again.
   * Also return the active request so that we only search for it once.
   */
 -struct drm_i915_gem_request *
 +struct i915_request *
  i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
  {
 -      struct drm_i915_gem_request *request = NULL;
 +      struct i915_request *request = NULL;
  
        /*
         * During the reset sequence, we must prevent the engine from
         * calling engine->init_hw() and also writing the ELSP.
         * Turning off the execlists->tasklet until the reset is over
         * prevents the race.
 +       *
 +       * Note that this needs to be a single atomic operation on the
 +       * tasklet (flush existing tasks, prevent new tasks) to prevent
 +       * a race between reset and set-wedged. It is not, so we do the best
 +       * we can for now and make sure we don't lock the machine up in the more
 +       * common case of recursively being called from set-wedged from inside
 +       * i915_reset.
         */
 -      tasklet_kill(&engine->execlists.tasklet);
 +      if (!atomic_read(&engine->execlists.tasklet.count))
 +              tasklet_kill(&engine->execlists.tasklet);
        tasklet_disable(&engine->execlists.tasklet);
  
        /*
  int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
  {
        struct intel_engine_cs *engine;
 -      struct drm_i915_gem_request *request;
 +      struct i915_request *request;
        enum intel_engine_id id;
        int err = 0;
  
        return err;
  }
  
 -static void skip_request(struct drm_i915_gem_request *request)
 +static void skip_request(struct i915_request *request)
  {
        void *vaddr = request->ring->vaddr;
        u32 head;
        dma_fence_set_error(&request->fence, -EIO);
  }
  
 -static void engine_skip_context(struct drm_i915_gem_request *request)
 +static void engine_skip_context(struct i915_request *request)
  {
        struct intel_engine_cs *engine = request->engine;
        struct i915_gem_context *hung_ctx = request->ctx;
  }
  
  /* Returns the request if it was guilty of the hang */
 -static struct drm_i915_gem_request *
 +static struct i915_request *
  i915_gem_reset_request(struct intel_engine_cs *engine,
 -                     struct drm_i915_gem_request *request)
 +                     struct i915_request *request)
  {
        /* The guilty request will get skipped on a hung engine.
         *
  }
  
  void i915_gem_reset_engine(struct intel_engine_cs *engine,
 -                         struct drm_i915_gem_request *request)
 +                         struct i915_request *request)
  {
        /*
         * Make sure this write is visible before we re-enable the interrupt
@@@ -3121,7 -3123,7 +3129,7 @@@ void i915_gem_reset(struct drm_i915_pri
  
        lockdep_assert_held(&dev_priv->drm.struct_mutex);
  
 -      i915_gem_retire_requests(dev_priv);
 +      i915_retire_requests(dev_priv);
  
        for_each_engine(engine, dev_priv, id) {
                struct i915_gem_context *ctx;
                 * an incoherent read by the CS (presumably stale TLB). An
                 * empty request appears sufficient to paper over the glitch.
                 */
 -              if (list_empty(&engine->timeline->requests)) {
 -                      struct drm_i915_gem_request *rq;
 +              if (intel_engine_is_idle(engine)) {
 +                      struct i915_request *rq;
  
 -                      rq = i915_gem_request_alloc(engine,
 -                                                  dev_priv->kernel_context);
 +                      rq = i915_request_alloc(engine,
 +                                              dev_priv->kernel_context);
                        if (!IS_ERR(rq))
 -                              __i915_add_request(rq, false);
 +                              __i915_request_add(rq, false);
                }
        }
  
@@@ -3182,21 -3184,21 +3190,21 @@@ void i915_gem_reset_finish(struct drm_i
        }
  }
  
 -static void nop_submit_request(struct drm_i915_gem_request *request)
 +static void nop_submit_request(struct i915_request *request)
  {
        dma_fence_set_error(&request->fence, -EIO);
  
 -      i915_gem_request_submit(request);
 +      i915_request_submit(request);
  }
  
 -static void nop_complete_submit_request(struct drm_i915_gem_request *request)
 +static void nop_complete_submit_request(struct i915_request *request)
  {
        unsigned long flags;
  
        dma_fence_set_error(&request->fence, -EIO);
  
        spin_lock_irqsave(&request->engine->timeline->lock, flags);
 -      __i915_gem_request_submit(request);
 +      __i915_request_submit(request);
        intel_engine_init_global_seqno(request->engine, request->global_seqno);
        spin_unlock_irqrestore(&request->engine->timeline->lock, flags);
  }
@@@ -3206,16 -3208,6 +3214,16 @@@ void i915_gem_set_wedged(struct drm_i91
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
  
 +      if (drm_debug & DRM_UT_DRIVER) {
 +              struct drm_printer p = drm_debug_printer(__func__);
 +
 +              for_each_engine(engine, i915, id)
 +                      intel_engine_dump(engine, &p, "%s\n", engine->name);
 +      }
 +
 +      set_bit(I915_WEDGED, &i915->gpu_error.flags);
 +      smp_mb__after_atomic();
 +
        /*
         * First, stop submission to hw, but do not yet complete requests by
         * rolling the global seqno forward (since this would complete requests
         */
        for_each_engine(engine, i915, id) {
                i915_gem_reset_prepare_engine(engine);
 +
                engine->submit_request = nop_submit_request;
 +              engine->schedule = NULL;
        }
 +      i915->caps.scheduler = 0;
  
        /*
         * Make sure no one is running the old callback before we proceed with
        for_each_engine(engine, i915, id) {
                unsigned long flags;
  
 -              /* Mark all pending requests as complete so that any concurrent
 +              /*
 +               * Mark all pending requests as complete so that any concurrent
                 * (lockless) lookup doesn't try and wait upon the request as we
                 * reset it.
                 */
                i915_gem_reset_finish_engine(engine);
        }
  
 -      set_bit(I915_WEDGED, &i915->gpu_error.flags);
        wake_up_all(&i915->gpu_error.reset_queue);
  }
  
@@@ -3293,7 -3282,7 +3301,7 @@@ bool i915_gem_unset_wedged(struct drm_i
         */
        list_for_each_entry(tl, &i915->gt.timelines, link) {
                for (i = 0; i < ARRAY_SIZE(tl->engine); i++) {
 -                      struct drm_i915_gem_request *rq;
 +                      struct i915_request *rq;
  
                        rq = i915_gem_active_peek(&tl->engine[i].last_request,
                                                  &i915->drm.struct_mutex);
@@@ -3342,7 -3331,7 +3350,7 @@@ i915_gem_retire_work_handler(struct wor
  
        /* Come back later if the device is busy... */
        if (mutex_trylock(&dev->struct_mutex)) {
 -              i915_gem_retire_requests(dev_priv);
 +              i915_retire_requests(dev_priv);
                mutex_unlock(&dev->struct_mutex);
        }
  
                                   round_jiffies_up_relative(HZ));
  }
  
 +static void shrink_caches(struct drm_i915_private *i915)
 +{
 +      /*
 +       * kmem_cache_shrink() discards empty slabs and reorders partially
 +       * filled slabs to prioritise allocating from the mostly full slabs,
 +       * with the aim of reducing fragmentation.
 +       */
 +      kmem_cache_shrink(i915->priorities);
 +      kmem_cache_shrink(i915->dependencies);
 +      kmem_cache_shrink(i915->requests);
 +      kmem_cache_shrink(i915->luts);
 +      kmem_cache_shrink(i915->vmas);
 +      kmem_cache_shrink(i915->objects);
 +}
 +
 +struct sleep_rcu_work {
 +      union {
 +              struct rcu_head rcu;
 +              struct work_struct work;
 +      };
 +      struct drm_i915_private *i915;
 +      unsigned int epoch;
 +};
 +
 +static inline bool
 +same_epoch(struct drm_i915_private *i915, unsigned int epoch)
 +{
 +      /*
 +       * There is a small chance that the epoch wrapped since we started
 +       * sleeping. If we assume that epoch is at least a u32, then it will
 +       * take at least 2^32 * 100ms for it to wrap, or about 13.6 years.
 +       */
 +      return epoch == READ_ONCE(i915->gt.epoch);
 +}
 +
 +static void __sleep_work(struct work_struct *work)
 +{
 +      struct sleep_rcu_work *s = container_of(work, typeof(*s), work);
 +      struct drm_i915_private *i915 = s->i915;
 +      unsigned int epoch = s->epoch;
 +
 +      kfree(s);
 +      if (same_epoch(i915, epoch))
 +              shrink_caches(i915);
 +}
 +
 +static void __sleep_rcu(struct rcu_head *rcu)
 +{
 +      struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu);
 +      struct drm_i915_private *i915 = s->i915;
 +
 +      if (same_epoch(i915, s->epoch)) {
 +              INIT_WORK(&s->work, __sleep_work);
 +              queue_work(i915->wq, &s->work);
 +      } else {
 +              kfree(s);
 +      }
 +}
 +
  static inline bool
  new_requests_since_last_retire(const struct drm_i915_private *i915)
  {
@@@ -3428,24 -3358,26 +3436,24 @@@ i915_gem_idle_work_handler(struct work_
  {
        struct drm_i915_private *dev_priv =
                container_of(work, typeof(*dev_priv), gt.idle_work.work);
 +      unsigned int epoch = I915_EPOCH_INVALID;
        bool rearm_hangcheck;
 -      ktime_t end;
  
        if (!READ_ONCE(dev_priv->gt.awake))
                return;
  
        /*
         * Wait for last execlists context complete, but bail out in case a
 -       * new request is submitted.
 +       * new request is submitted. As we don't trust the hardware, we
 +       * continue on if the wait times out. This is necessary to allow
 +       * the machine to suspend even if the hardware dies, and we will
 +       * try to recover in resume (after depriving the hardware of power,
 +       * it may be in a better mood).
         */
 -      end = ktime_add_ms(ktime_get(), I915_IDLE_ENGINES_TIMEOUT);
 -      do {
 -              if (new_requests_since_last_retire(dev_priv))
 -                      return;
 -
 -              if (intel_engines_are_idle(dev_priv))
 -                      break;
 -
 -              usleep_range(100, 500);
 -      } while (ktime_before(ktime_get(), end));
 +      __wait_for(if (new_requests_since_last_retire(dev_priv)) return,
 +                 intel_engines_are_idle(dev_priv),
 +                 I915_IDLE_ENGINES_TIMEOUT * 1000,
 +                 10, 500);
  
        rearm_hangcheck =
                cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
  
        GEM_BUG_ON(!dev_priv->gt.awake);
        dev_priv->gt.awake = false;
 +      epoch = dev_priv->gt.epoch;
 +      GEM_BUG_ON(epoch == I915_EPOCH_INVALID);
        rearm_hangcheck = false;
  
        if (INTEL_GEN(dev_priv) >= 6)
@@@ -3503,23 -3433,6 +3511,23 @@@ out_rearm
                GEM_BUG_ON(!dev_priv->gt.awake);
                i915_queue_hangcheck(dev_priv);
        }
 +
 +      /*
 +       * When we are idle, it is an opportune time to reap our caches.
 +       * However, we have many objects that utilise RCU and the ordered
 +       * i915->wq that this work is executing on. To try and flush any
 +       * pending frees now that we are idle, we first wait for an RCU grace
 +       * period, and then queue a task (that will run last on the wq) to
 +       * shrink and re-optimize the caches.
 +       */
 +      if (same_epoch(dev_priv, epoch)) {
 +              struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL);
 +              if (s) {
 +                      s->i915 = dev_priv;
 +                      s->epoch = epoch;
 +                      call_rcu(&s->rcu, __sleep_rcu);
 +              }
 +      }
  }
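
The shrink path introduced above is a two-stage deferral: the idle handler samples the wake epoch and calls call_rcu(); __sleep_rcu() re-checks the epoch after the grace period and, if the device is still asleep, re-queues the same allocation as a work item on the ordered i915->wq; __sleep_work() checks the epoch one last time before shrinking. A minimal, self-contained sketch of that pattern follows, using hypothetical names (my_dev, deferred_shrink, my_dev_shrink_caches) rather than the i915 structures; the union-based reuse of the rcu_head/work_struct storage is an assumption about how such a struct would be shaped, not a quote of the driver's definition.

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

/* Sketch only, hypothetical names: defer a cache shrink until an RCU
 * grace period has elapsed and an ordered workqueue has drained ahead
 * of it, abandoning the shrink if the wake "epoch" changed meanwhile. */
struct my_dev {
	struct workqueue_struct *wq;	/* ordered workqueue */
	unsigned int epoch;		/* bumped on every idle->busy transition */
};

struct deferred_shrink {
	union {				/* storage reused: rcu_head first, then work */
		struct rcu_head rcu;
		struct work_struct work;
	};
	struct my_dev *dev;
	unsigned int epoch;		/* epoch sampled when the shrink was queued */
};

static void my_dev_shrink_caches(struct my_dev *dev)
{
	/* e.g. kmem_cache_shrink() on the driver's slab caches */
}

static void deferred_shrink_work(struct work_struct *work)
{
	struct deferred_shrink *s = container_of(work, typeof(*s), work);

	if (s->epoch == READ_ONCE(s->dev->epoch))	/* still idle? */
		my_dev_shrink_caches(s->dev);
	kfree(s);
}

static void deferred_shrink_rcu(struct rcu_head *rcu)
{
	struct deferred_shrink *s = container_of(rcu, typeof(*s), rcu);

	if (s->epoch == READ_ONCE(s->dev->epoch)) {
		/* Grace period elapsed; run last on the ordered workqueue. */
		INIT_WORK(&s->work, deferred_shrink_work);
		queue_work(s->dev->wq, &s->work);
	} else {
		kfree(s);
	}
}

static void my_dev_queue_shrink(struct my_dev *dev)
{
	struct deferred_shrink *s = kmalloc(sizeof(*s), GFP_KERNEL);

	if (!s)
		return;
	s->dev = dev;
	s->epoch = READ_ONCE(dev->epoch);
	call_rcu(&s->rcu, deferred_shrink_rcu);
}
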
  
  void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
@@@ -3665,7 -3578,7 +3673,7 @@@ static int wait_for_engines(struct drm_
  
                        for_each_engine(engine, i915, id)
                                intel_engine_dump(engine, &p,
 -                                                "%s", engine->name);
 +                                                "%s\n", engine->name);
                }
  
                i915_gem_set_wedged(i915);
@@@ -3693,7 -3606,7 +3701,7 @@@ int i915_gem_wait_for_idle(struct drm_i
                        if (ret)
                                return ret;
                }
 -              i915_gem_retire_requests(i915);
 +              i915_retire_requests(i915);
  
                ret = wait_for_engines(i915);
        } else {
@@@ -3712,7 -3625,7 +3720,7 @@@ static void __i915_gem_object_flush_for
        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
        if (obj->cache_dirty)
                i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
 -      obj->base.write_domain = 0;
 +      obj->write_domain = 0;
  }
  
  void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
@@@ -3749,7 -3662,7 +3757,7 @@@ i915_gem_object_set_to_wc_domain(struc
        if (ret)
                return ret;
  
 -      if (obj->base.write_domain == I915_GEM_DOMAIN_WC)
 +      if (obj->write_domain == I915_GEM_DOMAIN_WC)
                return 0;
  
        /* Flush and acquire obj->pages so that we are coherent through
         * coherent writes from the GPU, by effectively invalidating the
         * WC domain upon first access.
         */
 -      if ((obj->base.read_domains & I915_GEM_DOMAIN_WC) == 0)
 +      if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
                mb();
  
        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
 -      GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_WC) != 0);
 -      obj->base.read_domains |= I915_GEM_DOMAIN_WC;
 +      GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
 +      obj->read_domains |= I915_GEM_DOMAIN_WC;
        if (write) {
 -              obj->base.read_domains = I915_GEM_DOMAIN_WC;
 -              obj->base.write_domain = I915_GEM_DOMAIN_WC;
 +              obj->read_domains = I915_GEM_DOMAIN_WC;
 +              obj->write_domain = I915_GEM_DOMAIN_WC;
                obj->mm.dirty = true;
        }
  
@@@ -3812,7 -3725,7 +3820,7 @@@ i915_gem_object_set_to_gtt_domain(struc
        if (ret)
                return ret;
  
 -      if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
 +      if (obj->write_domain == I915_GEM_DOMAIN_GTT)
                return 0;
  
        /* Flush and acquire obj->pages so that we are coherent through
         * coherent writes from the GPU, by effectively invalidating the
         * GTT domain upon first access.
         */
 -      if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
 +      if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
                mb();
  
        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
 -      GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
 -      obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
 +      GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
 +      obj->read_domains |= I915_GEM_DOMAIN_GTT;
        if (write) {
 -              obj->base.read_domains = I915_GEM_DOMAIN_GTT;
 -              obj->base.write_domain = I915_GEM_DOMAIN_GTT;
 +              obj->read_domains = I915_GEM_DOMAIN_GTT;
 +              obj->write_domain = I915_GEM_DOMAIN_GTT;
                obj->mm.dirty = true;
        }
  
@@@ -4087,8 -4000,7 +4095,8 @@@ out
  struct i915_vma *
  i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
                                     u32 alignment,
 -                                   const struct i915_ggtt_view *view)
 +                                   const struct i915_ggtt_view *view,
 +                                   unsigned int flags)
  {
        struct i915_vma *vma;
        int ret;
         * try to preserve the existing ABI).
         */
        vma = ERR_PTR(-ENOSPC);
 -      if (!view || view->type == I915_GGTT_VIEW_NORMAL)
 +      if ((flags & PIN_MAPPABLE) == 0 &&
 +          (!view || view->type == I915_GGTT_VIEW_NORMAL))
                vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
 -                                             PIN_MAPPABLE | PIN_NONBLOCK);
 -      if (IS_ERR(vma)) {
 -              struct drm_i915_private *i915 = to_i915(obj->base.dev);
 -              unsigned int flags;
 -
 -              /* Valleyview is definitely limited to scanning out the first
 -               * 512MiB. Lets presume this behaviour was inherited from the
 -               * g4x display engine and that all earlier gen are similarly
 -               * limited. Testing suggests that it is a little more
 -               * complicated than this. For example, Cherryview appears quite
 -               * happy to scanout from anywhere within its global aperture.
 -               */
 -              flags = 0;
 -              if (HAS_GMCH_DISPLAY(i915))
 -                      flags = PIN_MAPPABLE;
 +                                             flags |
 +                                             PIN_MAPPABLE |
 +                                             PIN_NONBLOCK);
 +      if (IS_ERR(vma))
                vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
 -      }
        if (IS_ERR(vma))
                goto err_unpin_global;
  
        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
 -      obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
 +      obj->read_domains |= I915_GEM_DOMAIN_GTT;
  
        return vma;
  
@@@ -4198,15 -4121,15 +4206,15 @@@ i915_gem_object_set_to_cpu_domain(struc
        flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
  
        /* Flush the CPU cache if it's still invalid. */
 -      if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
 +      if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
                i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
 -              obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
 +              obj->read_domains |= I915_GEM_DOMAIN_CPU;
        }
  
        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
 -      GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
 +      GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
  
        /* If we're writing through the CPU, then the GPU read domains will
         * need to be invalidated at next use.
@@@ -4233,7 -4156,7 +4241,7 @@@ i915_gem_ring_throttle(struct drm_devic
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct drm_i915_file_private *file_priv = file->driver_priv;
        unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
 -      struct drm_i915_gem_request *request, *target = NULL;
 +      struct i915_request *request, *target = NULL;
        long ret;
  
        /* ABI: return -EIO if already wedged */
                target = request;
        }
        if (target)
 -              i915_gem_request_get(target);
 +              i915_request_get(target);
        spin_unlock(&file_priv->mm.lock);
  
        if (target == NULL)
                return 0;
  
 -      ret = i915_wait_request(target,
 +      ret = i915_request_wait(target,
                                I915_WAIT_INTERRUPTIBLE,
                                MAX_SCHEDULE_TIMEOUT);
 -      i915_gem_request_put(target);
 +      i915_request_put(target);
  
        return ret < 0 ? ret : 0;
  }
@@@ -4281,8 -4204,7 +4289,8 @@@ i915_gem_object_ggtt_pin(struct drm_i91
  
        lockdep_assert_held(&obj->base.dev->struct_mutex);
  
 -      if (!view && flags & PIN_MAPPABLE) {
 +      if (flags & PIN_MAPPABLE &&
 +          (!view || view->type == I915_GGTT_VIEW_NORMAL)) {
                /* If the required space is larger than the available
                 * aperture, we will not able to find a slot for the
                 * object and unbinding the object now will be in
@@@ -4376,7 -4298,7 +4384,7 @@@ static __always_inline unsigned in
  __busy_set_if_active(const struct dma_fence *fence,
                     unsigned int (*flag)(unsigned int id))
  {
 -      struct drm_i915_gem_request *rq;
 +      struct i915_request *rq;
  
        /* We have to check the current hw status of the fence as the uABI
         * guarantees forward progress. We could rely on the idle worker
                return 0;
  
        /* opencode to_request() in order to avoid const warnings */
 -      rq = container_of(fence, struct drm_i915_gem_request, fence);
 -      if (i915_gem_request_completed(rq))
 +      rq = container_of(fence, struct i915_request, fence);
 +      if (i915_request_completed(rq))
                return 0;
  
        return flag(rq->engine->uabi_id);
@@@ -4535,7 -4457,8 +4543,7 @@@ out
  }
  
  static void
 -frontbuffer_retire(struct i915_gem_active *active,
 -                 struct drm_i915_gem_request *request)
 +frontbuffer_retire(struct i915_gem_active *active, struct i915_request *request)
  {
        struct drm_i915_gem_object *obj =
                container_of(active, typeof(*obj), frontbuffer_write);
@@@ -4642,8 -4565,8 +4650,8 @@@ i915_gem_object_create(struct drm_i915_
  
        i915_gem_object_init(obj, &i915_gem_object_ops);
  
 -      obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 -      obj->base.read_domains = I915_GEM_DOMAIN_CPU;
 +      obj->write_domain = I915_GEM_DOMAIN_CPU;
 +      obj->read_domains = I915_GEM_DOMAIN_CPU;
  
        if (HAS_LLC(dev_priv))
                /* On some devices, we can have the GPU use the LLC (the CPU
@@@ -4757,9 -4680,6 +4765,9 @@@ static void __i915_gem_free_objects(str
                kfree(obj->bit_17);
                i915_gem_object_free(obj);
  
 +              GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
 +              atomic_dec(&i915->mm.free_count);
 +
                if (on)
                        cond_resched();
        }
@@@ -4790,8 -4710,7 +4798,8 @@@ static void __i915_gem_free_work(struc
                container_of(work, struct drm_i915_private, mm.free_work);
        struct llist_node *freed;
  
 -      /* All file-owned VMA should have been released by this point through
 +      /*
 +       * All file-owned VMA should have been released by this point through
         * i915_gem_close_object(), or earlier by i915_gem_context_close().
         * However, the object may also be bound into the global GTT (e.g.
         * older GPUs without per-process support, or for direct access through
@@@ -4818,18 -4737,13 +4826,18 @@@ static void __i915_gem_free_object_rcu(
                container_of(head, typeof(*obj), rcu);
        struct drm_i915_private *i915 = to_i915(obj->base.dev);
  
 -      /* We can't simply use call_rcu() from i915_gem_free_object()
 -       * as we need to block whilst unbinding, and the call_rcu
 -       * task may be called from softirq context. So we take a
 -       * detour through a worker.
 +      /*
 +       * Since we require blocking on struct_mutex to unbind the freed
 +       * object from the GPU before releasing resources back to the
 +       * system, we cannot do that directly from the RCU callback (which may
 +       * be a softirq context), but must instead defer that work onto a
 +       * kthread. We use the RCU callback rather than move the freed object
 +       * directly onto the work queue so that we can mix between using the
 +       * worker and performing frees directly from subsequent allocations for
 +       * crude but effective memory throttling.
         */
        if (llist_add(&obj->freed, &i915->mm.free_list))
 -              schedule_work(&i915->mm.free_work);
 +              queue_work(i915->wq, &i915->mm.free_work);
  }
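
The queue_work() above leans on a detail of the llist API: llist_add() returns true only when the list was empty before the push, so the free worker is kicked exactly once per batch of deferred frees. A small sketch of that idiom with hypothetical names (free_batch, free_batch_defer):

#include <linux/kernel.h>
#include <linux/llist.h>
#include <linux/workqueue.h>

/* Hypothetical illustration of the "lockless list + single worker kick"
 * idiom: producers push nodes onto a lock-free list, and only the push
 * that finds the list empty schedules the consumer. */
struct free_batch {
	struct llist_head list;
	struct work_struct work;
	struct workqueue_struct *wq;
};

static void free_batch_defer(struct free_batch *b, struct llist_node *node)
{
	/* llist_add() returns true iff the list was empty beforehand,
	 * so at most one work item is outstanding per batch. */
	if (llist_add(node, &b->list))
		queue_work(b->wq, &b->work);
}

static void free_batch_worker(struct work_struct *work)
{
	struct free_batch *b = container_of(work, typeof(*b), work);
	struct llist_node *batch = llist_del_all(&b->list);

	/* walk 'batch' and release each node ... */
	(void)batch;
}
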
  
  void i915_gem_free_object(struct drm_gem_object *gem_obj)
        if (discard_backing_storage(obj))
                obj->mm.madv = I915_MADV_DONTNEED;
  
 -      /* Before we free the object, make sure any pure RCU-only
 +      /*
 +       * Before we free the object, make sure any pure RCU-only
         * read-side critical sections are complete, e.g.
         * i915_gem_busy_ioctl(). For the corresponding synchronized
         * lookup see i915_gem_object_lookup_rcu().
         */
 +      atomic_inc(&to_i915(obj->base.dev)->mm.free_count);
        call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
  }
  
@@@ -4891,8 -4803,10 +4899,8 @@@ void i915_gem_sanitize(struct drm_i915_
         * it may impact the display and we are uncertain about the stability
         * of the reset, so this could be applied to even earlier gen.
         */
 -      if (INTEL_GEN(i915) >= 5) {
 -              int reset = intel_gpu_reset(i915, ALL_ENGINES);
 -              WARN_ON(reset && reset != -ENODEV);
 -      }
 +      if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915))
 +              WARN_ON(intel_gpu_reset(i915, ALL_ENGINES));
  }
  
  int i915_gem_suspend(struct drm_i915_private *dev_priv)
        i915_gem_contexts_lost(dev_priv);
        mutex_unlock(&dev->struct_mutex);
  
 -      intel_guc_suspend(dev_priv);
 +      intel_uc_suspend(dev_priv);
  
        cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
        cancel_delayed_work_sync(&dev_priv->gt.retire_work);
@@@ -4996,7 -4910,7 +5004,7 @@@ void i915_gem_resume(struct drm_i915_pr
        if (i915_gem_init_hw(i915))
                goto err_wedged;
  
 -      intel_guc_resume(i915);
 +      intel_uc_resume(i915);
  
        /* Always reload a context for powersaving. */
        if (i915_gem_switch_to_kernel_context(i915))
@@@ -5072,11 -4986,8 +5080,11 @@@ static int __i915_gem_restart_engines(v
  
        for_each_engine(engine, i915, id) {
                err = engine->init_hw(engine);
 -              if (err)
 +              if (err) {
 +                      DRM_ERROR("Failed to restart %s (%d)\n",
 +                                engine->name, err);
                        return err;
 +              }
        }
  
        return 0;
@@@ -5128,16 -5039,14 +5136,16 @@@ int i915_gem_init_hw(struct drm_i915_pr
  
        ret = i915_ppgtt_init_hw(dev_priv);
        if (ret) {
 -              DRM_ERROR("PPGTT enable HW failed %d\n", ret);
 +              DRM_ERROR("Enabling PPGTT failed (%d)\n", ret);
                goto out;
        }
  
        /* We can't enable contexts until all firmware is loaded */
        ret = intel_uc_init_hw(dev_priv);
 -      if (ret)
 +      if (ret) {
 +              DRM_ERROR("Enabling uc failed (%d)\n", ret);
                goto out;
 +      }
  
        intel_mocs_init_l3cc_table(dev_priv);
  
@@@ -5169,9 -5078,9 +5177,9 @@@ static int __intel_engines_record_defau
                return PTR_ERR(ctx);
  
        for_each_engine(engine, i915, id) {
 -              struct drm_i915_gem_request *rq;
 +              struct i915_request *rq;
  
 -              rq = i915_gem_request_alloc(engine, ctx);
 +              rq = i915_request_alloc(engine, ctx);
                if (IS_ERR(rq)) {
                        err = PTR_ERR(rq);
                        goto out_ctx;
                if (engine->init_context)
                        err = engine->init_context(rq);
  
 -              __i915_add_request(rq, true);
 +              __i915_request_add(rq, true);
                if (err)
                        goto err_active;
        }
@@@ -5289,7 -5198,7 +5297,7 @@@ int i915_gem_init(struct drm_i915_priva
        if (ret)
                return ret;
  
 -      ret = intel_uc_init_wq(dev_priv);
 +      ret = intel_uc_init_misc(dev_priv);
        if (ret)
                return ret;
  
@@@ -5385,7 -5294,7 +5393,7 @@@ err_unlock
        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
        mutex_unlock(&dev_priv->drm.struct_mutex);
  
 -      intel_uc_fini_wq(dev_priv);
 +      intel_uc_fini_misc(dev_priv);
  
        if (ret != -EIO)
                i915_gem_cleanup_userptr(dev_priv);
@@@ -5427,10 -5336,10 +5435,10 @@@ i915_gem_load_init_fences(struct drm_i9
  {
        int i;
  
 -      if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) &&
 +      if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) &&
            !IS_CHERRYVIEW(dev_priv))
                dev_priv->num_fence_regs = 32;
 -      else if (INTEL_INFO(dev_priv)->gen >= 4 ||
 +      else if (INTEL_GEN(dev_priv) >= 4 ||
                 IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
                 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
                dev_priv->num_fence_regs = 16;
@@@ -5487,7 -5396,7 +5495,7 @@@ i915_gem_load_init(struct drm_i915_priv
        if (!dev_priv->luts)
                goto err_vmas;
  
 -      dev_priv->requests = KMEM_CACHE(drm_i915_gem_request,
 +      dev_priv->requests = KMEM_CACHE(i915_request,
                                        SLAB_HWCACHE_ALIGN |
                                        SLAB_RECLAIM_ACCOUNT |
                                        SLAB_TYPESAFE_BY_RCU);
@@@ -5549,8 -5458,7 +5557,8 @@@ err_out
  void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
  {
        i915_gem_drain_freed_objects(dev_priv);
 -      WARN_ON(!llist_empty(&dev_priv->mm.free_list));
 +      GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
 +      GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
        WARN_ON(dev_priv->mm.object_count);
  
        mutex_lock(&dev_priv->drm.struct_mutex);
@@@ -5620,7 -5528,7 +5628,7 @@@ int i915_gem_freeze_late(struct drm_i91
  void i915_gem_release(struct drm_device *dev, struct drm_file *file)
  {
        struct drm_i915_file_private *file_priv = file->driver_priv;
 -      struct drm_i915_gem_request *request;
 +      struct i915_request *request;
  
        /* Clean up our request list when the client is going away, so that
         * later retire_requests won't dereference our soon-to-be-gone
@@@ -5706,7 -5614,7 +5714,7 @@@ i915_gem_object_create_from_data(struc
        if (IS_ERR(obj))
                return obj;
  
 -      GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU);
 +      GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
  
        file = obj->base.filp;
        offset = 0;
@@@ -25,7 -25,6 +25,7 @@@
   *
   */
  
 +#include <drm/drm_scdc_helper.h>
  #include "i915_drv.h"
  #include "intel_drv.h"
  
@@@ -1616,35 -1615,6 +1616,35 @@@ void intel_ddi_disable_transcoder_func(
        I915_WRITE(reg, val);
  }
  
 +int intel_ddi_toggle_hdcp_signalling(struct intel_encoder *intel_encoder,
 +                                   bool enable)
 +{
 +      struct drm_device *dev = intel_encoder->base.dev;
 +      struct drm_i915_private *dev_priv = to_i915(dev);
 +      enum pipe pipe = 0;
 +      int ret = 0;
 +      uint32_t tmp;
 +
 +      if (WARN_ON(!intel_display_power_get_if_enabled(dev_priv,
 +                                              intel_encoder->power_domain)))
 +              return -ENXIO;
 +
 +      if (WARN_ON(!intel_encoder->get_hw_state(intel_encoder, &pipe))) {
 +              ret = -EIO;
 +              goto out;
 +      }
 +
 +      tmp = I915_READ(TRANS_DDI_FUNC_CTL(pipe));
 +      if (enable)
 +              tmp |= TRANS_DDI_HDCP_SIGNALLING;
 +      else
 +              tmp &= ~TRANS_DDI_HDCP_SIGNALLING;
 +      I915_WRITE(TRANS_DDI_FUNC_CTL(pipe), tmp);
 +out:
 +      intel_display_power_put(dev_priv, intel_encoder->power_domain);
 +      return ret;
 +}
 +
  bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector)
  {
        struct drm_device *dev = intel_connector->base.dev;
@@@ -2153,7 -2123,7 +2153,7 @@@ static void intel_ddi_clk_select(struc
  
                I915_WRITE(DPLL_CTRL2, val);
  
 -      } else if (INTEL_INFO(dev_priv)->gen < 9) {
 +      } else if (INTEL_GEN(dev_priv) < 9) {
                I915_WRITE(PORT_CLK_SEL(port), hsw_pll_to_ddi_pll_sel(pll));
        }
  
@@@ -2205,8 -2175,7 +2205,7 @@@ static void intel_ddi_pre_enable_dp(str
                intel_prepare_dp_ddi_buffers(encoder, crtc_state);
  
        intel_ddi_init_dp_buf_reg(encoder);
-       if (!is_mst)
-               intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
+       intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
        intel_dp_start_link_train(intel_dp);
        if (port != PORT_A || INTEL_GEN(dev_priv) >= 9)
                intel_dp_stop_link_train(intel_dp);
@@@ -2304,14 -2273,12 +2303,12 @@@ static void intel_ddi_post_disable_dp(s
        struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
        struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
        struct intel_dp *intel_dp = &dig_port->dp;
-       bool is_mst = intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DP_MST);
  
        /*
         * Power down sink before disabling the port, otherwise we end
         * up getting interrupts from the sink on detecting link loss.
         */
-       if (!is_mst)
-               intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF);
+       intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF);
  
        intel_disable_ddi_buf(encoder);
  
@@@ -2434,48 -2401,6 +2431,48 @@@ static void intel_enable_ddi_hdmi(struc
                                          crtc_state->hdmi_high_tmds_clock_ratio,
                                          crtc_state->hdmi_scrambling);
  
 +      /* Display WA #1143: skl,kbl,cfl */
 +      if (IS_GEN9_BC(dev_priv)) {
 +              /*
 +               * For some reason these chicken bits have been
 +               * stuffed into a transcoder register, even though
 +               * the bits affect a specific DDI port rather than
 +               * a specific transcoder.
 +               */
 +              static const enum transcoder port_to_transcoder[] = {
 +                      [PORT_A] = TRANSCODER_EDP,
 +                      [PORT_B] = TRANSCODER_A,
 +                      [PORT_C] = TRANSCODER_B,
 +                      [PORT_D] = TRANSCODER_C,
 +                      [PORT_E] = TRANSCODER_A,
 +              };
 +              enum transcoder transcoder = port_to_transcoder[port];
 +              u32 val;
 +
 +              val = I915_READ(CHICKEN_TRANS(transcoder));
 +
 +              if (port == PORT_E)
 +                      val |= DDIE_TRAINING_OVERRIDE_ENABLE |
 +                              DDIE_TRAINING_OVERRIDE_VALUE;
 +              else
 +                      val |= DDI_TRAINING_OVERRIDE_ENABLE |
 +                              DDI_TRAINING_OVERRIDE_VALUE;
 +
 +              I915_WRITE(CHICKEN_TRANS(transcoder), val);
 +              POSTING_READ(CHICKEN_TRANS(transcoder));
 +
 +              udelay(1);
 +
 +              if (port == PORT_E)
 +                      val &= ~(DDIE_TRAINING_OVERRIDE_ENABLE |
 +                               DDIE_TRAINING_OVERRIDE_VALUE);
 +              else
 +                      val &= ~(DDI_TRAINING_OVERRIDE_ENABLE |
 +                               DDI_TRAINING_OVERRIDE_VALUE);
 +
 +              I915_WRITE(CHICKEN_TRANS(transcoder), val);
 +      }
 +
        /* In HDMI/DVI mode, the port width, and swing/emphasis values
         * are ignored so nothing special needs to be done besides
         * enabling the port.
@@@ -2495,11 -2420,6 +2492,11 @@@ static void intel_enable_ddi(struct int
                intel_enable_ddi_hdmi(encoder, crtc_state, conn_state);
        else
                intel_enable_ddi_dp(encoder, crtc_state, conn_state);
 +
 +      /* Enable hdcp if it's desired */
 +      if (conn_state->content_protection ==
 +          DRM_MODE_CONTENT_PROTECTION_DESIRED)
 +              intel_hdcp_enable(to_intel_connector(conn_state->connector));
  }
  
  static void intel_disable_ddi_dp(struct intel_encoder *encoder,
  {
        struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
  
 +      intel_dp->link_trained = false;
 +
        if (old_crtc_state->has_audio)
                intel_audio_codec_disable(encoder,
                                          old_crtc_state, old_conn_state);
@@@ -2536,8 -2454,6 +2533,8 @@@ static void intel_disable_ddi(struct in
                              const struct intel_crtc_state *old_crtc_state,
                              const struct drm_connector_state *old_conn_state)
  {
 +      intel_hdcp_disable(to_intel_connector(old_conn_state->connector));
 +
        if (intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_HDMI))
                intel_disable_ddi_hdmi(encoder, old_crtc_state, old_conn_state);
        else
@@@ -2801,150 -2717,6 +2798,150 @@@ intel_ddi_init_dp_connector(struct inte
        return connector;
  }
  
 +static int modeset_pipe(struct drm_crtc *crtc,
 +                      struct drm_modeset_acquire_ctx *ctx)
 +{
 +      struct drm_atomic_state *state;
 +      struct drm_crtc_state *crtc_state;
 +      int ret;
 +
 +      state = drm_atomic_state_alloc(crtc->dev);
 +      if (!state)
 +              return -ENOMEM;
 +
 +      state->acquire_ctx = ctx;
 +
 +      crtc_state = drm_atomic_get_crtc_state(state, crtc);
 +      if (IS_ERR(crtc_state)) {
 +              ret = PTR_ERR(crtc_state);
 +              goto out;
 +      }
 +
 +      crtc_state->mode_changed = true;
 +
 +      ret = drm_atomic_add_affected_connectors(state, crtc);
 +      if (ret)
 +              goto out;
 +
 +      ret = drm_atomic_add_affected_planes(state, crtc);
 +      if (ret)
 +              goto out;
 +
 +      ret = drm_atomic_commit(state);
 +      if (ret)
 +              goto out;
 +
 +      return 0;
 +
 + out:
 +      drm_atomic_state_put(state);
 +
 +      return ret;
 +}
 +
 +static int intel_hdmi_reset_link(struct intel_encoder *encoder,
 +                               struct drm_modeset_acquire_ctx *ctx)
 +{
 +      struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 +      struct intel_hdmi *hdmi = enc_to_intel_hdmi(&encoder->base);
 +      struct intel_connector *connector = hdmi->attached_connector;
 +      struct i2c_adapter *adapter =
 +              intel_gmbus_get_adapter(dev_priv, hdmi->ddc_bus);
 +      struct drm_connector_state *conn_state;
 +      struct intel_crtc_state *crtc_state;
 +      struct intel_crtc *crtc;
 +      u8 config;
 +      int ret;
 +
 +      if (!connector || connector->base.status != connector_status_connected)
 +              return 0;
 +
 +      ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex,
 +                             ctx);
 +      if (ret)
 +              return ret;
 +
 +      conn_state = connector->base.state;
 +
 +      crtc = to_intel_crtc(conn_state->crtc);
 +      if (!crtc)
 +              return 0;
 +
 +      ret = drm_modeset_lock(&crtc->base.mutex, ctx);
 +      if (ret)
 +              return ret;
 +
 +      crtc_state = to_intel_crtc_state(crtc->base.state);
 +
 +      WARN_ON(!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI));
 +
 +      if (!crtc_state->base.active)
 +              return 0;
 +
 +      if (!crtc_state->hdmi_high_tmds_clock_ratio &&
 +          !crtc_state->hdmi_scrambling)
 +              return 0;
 +
 +      if (conn_state->commit &&
 +          !try_wait_for_completion(&conn_state->commit->hw_done))
 +              return 0;
 +
 +      ret = drm_scdc_readb(adapter, SCDC_TMDS_CONFIG, &config);
 +      if (ret < 0) {
 +              DRM_ERROR("Failed to read TMDS config: %d\n", ret);
 +              return 0;
 +      }
 +
 +      if (!!(config & SCDC_TMDS_BIT_CLOCK_RATIO_BY_40) ==
 +          crtc_state->hdmi_high_tmds_clock_ratio &&
 +          !!(config & SCDC_SCRAMBLING_ENABLE) ==
 +          crtc_state->hdmi_scrambling)
 +              return 0;
 +
 +      /*
 +       * HDMI 2.0 says that one should not send scrambled data
 +       * prior to configuring the sink scrambling, and that
 +       * TMDS clock/data transmission should be suspended when
 +       * changing the TMDS clock rate in the sink. So let's
 +       * just do a full modeset here, even though some sinks
 +       * would be perfectly happy if we were to just reconfigure
 +       * the SCDC settings on the fly.
 +       */
 +      return modeset_pipe(&crtc->base, ctx);
 +}
 +
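
For contrast with the comment above: reconfiguring the sink's SCDC settings on the fly, which some sinks would accept but this code deliberately avoids in favour of a full modeset, would go through the drm_scdc helpers roughly as sketched below. This is an illustration only, not a path taken here; scdc_reconfigure() is a hypothetical helper.

#include <drm/drm_scdc_helper.h>

/* Hypothetical on-the-fly SCDC update: write the TMDS clock ratio and
 * scrambling enable directly over DDC, instead of forcing a modeset. */
static void scdc_reconfigure(struct i2c_adapter *ddc,
			     bool high_tmds_clock_ratio, bool scrambling)
{
	drm_scdc_set_high_tmds_clock_ratio(ddc, high_tmds_clock_ratio);
	drm_scdc_set_scrambling(ddc, scrambling);
}
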
 +static bool intel_ddi_hotplug(struct intel_encoder *encoder,
 +                            struct intel_connector *connector)
 +{
 +      struct drm_modeset_acquire_ctx ctx;
 +      bool changed;
 +      int ret;
 +
 +      changed = intel_encoder_hotplug(encoder, connector);
 +
 +      drm_modeset_acquire_init(&ctx, 0);
 +
 +      for (;;) {
 +              if (connector->base.connector_type == DRM_MODE_CONNECTOR_HDMIA)
 +                      ret = intel_hdmi_reset_link(encoder, &ctx);
 +              else
 +                      ret = intel_dp_retrain_link(encoder, &ctx);
 +
 +              if (ret == -EDEADLK) {
 +                      drm_modeset_backoff(&ctx);
 +                      continue;
 +              }
 +
 +              break;
 +      }
 +
 +      drm_modeset_drop_locks(&ctx);
 +      drm_modeset_acquire_fini(&ctx);
 +      WARN(ret, "Acquiring modeset locks failed with %i\n", ret);
 +
 +      return changed;
 +}
 +
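
The loop above is the standard drm_modeset_acquire_ctx idiom: any helper taking modeset locks may return -EDEADLK when a ww-mutex lock cycle is detected, and the caller must then drop everything with drm_modeset_backoff() and retry from scratch. A generic sketch of that contract, with a hypothetical do_locked() callback standing in for the HDMI/DP link-reset calls:

#include <linux/errno.h>
#include <drm/drm_modeset_lock.h>

/* Hypothetical wrapper, shown only to spell out the -EDEADLK contract. */
static int with_modeset_locks(int (*do_locked)(struct drm_modeset_acquire_ctx *ctx))
{
	struct drm_modeset_acquire_ctx ctx;
	int ret;

	drm_modeset_acquire_init(&ctx, 0);
	for (;;) {
		ret = do_locked(&ctx);
		if (ret != -EDEADLK)
			break;
		/* Lock cycle detected: drop all held locks, then retry. */
		drm_modeset_backoff(&ctx);
	}
	drm_modeset_drop_locks(&ctx);
	drm_modeset_acquire_fini(&ctx);

	return ret;
}
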
  static struct intel_connector *
  intel_ddi_init_hdmi_connector(struct intel_digital_port *intel_dig_port)
  {
@@@ -2989,45 -2761,39 +2986,45 @@@ static bool intel_ddi_a_force_4_lanes(s
        return false;
  }
  
 +static int
 +intel_ddi_max_lanes(struct intel_digital_port *intel_dport)
 +{
 +      struct drm_i915_private *dev_priv = to_i915(intel_dport->base.base.dev);
 +      enum port port = intel_dport->base.port;
 +      int max_lanes = 4;
 +
 +      if (INTEL_GEN(dev_priv) >= 11)
 +              return max_lanes;
 +
 +      if (port == PORT_A || port == PORT_E) {
 +              if (I915_READ(DDI_BUF_CTL(PORT_A)) & DDI_A_4_LANES)
 +                      max_lanes = port == PORT_A ? 4 : 0;
 +              else
 +                      /* Both A and E share 2 lanes */
 +                      max_lanes = 2;
 +      }
 +
 +      /*
 +       * Some BIOS might fail to set this bit on port A if eDP
 +       * wasn't lit up at boot.  Force this bit set when needed
 +       * so we use the proper lane count for our calculations.
 +       */
 +      if (intel_ddi_a_force_4_lanes(intel_dport)) {
 +              DRM_DEBUG_KMS("Forcing DDI_A_4_LANES for port A\n");
 +              intel_dport->saved_port_bits |= DDI_A_4_LANES;
 +              max_lanes = 4;
 +      }
 +
 +      return max_lanes;
 +}
 +
  void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port)
  {
        struct intel_digital_port *intel_dig_port;
        struct intel_encoder *intel_encoder;
        struct drm_encoder *encoder;
        bool init_hdmi, init_dp, init_lspcon = false;
 -      int max_lanes;
  
 -      if (I915_READ(DDI_BUF_CTL(PORT_A)) & DDI_A_4_LANES) {
 -              switch (port) {
 -              case PORT_A:
 -                      max_lanes = 4;
 -                      break;
 -              case PORT_E:
 -                      max_lanes = 0;
 -                      break;
 -              default:
 -                      max_lanes = 4;
 -                      break;
 -              }
 -      } else {
 -              switch (port) {
 -              case PORT_A:
 -                      max_lanes = 2;
 -                      break;
 -              case PORT_E:
 -                      max_lanes = 2;
 -                      break;
 -              default:
 -                      max_lanes = 4;
 -                      break;
 -              }
 -      }
  
        init_hdmi = (dev_priv->vbt.ddi_port_info[port].supports_dvi ||
                     dev_priv->vbt.ddi_port_info[port].supports_hdmi);
        drm_encoder_init(&dev_priv->drm, encoder, &intel_ddi_funcs,
                         DRM_MODE_ENCODER_TMDS, "DDI %c", port_name(port));
  
 +      intel_encoder->hotplug = intel_ddi_hotplug;
        intel_encoder->compute_output_type = intel_ddi_compute_output_type;
        intel_encoder->compute_config = intel_ddi_compute_config;
        intel_encoder->enable = intel_enable_ddi;
        intel_encoder->get_config = intel_ddi_get_config;
        intel_encoder->suspend = intel_dp_encoder_suspend;
        intel_encoder->get_power_domains = intel_ddi_get_power_domains;
 +      intel_encoder->type = INTEL_OUTPUT_DDI;
 +      intel_encoder->power_domain = intel_port_to_power_domain(port);
 +      intel_encoder->port = port;
 +      intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2);
 +      intel_encoder->cloneable = 0;
  
 -      intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) &
 -                                        (DDI_BUF_PORT_REVERSAL |
 -                                         DDI_A_4_LANES);
 +      if (INTEL_GEN(dev_priv) >= 11)
 +              intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) &
 +                      DDI_BUF_PORT_REVERSAL;
 +      else
 +              intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) &
 +                      (DDI_BUF_PORT_REVERSAL | DDI_A_4_LANES);
 +      intel_dig_port->dp.output_reg = INVALID_MMIO_REG;
 +      intel_dig_port->max_lanes = intel_ddi_max_lanes(intel_dig_port);
  
        switch (port) {
        case PORT_A:
                intel_dig_port->ddi_io_power_domain =
                        POWER_DOMAIN_PORT_DDI_E_IO;
                break;
 +      case PORT_F:
 +              intel_dig_port->ddi_io_power_domain =
 +                      POWER_DOMAIN_PORT_DDI_F_IO;
 +              break;
        default:
                MISSING_CASE(port);
        }
  
 -      /*
 -       * Some BIOS might fail to set this bit on port A if eDP
 -       * wasn't lit up at boot.  Force this bit set when needed
 -       * so we use the proper lane count for our calculations.
 -       */
 -      if (intel_ddi_a_force_4_lanes(intel_dig_port)) {
 -              DRM_DEBUG_KMS("Forcing DDI_A_4_LANES for port A\n");
 -              intel_dig_port->saved_port_bits |= DDI_A_4_LANES;
 -              max_lanes = 4;
 -      }
 -
 -      intel_dig_port->dp.output_reg = INVALID_MMIO_REG;
 -      intel_dig_port->max_lanes = max_lanes;
 -
 -      intel_encoder->type = INTEL_OUTPUT_DDI;
 -      intel_encoder->power_domain = intel_port_to_power_domain(port);
 -      intel_encoder->port = port;
 -      intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2);
 -      intel_encoder->cloneable = 0;
 -
        intel_infoframe_init(intel_dig_port);
  
        if (init_dp) {
@@@ -246,7 -246,7 +246,7 @@@ engine_stuck(struct intel_engine_cs *en
         */
        tmp = I915_READ_CTL(engine);
        if (tmp & RING_WAIT) {
-               i915_handle_error(dev_priv, 0,
+               i915_handle_error(dev_priv, BIT(engine->id),
                                  "Kicking stuck wait on %s",
                                  engine->name);
                I915_WRITE_CTL(engine, tmp);
                default:
                        return ENGINE_DEAD;
                case 1:
-                       i915_handle_error(dev_priv, 0,
+                       i915_handle_error(dev_priv, ALL_ENGINES,
                                          "Kicking stuck semaphore on %s",
                                          engine->name);
                        I915_WRITE_CTL(engine, tmp);
@@@ -359,7 -359,7 +359,7 @@@ static void hangcheck_accumulate_sample
        case ENGINE_DEAD:
                if (drm_debug & DRM_UT_DRIVER) {
                        struct drm_printer p = drm_debug_printer("hangcheck");
 -                      intel_engine_dump(engine, &p, "%s", engine->name);
 +                      intel_engine_dump(engine, &p, "%s\n", engine->name);
                }
                break;
  
@@@ -22,6 -22,7 +22,7 @@@
  #include <drm/drm_plane_helper.h>
  
  #include "video/imx-ipu-v3.h"
+ #include "imx-drm.h"
  #include "ipuv3-plane.h"
  
  struct ipu_plane_state {
@@@ -272,7 -273,7 +273,7 @@@ static void ipu_plane_destroy(struct dr
        kfree(ipu_plane);
  }
  
- void ipu_plane_state_reset(struct drm_plane *plane)
+ static void ipu_plane_state_reset(struct drm_plane *plane)
  {
        struct ipu_plane_state *ipu_state;
  
        plane->state = &ipu_state->base;
  }
  
- struct drm_plane_state *ipu_plane_duplicate_state(struct drm_plane *plane)
+ static struct drm_plane_state *
+ ipu_plane_duplicate_state(struct drm_plane *plane)
  {
        struct ipu_plane_state *state;
  
        return &state->base;
  }
  
- void ipu_plane_destroy_state(struct drm_plane *plane,
-                            struct drm_plane_state *state)
+ static void ipu_plane_destroy_state(struct drm_plane *plane,
+                                   struct drm_plane_state *state)
  {
        struct ipu_plane_state *ipu_state = to_ipu_plane_state(state);
  
@@@ -351,6 -353,7 +353,6 @@@ static int ipu_plane_atomic_check(struc
        struct drm_framebuffer *old_fb = old_state->fb;
        unsigned long eba, ubo, vbo, old_ubo, old_vbo, alpha_eba;
        bool can_position = (plane->type == DRM_PLANE_TYPE_OVERLAY);
 -      struct drm_rect clip;
        int hsub, vsub;
        int ret;
  
        if (WARN_ON(!crtc_state))
                return -EINVAL;
  
 -      clip.x1 = 0;
 -      clip.y1 = 0;
 -      clip.x2 = crtc_state->adjusted_mode.hdisplay;
 -      clip.y2 = crtc_state->adjusted_mode.vdisplay;
 -      ret = drm_atomic_helper_check_plane_state(state, crtc_state, &clip,
 +      ret = drm_atomic_helper_check_plane_state(state, crtc_state,
                                                  DRM_PLANE_HELPER_NO_SCALING,
                                                  DRM_PLANE_HELPER_NO_SCALING,
                                                  can_position, true);
@@@ -49,6 -49,7 +49,6 @@@ static int meson_plane_atomic_check(str
                                    struct drm_plane_state *state)
  {
        struct drm_crtc_state *crtc_state;
 -      struct drm_rect clip = { 0, };
  
        if (!state->crtc)
                return 0;
        if (IS_ERR(crtc_state))
                return PTR_ERR(crtc_state);
  
 -      clip.x2 = crtc_state->mode.hdisplay;
 -      clip.y2 = crtc_state->mode.vdisplay;
 -
 -      return drm_atomic_helper_check_plane_state(state, crtc_state, &clip,
 +      return drm_atomic_helper_check_plane_state(state, crtc_state,
                                                   DRM_PLANE_HELPER_NO_SCALING,
                                                   DRM_PLANE_HELPER_NO_SCALING,
                                                   true, true);
@@@ -160,10 -164,9 +160,9 @@@ static void meson_plane_atomic_update(s
        /* Update Canvas with buffer address */
        gem = drm_fb_cma_get_gem_obj(fb, 0);
  
-       meson_canvas_setup(priv, MESON_CANVAS_ID_OSD1,
-                          gem->paddr, fb->pitches[0],
-                          fb->height, MESON_CANVAS_WRAP_NONE,
-                          MESON_CANVAS_BLKMODE_LINEAR);
+       priv->viu.osd1_addr = gem->paddr;
+       priv->viu.osd1_stride = fb->pitches[0];
+       priv->viu.osd1_height = fb->height;
  
        spin_unlock_irqrestore(&priv->drm->event_lock, flags);
  }
@@@ -232,6 -232,8 +232,6 @@@ struct nv50_wndw_atom 
        struct drm_plane_state state;
        u8 interval;
  
 -      struct drm_rect clip;
 -
        struct {
                u32  handle;
                u16  offset:12;
@@@ -846,6 -848,10 +846,6 @@@ nv50_wndw_atomic_check_acquire(struct n
        int ret;
  
        NV_ATOMIC(drm, "%s acquire\n", wndw->plane.name);
 -      asyw->clip.x1 = 0;
 -      asyw->clip.y1 = 0;
 -      asyw->clip.x2 = asyh->state.mode.hdisplay;
 -      asyw->clip.y2 = asyh->state.mode.vdisplay;
  
        asyw->image.w = fb->base.width;
        asyw->image.h = fb->base.height;
@@@ -1146,6 -1152,7 +1146,6 @@@ nv50_curs_acquire(struct nv50_wndw *wnd
        int ret;
  
        ret = drm_atomic_helper_check_plane_state(&asyw->state, &asyh->state,
 -                                                &asyw->clip,
                                                  DRM_PLANE_HELPER_NO_SCALING,
                                                  DRM_PLANE_HELPER_NO_SCALING,
                                                  true, true);
@@@ -1435,6 -1442,7 +1435,6 @@@ nv50_base_acquire(struct nv50_wndw *wnd
                return -EINVAL;
  
        ret = drm_atomic_helper_check_plane_state(&asyw->state, &asyh->state,
 -                                                &asyw->clip,
                                                  DRM_PLANE_HELPER_NO_SCALING,
                                                  DRM_PLANE_HELPER_NO_SCALING,
                                                  false, true);
@@@ -4469,6 -4477,7 +4469,7 @@@ nv50_display_create(struct drm_device *
        nouveau_display(dev)->fini = nv50_display_fini;
        disp->disp = &nouveau_display(dev)->disp;
        dev->mode_config.funcs = &nv50_disp_func;
+       dev->driver->driver_features |= DRIVER_PREFER_XBGR_30BPP;
        if (nouveau_atomic)
                dev->driver->driver_features |= DRIVER_ATOMIC;
  
@@@ -25,7 -25,6 +25,7 @@@
  
  #include <video/videomode.h>
  
 +#include "sun4i_backend.h"
  #include "sun4i_crtc.h"
  #include "sun4i_drv.h"
  #include "sunxi_engine.h"
@@@ -47,25 -46,11 +47,25 @@@ static struct drm_encoder *sun4i_crtc_g
        return NULL;
  }
  
 +static int sun4i_crtc_atomic_check(struct drm_crtc *crtc,
 +                                  struct drm_crtc_state *state)
 +{
 +      struct sun4i_crtc *scrtc = drm_crtc_to_sun4i_crtc(crtc);
 +      struct sunxi_engine *engine = scrtc->engine;
 +      int ret = 0;
 +
 +      if (engine && engine->ops && engine->ops->atomic_check)
 +              ret = engine->ops->atomic_check(engine, state);
 +
 +      return ret;
 +}
 +
  static void sun4i_crtc_atomic_begin(struct drm_crtc *crtc,
                                    struct drm_crtc_state *old_state)
  {
        struct sun4i_crtc *scrtc = drm_crtc_to_sun4i_crtc(crtc);
        struct drm_device *dev = crtc->dev;
 +      struct sunxi_engine *engine = scrtc->engine;
        unsigned long flags;
  
        if (crtc->state->event) {
                scrtc->event = crtc->state->event;
                spin_unlock_irqrestore(&dev->event_lock, flags);
                crtc->state->event = NULL;
 -       }
 +      }
 +
 +      if (engine->ops->atomic_begin)
 +              engine->ops->atomic_begin(engine, old_state);
  }
  
  static void sun4i_crtc_atomic_flush(struct drm_crtc *crtc,
@@@ -111,6 -93,8 +111,8 @@@ static void sun4i_crtc_atomic_disable(s
  
        DRM_DEBUG_DRIVER("Disabling the CRTC\n");
  
+       drm_crtc_vblank_off(crtc);
        sun4i_tcon_set_status(scrtc->tcon, encoder, false);
  
        if (crtc->state->event && !crtc->state->active) {
@@@ -131,6 -115,8 +133,8 @@@ static void sun4i_crtc_atomic_enable(st
        DRM_DEBUG_DRIVER("Enabling the CRTC\n");
  
        sun4i_tcon_set_status(scrtc->tcon, encoder, true);
+       drm_crtc_vblank_on(crtc);
  }
  
  static void sun4i_crtc_mode_set_nofb(struct drm_crtc *crtc)
  }
  
  static const struct drm_crtc_helper_funcs sun4i_crtc_helper_funcs = {
 +      .atomic_check   = sun4i_crtc_atomic_check,
        .atomic_begin   = sun4i_crtc_atomic_begin,
        .atomic_flush   = sun4i_crtc_atomic_flush,
        .atomic_enable  = sun4i_crtc_atomic_enable,
@@@ -23,7 -23,6 +23,7 @@@
  #include <drm/drm_of.h>
  
  #include "sun4i_drv.h"
 +#include "sun4i_frontend.h"
  #include "sun4i_framebuffer.h"
  #include "sun4i_tcon.h"
  
@@@ -92,7 -91,6 +92,7 @@@ static int sun4i_drv_bind(struct devic
                goto free_drm;
        }
        drm->dev_private = drv;
 +      INIT_LIST_HEAD(&drv->frontend_list);
        INIT_LIST_HEAD(&drv->engine_list);
        INIT_LIST_HEAD(&drv->tcon_list);
  
        /* drm_vblank_init calls kcalloc, which can fail */
        ret = drm_vblank_init(drm, drm->mode_config.num_crtc);
        if (ret)
-               goto free_mem_region;
+               goto cleanup_mode_config;
  
        drm->irq_enabled = true;
  
@@@ -141,7 -139,6 +141,6 @@@ finish_poll
        sun4i_framebuffer_free(drm);
  cleanup_mode_config:
        drm_mode_config_cleanup(drm);
- free_mem_region:
        of_reserved_mem_device_release(dev);
  free_drm:
        drm_dev_unref(drm);
@@@ -176,21 -173,7 +175,21 @@@ static bool sun4i_drv_node_is_frontend(
                of_device_is_compatible(node, "allwinner,sun5i-a13-display-frontend") ||
                of_device_is_compatible(node, "allwinner,sun6i-a31-display-frontend") ||
                of_device_is_compatible(node, "allwinner,sun7i-a20-display-frontend") ||
 -              of_device_is_compatible(node, "allwinner,sun8i-a33-display-frontend");
 +              of_device_is_compatible(node, "allwinner,sun8i-a33-display-frontend") ||
 +              of_device_is_compatible(node, "allwinner,sun9i-a80-display-frontend");
 +}
 +
 +static bool sun4i_drv_node_is_deu(struct device_node *node)
 +{
 +      return of_device_is_compatible(node, "allwinner,sun9i-a80-deu");
 +}
 +
 +static bool sun4i_drv_node_is_supported_frontend(struct device_node *node)
 +{
 +      if (IS_ENABLED(CONFIG_DRM_SUN4I_BACKEND))
 +              return !!of_match_node(sun4i_frontend_of_table, node);
 +
 +      return false;
  }
  
  static bool sun4i_drv_node_is_tcon(struct device_node *node)
@@@ -241,11 -224,9 +240,11 @@@ static int sun4i_drv_add_endpoints(stru
        int count = 0;
  
        /*
 -       * We don't support the frontend for now, so we will never
 -       * have a device bound. Just skip over it, but we still want
 -       * the rest our pipeline to be added.
 +       * The frontend has been disabled in some of our old device
 +       * trees. If we find a node that is the frontend and is
 +       * disabled, we should just follow through and parse its
 +       * child, but without adding it to the component list.
 +       * Otherwise, we obviously want to add it to the list.
         */
        if (!sun4i_drv_node_is_frontend(node) &&
            !of_device_is_available(node))
        if (sun4i_drv_node_is_connector(node))
                return 0;
  
 -      if (!sun4i_drv_node_is_frontend(node)) {
 +      /*
 +       * If the device is either just a regular device, or an
 +       * enabled frontend supported by the driver, we add it to our
 +       * component list.
 +       */
 +      if (!(sun4i_drv_node_is_frontend(node) ||
 +            sun4i_drv_node_is_deu(node)) ||
 +          (sun4i_drv_node_is_supported_frontend(node) &&
 +           of_device_is_available(node))) {
                /* Add current component */
                DRM_DEBUG_DRIVER("Adding component %pOF\n", node);
                drm_of_component_match_add(dev, match, compare_of, node);
@@@ -366,9 -339,7 +365,9 @@@ static const struct of_device_id sun4i_
        { .compatible = "allwinner,sun7i-a20-display-engine" },
        { .compatible = "allwinner,sun8i-a33-display-engine" },
        { .compatible = "allwinner,sun8i-a83t-display-engine" },
 +      { .compatible = "allwinner,sun8i-h3-display-engine" },
        { .compatible = "allwinner,sun8i-v3s-display-engine" },
 +      { .compatible = "allwinner,sun9i-a80-display-engine" },
        { }
  };
  MODULE_DEVICE_TABLE(of, sun4i_drv_of_table);
@@@ -52,10 -52,10 +52,10 @@@ static int sun4i_rgb_get_modes(struct d
        return drm_panel_get_modes(tcon->panel);
  }
  
 -static int sun4i_rgb_mode_valid(struct drm_connector *connector,
 -                              struct drm_display_mode *mode)
 +static enum drm_mode_status sun4i_rgb_mode_valid(struct drm_encoder *crtc,
 +                                               const struct drm_display_mode *mode)
  {
 -      struct sun4i_rgb *rgb = drm_connector_to_sun4i_rgb(connector);
 +      struct sun4i_rgb *rgb = drm_encoder_to_sun4i_rgb(crtc);
        struct sun4i_tcon *tcon = rgb->tcon;
        u32 hsync = mode->hsync_end - mode->hsync_start;
        u32 vsync = mode->vsync_end - mode->vsync_start;
@@@ -92,6 -92,8 +92,8 @@@
  
        DRM_DEBUG_DRIVER("Vertical parameters OK\n");
  
+       tcon->dclk_min_div = 6;
+       tcon->dclk_max_div = 127;
        rounded_rate = clk_round_rate(tcon->dclk, rate);
        if (rounded_rate < rate)
                return MODE_CLOCK_LOW;
  
  static struct drm_connector_helper_funcs sun4i_rgb_con_helper_funcs = {
        .get_modes      = sun4i_rgb_get_modes,
 -      .mode_valid     = sun4i_rgb_mode_valid,
  };
  
  static void
@@@ -155,7 -158,6 +157,7 @@@ static void sun4i_rgb_encoder_disable(s
  static struct drm_encoder_helper_funcs sun4i_rgb_enc_helper_funcs = {
        .disable        = sun4i_rgb_encoder_disable,
        .enable         = sun4i_rgb_encoder_enable,
 +      .mode_valid     = sun4i_rgb_mode_valid,
  };
  
  static void sun4i_rgb_enc_destroy(struct drm_encoder *encoder)
@@@ -17,7 -17,6 +17,7 @@@
  #include <drm/drm_encoder.h>
  #include <drm/drm_modes.h>
  #include <drm/drm_of.h>
 +#include <drm/drm_panel.h>
  
  #include <uapi/drm/drm_mode.h>
  
@@@ -85,7 -84,6 +85,7 @@@ static void sun4i_tcon_channel_set_stat
  
        switch (channel) {
        case 0:
 +              WARN_ON(!tcon->quirks->has_channel_0);
                regmap_update_bits(tcon->regs, SUN4I_TCON0_CTL_REG,
                                   SUN4I_TCON0_CTL_TCON_ENABLE,
                                   enabled ? SUN4I_TCON0_CTL_TCON_ENABLE : 0);
                return;
        }
  
-       if (enabled)
+       if (enabled) {
                clk_prepare_enable(clk);
-       else
+               clk_rate_exclusive_get(clk);
+       } else {
+               clk_rate_exclusive_put(clk);
                clk_disable_unprepare(clk);
+       }
  }
  
  static void sun4i_tcon_lvds_set_status(struct sun4i_tcon *tcon,
@@@ -278,8 -279,6 +281,8 @@@ static void sun4i_tcon0_mode_set_lvds(s
        u8 clk_delay;
        u32 reg, val = 0;
  
 +      WARN_ON(!tcon->quirks->has_channel_0);
 +
        tcon->dclk_min_div = 7;
        tcon->dclk_max_div = 7;
        sun4i_tcon0_mode_set_common(tcon, mode);
        regmap_update_bits(tcon->regs, SUN4I_TCON_GCTL_REG,
                           SUN4I_TCON_GCTL_IOMAP_MASK,
                           SUN4I_TCON_GCTL_IOMAP_TCON0);
+       /* Enable the output on the pins */
+       regmap_write(tcon->regs, SUN4I_TCON0_IO_TRI_REG, 0xe0000000);
  }
  
  static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon,
                                     const struct drm_display_mode *mode)
  {
 +      struct drm_panel *panel = tcon->panel;
 +      struct drm_connector *connector = panel->connector;
 +      struct drm_display_info display_info = connector->display_info;
        unsigned int bp, hsync, vsync;
        u8 clk_delay;
        u32 val = 0;
  
 +      WARN_ON(!tcon->quirks->has_channel_0);
 +
        tcon->dclk_min_div = 6;
        tcon->dclk_max_div = 127;
        sun4i_tcon0_mode_set_common(tcon, mode);
                     SUN4I_TCON0_BASIC3_H_SYNC(hsync));
  
        /* Setup the polarity of the various signals */
 -      if (!(mode->flags & DRM_MODE_FLAG_PHSYNC))
 +      if (mode->flags & DRM_MODE_FLAG_PHSYNC)
                val |= SUN4I_TCON0_IO_POL_HSYNC_POSITIVE;
  
 -      if (!(mode->flags & DRM_MODE_FLAG_PVSYNC))
 +      if (mode->flags & DRM_MODE_FLAG_PVSYNC)
                val |= SUN4I_TCON0_IO_POL_VSYNC_POSITIVE;
  
 +      /*
 +       * On A20 and similar SoCs, the only way to achieve a positive edge
 +       * (rising edge) is to set the dclk clock phase to 2/3 (240°).
 +       * By default the TCON works on the negative edge (falling edge),
 +       * which is why the phase is set to 0 in that case.
 +       * Unfortunately there is no way to logically invert dclk through
 +       * the IO_POL register.
 +       * The only acceptable way to make this work, triple checked with a
 +       * scope, is using a clock phase of 0° for the negative edge and of
 +       * 240° for the positive edge.
 +       * On A33 and similar SoCs there would be a 90° phase option,
 +       * but it also divides dclk by 2.
 +       * The following code is a way to avoid quirks all around the TCON
 +       * and DOTCLOCK drivers.
 +       */
 +      if (display_info.bus_flags & DRM_BUS_FLAG_PIXDATA_POSEDGE)
 +              clk_set_phase(tcon->dclk, 240);
 +
 +      if (display_info.bus_flags & DRM_BUS_FLAG_PIXDATA_NEGEDGE)
 +              clk_set_phase(tcon->dclk, 0);
 +
        regmap_update_bits(tcon->regs, SUN4I_TCON0_IO_POL_REG,
                           SUN4I_TCON0_IO_POL_HSYNC_POSITIVE | SUN4I_TCON0_IO_POL_VSYNC_POSITIVE,
                           val);
@@@ -570,7 -546,6 +576,7 @@@ static irqreturn_t sun4i_tcon_handler(i
        struct sun4i_tcon *tcon = private;
        struct drm_device *drm = tcon->drm;
        struct sun4i_crtc *scrtc = tcon->crtc;
 +      struct sunxi_engine *engine = scrtc->engine;
        unsigned int status;
  
        regmap_read(tcon->regs, SUN4I_TCON_GINT0_REG, &status);
                           SUN4I_TCON_GINT0_VBLANK_INT(1),
                           0);
  
 +      if (engine->ops->vblank_quirk)
 +              engine->ops->vblank_quirk(engine);
 +
        return IRQ_HANDLED;
  }
  
@@@ -604,12 -576,10 +610,12 @@@ static int sun4i_tcon_init_clocks(struc
        }
        clk_prepare_enable(tcon->clk);
  
 -      tcon->sclk0 = devm_clk_get(dev, "tcon-ch0");
 -      if (IS_ERR(tcon->sclk0)) {
 -              dev_err(dev, "Couldn't get the TCON channel 0 clock\n");
 -              return PTR_ERR(tcon->sclk0);
 +      if (tcon->quirks->has_channel_0) {
 +              tcon->sclk0 = devm_clk_get(dev, "tcon-ch0");
 +              if (IS_ERR(tcon->sclk0)) {
 +                      dev_err(dev, "Couldn't get the TCON channel 0 clock\n");
 +                      return PTR_ERR(tcon->sclk0);
 +              }
        }
  
        if (tcon->quirks->has_channel_1) {
@@@ -875,7 -845,6 +881,7 @@@ static int sun4i_tcon_bind(struct devic
        struct sunxi_engine *engine;
        struct device_node *remote;
        struct sun4i_tcon *tcon;
 +      struct reset_control *edp_rstc;
        bool has_lvds_rst, has_lvds_alt, can_lvds;
        int ret;
  
                return PTR_ERR(tcon->lcd_rst);
        }
  
 +      if (tcon->quirks->needs_edp_reset) {
 +              edp_rstc = devm_reset_control_get_shared(dev, "edp");
 +              if (IS_ERR(edp_rstc)) {
 +                      dev_err(dev, "Couldn't get edp reset line\n");
 +                      return PTR_ERR(edp_rstc);
 +              }
 +
 +              ret = reset_control_deassert(edp_rstc);
 +              if (ret) {
 +                      dev_err(dev, "Couldn't deassert edp reset line\n");
 +                      return ret;
 +              }
 +      }
 +
        /* Make sure our TCON is reset */
        ret = reset_control_reset(tcon->lcd_rst);
        if (ret) {
                return ret;
        }
  
-       /*
-        * This can only be made optional since we've had DT nodes
-        * without the LVDS reset properties.
-        *
-        * If the property is missing, just disable LVDS, and print a
-        * warning.
-        */
-       tcon->lvds_rst = devm_reset_control_get_optional(dev, "lvds");
-       if (IS_ERR(tcon->lvds_rst)) {
-               dev_err(dev, "Couldn't get our reset line\n");
-               return PTR_ERR(tcon->lvds_rst);
-       } else if (tcon->lvds_rst) {
-               has_lvds_rst = true;
-               reset_control_reset(tcon->lvds_rst);
-       } else {
-               has_lvds_rst = false;
-       }
+       if (tcon->quirks->supports_lvds) {
+               /*
+                * This can only be made optional since we've had DT
+                * nodes without the LVDS reset properties.
+                *
+                * If the property is missing, just disable LVDS, and
+                * print a warning.
+                */
+               tcon->lvds_rst = devm_reset_control_get_optional(dev, "lvds");
+               if (IS_ERR(tcon->lvds_rst)) {
+                       dev_err(dev, "Couldn't get our reset line\n");
+                       return PTR_ERR(tcon->lvds_rst);
+               } else if (tcon->lvds_rst) {
+                       has_lvds_rst = true;
+                       reset_control_reset(tcon->lvds_rst);
+               } else {
+                       has_lvds_rst = false;
+               }
  
-       /*
-        * This can only be made optional since we've had DT nodes
-        * without the LVDS reset properties.
-        *
-        * If the property is missing, just disable LVDS, and print a
-        * warning.
-        */
-       if (tcon->quirks->has_lvds_alt) {
-               tcon->lvds_pll = devm_clk_get(dev, "lvds-alt");
-               if (IS_ERR(tcon->lvds_pll)) {
-                       if (PTR_ERR(tcon->lvds_pll) == -ENOENT) {
-                               has_lvds_alt = false;
+               /*
+                * This can only be made optional since we've had DT
+                * nodes without the LVDS reset properties.
+                *
+                * If the property is missing, just disable LVDS, and
+                * print a warning.
+                */
+               if (tcon->quirks->has_lvds_alt) {
+                       tcon->lvds_pll = devm_clk_get(dev, "lvds-alt");
+                       if (IS_ERR(tcon->lvds_pll)) {
+                               if (PTR_ERR(tcon->lvds_pll) == -ENOENT) {
+                                       has_lvds_alt = false;
+                               } else {
+                                       dev_err(dev, "Couldn't get the LVDS PLL\n");
+                                       return PTR_ERR(tcon->lvds_pll);
+                               }
                        } else {
-                               dev_err(dev, "Couldn't get the LVDS PLL\n");
-                               return PTR_ERR(tcon->lvds_pll);
+                               has_lvds_alt = true;
                        }
-               } else {
-                       has_lvds_alt = true;
                }
-       }
  
-       if (!has_lvds_rst || (tcon->quirks->has_lvds_alt && !has_lvds_alt)) {
-               dev_warn(dev,
-                        "Missing LVDS properties, Please upgrade your DT\n");
-               dev_warn(dev, "LVDS output disabled\n");
-               can_lvds = false;
+               if (!has_lvds_rst ||
+                   (tcon->quirks->has_lvds_alt && !has_lvds_alt)) {
+                       dev_warn(dev, "Missing LVDS properties, Please upgrade your DT\n");
+                       dev_warn(dev, "LVDS output disabled\n");
+                       can_lvds = false;
+               } else {
+                       can_lvds = true;
+               }
        } else {
-               can_lvds = true;
+               can_lvds = false;
        }
  
        ret = sun4i_tcon_init_clocks(dev, tcon);
                goto err_free_clocks;
        }
  
 -      ret = sun4i_dclk_create(dev, tcon);
 -      if (ret) {
 -              dev_err(dev, "Couldn't create our TCON dot clock\n");
 -              goto err_free_clocks;
 +      if (tcon->quirks->has_channel_0) {
 +              ret = sun4i_dclk_create(dev, tcon);
 +              if (ret) {
 +                      dev_err(dev, "Couldn't create our TCON dot clock\n");
 +                      goto err_free_clocks;
 +              }
        }
  
        ret = sun4i_tcon_init_irq(dev, tcon);
        return 0;
  
  err_free_dotclock:
 -      sun4i_dclk_free(tcon);
 +      if (tcon->quirks->has_channel_0)
 +              sun4i_dclk_free(tcon);
  err_free_clocks:
        sun4i_tcon_free_clocks(tcon);
  err_assert_reset:
@@@ -1059,8 -1015,7 +1069,8 @@@ static void sun4i_tcon_unbind(struct de
        struct sun4i_tcon *tcon = dev_get_drvdata(dev);
  
        list_del(&tcon->list);
 -      sun4i_dclk_free(tcon);
 +      if (tcon->quirks->has_channel_0)
 +              sun4i_dclk_free(tcon);
        sun4i_tcon_free_clocks(tcon);
  }
  
@@@ -1157,19 -1112,16 +1167,19 @@@ static int sun6i_tcon_set_mux(struct su
  }
  
  static const struct sun4i_tcon_quirks sun4i_a10_quirks = {
 +      .has_channel_0          = true,
        .has_channel_1          = true,
        .set_mux                = sun4i_a10_tcon_set_mux,
  };
  
  static const struct sun4i_tcon_quirks sun5i_a13_quirks = {
 +      .has_channel_0          = true,
        .has_channel_1          = true,
        .set_mux                = sun5i_a13_tcon_set_mux,
  };
  
  static const struct sun4i_tcon_quirks sun6i_a31_quirks = {
 +      .has_channel_0          = true,
        .has_channel_1          = true,
        .has_lvds_alt           = true,
        .needs_de_be_mux        = true,
  };
  
  static const struct sun4i_tcon_quirks sun6i_a31s_quirks = {
 +      .has_channel_0          = true,
        .has_channel_1          = true,
        .needs_de_be_mux        = true,
  };
  
  static const struct sun4i_tcon_quirks sun7i_a20_quirks = {
 +      .has_channel_0          = true,
        .has_channel_1          = true,
        /* Same display pipeline structure as A10 */
        .set_mux                = sun4i_a10_tcon_set_mux,
  };
  
  static const struct sun4i_tcon_quirks sun8i_a33_quirks = {
 +      .has_channel_0          = true,
        .has_lvds_alt           = true,
  };
  
  static const struct sun4i_tcon_quirks sun8i_a83t_lcd_quirks = {
+       .supports_lvds          = true,
 +      .has_channel_0          = true,
 +};
 +
 +static const struct sun4i_tcon_quirks sun8i_a83t_tv_quirks = {
 +      .has_channel_1          = true,
  };
  
  static const struct sun4i_tcon_quirks sun8i_v3s_quirks = {
 -      /* nothing is supported */
 +      .has_channel_0          = true,
 +};
 +
 +static const struct sun4i_tcon_quirks sun9i_a80_tcon_lcd_quirks = {
 +      .has_channel_0  = true,
 +      .needs_edp_reset = true,
 +};
 +
 +static const struct sun4i_tcon_quirks sun9i_a80_tcon_tv_quirks = {
 +      .has_channel_1  = true,
 +      .needs_edp_reset = true,
  };
  
  /* sun4i_drv uses this list to check if a device node is a TCON */
@@@ -1225,10 -1160,7 +1236,10 @@@ const struct of_device_id sun4i_tcon_of
        { .compatible = "allwinner,sun7i-a20-tcon", .data = &sun7i_a20_quirks },
        { .compatible = "allwinner,sun8i-a33-tcon", .data = &sun8i_a33_quirks },
        { .compatible = "allwinner,sun8i-a83t-tcon-lcd", .data = &sun8i_a83t_lcd_quirks },
 +      { .compatible = "allwinner,sun8i-a83t-tcon-tv", .data = &sun8i_a83t_tv_quirks },
        { .compatible = "allwinner,sun8i-v3s-tcon", .data = &sun8i_v3s_quirks },
 +      { .compatible = "allwinner,sun9i-a80-tcon-lcd", .data = &sun9i_a80_tcon_lcd_quirks },
 +      { .compatible = "allwinner,sun9i-a80-tcon-tv", .data = &sun9i_a80_tcon_tv_quirks },
        { }
  };
  MODULE_DEVICE_TABLE(of, sun4i_tcon_of_table);
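A minimal sketch, not part of this patch, of how a caller such as sun4i_drv could consult the of_device_id table above to decide whether a device node is a TCON; the helper name node_is_tcon is hypothetical, while of_match_node() is the standard OF matching API from <linux/of.h>:

	/* Hypothetical helper, assuming sun4i_tcon_of_table is visible here. */
	static bool node_is_tcon(struct device_node *node)
	{
		/* True iff the node's compatible string appears in the table above. */
		return of_match_node(sun4i_tcon_of_table, node) != NULL;
	}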
  struct sun4i_tcon;
  
  struct sun4i_tcon_quirks {
 +      bool    has_channel_0;  /* a83t does not have channel 0 on second TCON */
        bool    has_channel_1;  /* a33 does not have channel 1 */
        bool    has_lvds_alt;   /* Does the LVDS clock have a parent other than the TCON clock? */
        bool    needs_de_be_mux; /* sun6i needs mux to select backend */
 +      bool    needs_edp_reset; /* a80 edp reset needed for tcon0 access */
+       bool    supports_lvds;   /* Does the TCON support an LVDS output? */
  
        /* callback to handle tcon muxing options */
        int     (*set_mux)(struct sun4i_tcon *, const struct drm_encoder *);
@@@ -383,12 -383,6 +383,12 @@@ static const u32 tegra20_primary_format
        DRM_FORMAT_XRGB8888,
  };
  
 +static const u64 tegra20_modifiers[] = {
 +      DRM_FORMAT_MOD_LINEAR,
 +      DRM_FORMAT_MOD_NVIDIA_TEGRA_TILED,
 +      DRM_FORMAT_MOD_INVALID
 +};
 +
  static const u32 tegra114_primary_formats[] = {
        DRM_FORMAT_ARGB4444,
        DRM_FORMAT_ARGB1555,
@@@ -436,17 -430,6 +436,17 @@@ static const u32 tegra124_primary_forma
        DRM_FORMAT_BGRX8888,
  };
  
 +static const u64 tegra124_modifiers[] = {
 +      DRM_FORMAT_MOD_LINEAR,
 +      DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(0),
 +      DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(1),
 +      DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(2),
 +      DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(3),
 +      DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(4),
 +      DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(5),
 +      DRM_FORMAT_MOD_INVALID
 +};
 +
  static int tegra_plane_atomic_check(struct drm_plane *plane,
                                    struct drm_plane_state *state)
  {
@@@ -613,7 -596,6 +613,7 @@@ static struct drm_plane *tegra_primary_
        enum drm_plane_type type = DRM_PLANE_TYPE_PRIMARY;
        struct tegra_plane *plane;
        unsigned int num_formats;
 +      const u64 *modifiers;
        const u32 *formats;
        int err;
  
  
        num_formats = dc->soc->num_primary_formats;
        formats = dc->soc->primary_formats;
 +      modifiers = dc->soc->modifiers;
  
        err = drm_universal_plane_init(drm, &plane->base, possible_crtcs,
                                       &tegra_plane_funcs, formats,
 -                                     num_formats, NULL, type, NULL);
 +                                     num_formats, modifiers, type, NULL);
        if (err < 0) {
                kfree(plane);
                return ERR_PTR(err);
@@@ -883,13 -864,11 +883,13 @@@ static const u32 tegra124_overlay_forma
  
  static struct drm_plane *tegra_dc_overlay_plane_create(struct drm_device *drm,
                                                       struct tegra_dc *dc,
 -                                                     unsigned int index)
 +                                                     unsigned int index,
 +                                                     bool cursor)
  {
        unsigned long possible_crtcs = tegra_plane_get_possible_crtcs(drm);
        struct tegra_plane *plane;
        unsigned int num_formats;
 +      enum drm_plane_type type;
        const u32 *formats;
        int err;
  
        num_formats = dc->soc->num_overlay_formats;
        formats = dc->soc->overlay_formats;
  
 +      if (!cursor)
 +              type = DRM_PLANE_TYPE_OVERLAY;
 +      else
 +              type = DRM_PLANE_TYPE_CURSOR;
 +
        err = drm_universal_plane_init(drm, &plane->base, possible_crtcs,
                                       &tegra_plane_funcs, formats,
 -                                     num_formats, NULL,
 -                                     DRM_PLANE_TYPE_OVERLAY, NULL);
 +                                     num_formats, NULL, type, NULL);
        if (err < 0) {
                kfree(plane);
                return ERR_PTR(err);
@@@ -963,7 -938,6 +963,7 @@@ static struct drm_plane *tegra_dc_add_p
                                             struct tegra_dc *dc)
  {
        struct drm_plane *planes[2], *primary;
 +      unsigned int planes_num;
        unsigned int i;
        int err;
  
        if (IS_ERR(primary))
                return primary;
  
 -      for (i = 0; i < 2; i++) {
 -              planes[i] = tegra_dc_overlay_plane_create(drm, dc, 1 + i);
 +      if (dc->soc->supports_cursor)
 +              planes_num = 2;
 +      else
 +              planes_num = 1;
 +
 +      for (i = 0; i < planes_num; i++) {
 +              planes[i] = tegra_dc_overlay_plane_create(drm, dc, 1 + i,
 +                                                        false);
                if (IS_ERR(planes[i])) {
                        err = PTR_ERR(planes[i]);
  
@@@ -1391,7 -1359,7 +1391,7 @@@ static u32 tegra_dc_get_vblank_counter(
                return host1x_syncpt_read(dc->syncpt);
  
        /* fallback to software emulated VBLANK counter */
 -      return drm_crtc_vblank_count(&dc->base);
 +      return (u32)drm_crtc_vblank_count(&dc->base);
  }
  
  static int tegra_dc_enable_vblank(struct drm_crtc *crtc)
@@@ -1736,6 -1704,31 +1736,6 @@@ static void tegra_crtc_atomic_enable(st
        drm_crtc_vblank_on(crtc);
  }
  
 -static int tegra_crtc_atomic_check(struct drm_crtc *crtc,
 -                                 struct drm_crtc_state *state)
 -{
 -      struct tegra_atomic_state *s = to_tegra_atomic_state(state->state);
 -      struct tegra_dc_state *tegra = to_dc_state(state);
 -
 -      /*
 -       * The display hub display clock needs to be fed by the display clock
 -       * with the highest frequency to ensure proper functioning of all the
 -       * displays.
 -       *
 -       * Note that this isn't used before Tegra186, but it doesn't hurt and
 -       * conditionalizing it would make the code less clean.
 -       */
 -      if (state->active) {
 -              if (!s->clk_disp || tegra->pclk > s->rate) {
 -                      s->dc = to_tegra_dc(crtc);
 -                      s->clk_disp = s->dc->clk;
 -                      s->rate = tegra->pclk;
 -              }
 -      }
 -
 -      return 0;
 -}
 -
  static void tegra_crtc_atomic_begin(struct drm_crtc *crtc,
                                    struct drm_crtc_state *old_crtc_state)
  {
@@@ -1772,6 -1765,7 +1772,6 @@@ static void tegra_crtc_atomic_flush(str
  }
  
  static const struct drm_crtc_helper_funcs tegra_crtc_helper_funcs = {
 -      .atomic_check = tegra_crtc_atomic_check,
        .atomic_begin = tegra_crtc_atomic_begin,
        .atomic_flush = tegra_crtc_atomic_flush,
        .atomic_enable = tegra_crtc_atomic_enable,
@@@ -1870,13 -1864,6 +1870,13 @@@ static int tegra_dc_init(struct host1x_
                        err = PTR_ERR(cursor);
                        goto cleanup;
                }
 +      } else {
 +              /* dedicate one overlay to mouse cursor */
 +              cursor = tegra_dc_overlay_plane_create(drm, dc, 2, true);
 +              if (IS_ERR(cursor)) {
 +                      err = PTR_ERR(cursor);
 +                      goto cleanup;
 +              }
        }
  
        err = drm_crtc_init_with_planes(drm, &dc->base, primary, cursor,
@@@ -1916,8 -1903,12 +1916,12 @@@ cleanup
        if (!IS_ERR(primary))
                drm_plane_cleanup(primary);
  
-       if (group && tegra->domain) {
-               iommu_detach_group(tegra->domain, group);
+       if (group && dc->domain) {
+               if (group == tegra->group) {
+                       iommu_detach_group(dc->domain, group);
+                       tegra->group = NULL;
+               }
                dc->domain = NULL;
        }
  
  
  static int tegra_dc_exit(struct host1x_client *client)
  {
+       struct drm_device *drm = dev_get_drvdata(client->parent);
        struct iommu_group *group = iommu_group_get(client->dev);
        struct tegra_dc *dc = host1x_client_to_dc(client);
+       struct tegra_drm *tegra = drm->dev_private;
        int err;
  
        devm_free_irq(dc->dev, dc->irq, dc);
        }
  
        if (group && dc->domain) {
-               iommu_detach_group(dc->domain, group);
+               if (group == tegra->group) {
+                       iommu_detach_group(dc->domain, group);
+                       tegra->group = NULL;
+               }
                dc->domain = NULL;
        }
  
@@@ -1967,7 -1964,6 +1977,7 @@@ static const struct tegra_dc_soc_info t
        .primary_formats = tegra20_primary_formats,
        .num_overlay_formats = ARRAY_SIZE(tegra20_overlay_formats),
        .overlay_formats = tegra20_overlay_formats,
 +      .modifiers = tegra20_modifiers,
  };
  
  static const struct tegra_dc_soc_info tegra30_dc_soc_info = {
        .primary_formats = tegra20_primary_formats,
        .num_overlay_formats = ARRAY_SIZE(tegra20_overlay_formats),
        .overlay_formats = tegra20_overlay_formats,
 +      .modifiers = tegra20_modifiers,
  };
  
  static const struct tegra_dc_soc_info tegra114_dc_soc_info = {
        .primary_formats = tegra114_primary_formats,
        .num_overlay_formats = ARRAY_SIZE(tegra114_overlay_formats),
        .overlay_formats = tegra114_overlay_formats,
 +      .modifiers = tegra20_modifiers,
  };
  
  static const struct tegra_dc_soc_info tegra124_dc_soc_info = {
        .primary_formats = tegra114_primary_formats,
        .num_overlay_formats = ARRAY_SIZE(tegra124_overlay_formats),
        .overlay_formats = tegra114_overlay_formats,
 +      .modifiers = tegra124_modifiers,
  };
  
  static const struct tegra_dc_soc_info tegra210_dc_soc_info = {
        .primary_formats = tegra114_primary_formats,
        .num_overlay_formats = ARRAY_SIZE(tegra114_overlay_formats),
        .overlay_formats = tegra114_overlay_formats,
 +      .modifiers = tegra124_modifiers,
  };
  
  static const struct tegra_windowgroup_soc tegra186_dc_wgrps[] = {
@@@ -42,10 -42,6 +42,10 @@@ static int tegra_atomic_check(struct dr
        if (err < 0)
                return err;
  
 +      err = tegra_display_hub_atomic_check(drm, state);
 +      if (err < 0)
 +              return err;
 +
        err = drm_atomic_normalize_zpos(drm, state);
        if (err < 0)
                return err;
        return 0;
  }
  
 -static struct drm_atomic_state *
 -tegra_atomic_state_alloc(struct drm_device *drm)
 -{
 -      struct tegra_atomic_state *state = kzalloc(sizeof(*state), GFP_KERNEL);
 -
 -      if (!state || drm_atomic_state_init(drm, &state->base) < 0) {
 -              kfree(state);
 -              return NULL;
 -      }
 -
 -      return &state->base;
 -}
 -
 -static void tegra_atomic_state_clear(struct drm_atomic_state *state)
 -{
 -      struct tegra_atomic_state *tegra = to_tegra_atomic_state(state);
 -
 -      drm_atomic_state_default_clear(state);
 -      tegra->clk_disp = NULL;
 -      tegra->dc = NULL;
 -      tegra->rate = 0;
 -}
 -
 -static void tegra_atomic_state_free(struct drm_atomic_state *state)
 -{
 -      drm_atomic_state_default_release(state);
 -      kfree(state);
 -}
 -
  static const struct drm_mode_config_funcs tegra_drm_mode_config_funcs = {
        .fb_create = tegra_fb_create,
  #ifdef CONFIG_DRM_FBDEV_EMULATION
@@@ -67,6 -92,9 +67,6 @@@
  #endif
        .atomic_check = tegra_atomic_check,
        .atomic_commit = drm_atomic_helper_commit,
 -      .atomic_state_alloc = tegra_atomic_state_alloc,
 -      .atomic_state_clear = tegra_atomic_state_clear,
 -      .atomic_state_free = tegra_atomic_state_free,
  };
  
  static void tegra_atomic_commit_tail(struct drm_atomic_state *old_state)
@@@ -222,6 -250,7 +222,7 @@@ static void tegra_drm_unload(struct drm
  
        drm_kms_helper_poll_fini(drm);
        tegra_drm_fb_exit(drm);
+       drm_atomic_helper_shutdown(drm);
        drm_mode_config_cleanup(drm);
  
        err = host1x_device_exit(device);
@@@ -68,21 -68,6 +68,21 @@@ static void tegra_plane_atomic_destroy_
        kfree(state);
  }
  
 +static bool tegra_plane_format_mod_supported(struct drm_plane *plane,
 +                                           uint32_t format,
 +                                           uint64_t modifier)
 +{
 +      const struct drm_format_info *info = drm_format_info(format);
 +
 +      if (modifier == DRM_FORMAT_MOD_LINEAR)
 +              return true;
 +
 +      if (info->num_planes == 1)
 +              return true;
 +
 +      return false;
 +}
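To make the modifier check above concrete, a hypothetical illustration that is not part of this patch; "plane" stands for any struct drm_plane registered with these funcs:

	/* NV12 has two planes, so only the linear modifier is accepted. */
	tegra_plane_format_mod_supported(plane, DRM_FORMAT_NV12,
					 DRM_FORMAT_MOD_LINEAR);                /* true */
	tegra_plane_format_mod_supported(plane, DRM_FORMAT_NV12,
					 DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(2)); /* false */
	/* Single-plane formats such as DRM_FORMAT_XRGB8888 pass either check. */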
 +
  const struct drm_plane_funcs tegra_plane_funcs = {
        .update_plane = drm_atomic_helper_update_plane,
        .disable_plane = drm_atomic_helper_disable_plane,
@@@ -90,7 -75,6 +90,7 @@@
        .reset = tegra_plane_reset,
        .atomic_duplicate_state = tegra_plane_atomic_duplicate_state,
        .atomic_destroy_state = tegra_plane_atomic_destroy_state,
 +      .format_mod_supported = tegra_plane_format_mod_supported,
  };
  
  int tegra_plane_state_add(struct tegra_plane *plane,
@@@ -98,6 -82,7 +98,6 @@@
  {
        struct drm_crtc_state *crtc_state;
        struct tegra_dc_state *tegra;
 -      struct drm_rect clip;
        int err;
  
        /* Propagate errors from allocation or locking failures. */
        if (IS_ERR(crtc_state))
                return PTR_ERR(crtc_state);
  
 -      clip.x1 = 0;
 -      clip.y1 = 0;
 -      clip.x2 = crtc_state->mode.hdisplay;
 -      clip.y2 = crtc_state->mode.vdisplay;
 -
        /* Check plane state for visibility and calculate clipping bounds */
 -      err = drm_atomic_helper_check_plane_state(state, crtc_state, &clip,
 +      err = drm_atomic_helper_check_plane_state(state, crtc_state,
                                                  0, INT_MAX, true, true);
        if (err < 0)
                return err;
@@@ -307,13 -297,17 +307,17 @@@ int tegra_plane_format_get_alpha(unsign
        case WIN_COLOR_DEPTH_B8G8R8X8:
                *alpha = WIN_COLOR_DEPTH_B8G8R8A8;
                return 0;
+       case WIN_COLOR_DEPTH_B5G6R5:
+               *alpha = opaque;
+               return 0;
        }
  
        return -EINVAL;
  }
  
 -unsigned int tegra_plane_get_overlap_index(struct tegra_plane *plane,
 -                                         struct tegra_plane *other)
 +static unsigned int tegra_plane_get_overlap_index(struct tegra_plane *plane,
 +                                                struct tegra_plane *other)
  {
        unsigned int index = 0, i;
  
@@@ -340,9 -334,6 +344,6 @@@ void tegra_plane_check_dependent(struc
        unsigned int zpos[2];
        unsigned int i;
  
-       for (i = 0; i < 3; i++)
-               state->dependent[i] = false;
        for (i = 0; i < 2; i++)
                zpos[i] = 0;
  
  
                index = tegra_plane_get_overlap_index(tegra, p);
  
+               state->dependent[index] = false;
                /*
                 * If any of the other planes is on top of this plane and uses
                 * a format with an alpha component, mark this plane as being
@@@ -83,7 -83,6 +83,7 @@@ static void virtio_gpu_unref_list(struc
        struct ttm_validate_buffer *buf;
        struct ttm_buffer_object *bo;
        struct virtio_gpu_object *qobj;
 +
        list_for_each_entry(buf, head, head) {
                bo = buf->bo;
                qobj = container_of(bo, struct virtio_gpu_object, tbo);
@@@ -198,6 -197,9 +198,9 @@@ static int virtio_gpu_getparam_ioctl(st
        case VIRTGPU_PARAM_3D_FEATURES:
                value = vgdev->has_virgl_3d == true ? 1 : 0;
                break;
+       case VIRTGPU_PARAM_CAPSET_QUERY_FIX:
+               value = 1;
+               break;
        default:
                return -EINVAL;
        }
@@@ -473,16 -475,19 +476,20 @@@ static int virtio_gpu_get_caps_ioctl(st
  {
        struct virtio_gpu_device *vgdev = dev->dev_private;
        struct drm_virtgpu_get_caps *args = data;
-       int size;
+       unsigned size, host_caps_size;
        int i;
        int found_valid = -1;
        int ret;
        struct virtio_gpu_drv_cap_cache *cache_ent;
        void *ptr;
 +
        if (vgdev->num_capsets == 0)
                return -ENOSYS;
  
+       /* don't allow userspace to pass 0 */
+       if (args->size == 0)
+               return -EINVAL;
        spin_lock(&vgdev->display_info_lock);
        for (i = 0; i < vgdev->num_capsets; i++) {
                if (vgdev->capsets[i].id == args->cap_set_id) {
                return -EINVAL;
        }
  
-       size = vgdev->capsets[found_valid].max_size;
-       if (args->size > size) {
-               spin_unlock(&vgdev->display_info_lock);
-               return -EINVAL;
-       }
+       host_caps_size = vgdev->capsets[found_valid].max_size;
+       /* only copy to user the minimum of the host caps size or the guest caps size */
+       size = min(args->size, host_caps_size);
  
        list_for_each_entry(cache_ent, &vgdev->cap_cache, head) {
                if (cache_ent->id == args->cap_set_id &&
  
        ret = wait_event_timeout(vgdev->resp_wq,
                                 atomic_read(&cache_ent->is_valid), 5 * HZ);
 +      if (!ret)
 +              return -EBUSY;
  
        ptr = cache_ent->caps_cache;
  
@@@ -534,34 -535,33 +539,34 @@@ copy_exit
  
  struct drm_ioctl_desc virtio_gpu_ioctls[DRM_VIRTIO_NUM_IOCTLS] = {
        DRM_IOCTL_DEF_DRV(VIRTGPU_MAP, virtio_gpu_map_ioctl,
 -                        DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 +                        DRM_AUTH | DRM_UNLOCKED | DRM_RENDER_ALLOW),
  
        DRM_IOCTL_DEF_DRV(VIRTGPU_EXECBUFFER, virtio_gpu_execbuffer_ioctl,
 -                        DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 +                        DRM_AUTH | DRM_UNLOCKED | DRM_RENDER_ALLOW),
  
        DRM_IOCTL_DEF_DRV(VIRTGPU_GETPARAM, virtio_gpu_getparam_ioctl,
 -                        DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 +                        DRM_AUTH | DRM_UNLOCKED | DRM_RENDER_ALLOW),
  
        DRM_IOCTL_DEF_DRV(VIRTGPU_RESOURCE_CREATE,
                          virtio_gpu_resource_create_ioctl,
 -                        DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 +                        DRM_AUTH | DRM_UNLOCKED | DRM_RENDER_ALLOW),
  
        DRM_IOCTL_DEF_DRV(VIRTGPU_RESOURCE_INFO, virtio_gpu_resource_info_ioctl,
 -                        DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 +                        DRM_AUTH | DRM_UNLOCKED | DRM_RENDER_ALLOW),
  
        /* make transfer async to the main ring? - not sure, can we
 -         thread these in the underlying GL */
 +       * thread these in the underlying GL
 +       */
        DRM_IOCTL_DEF_DRV(VIRTGPU_TRANSFER_FROM_HOST,
                          virtio_gpu_transfer_from_host_ioctl,
 -                        DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 +                        DRM_AUTH | DRM_UNLOCKED | DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(VIRTGPU_TRANSFER_TO_HOST,
                          virtio_gpu_transfer_to_host_ioctl,
 -                        DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 +                        DRM_AUTH | DRM_UNLOCKED | DRM_RENDER_ALLOW),
  
        DRM_IOCTL_DEF_DRV(VIRTGPU_WAIT, virtio_gpu_wait_ioctl,
 -                        DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 +                        DRM_AUTH | DRM_UNLOCKED | DRM_RENDER_ALLOW),
  
        DRM_IOCTL_DEF_DRV(VIRTGPU_GET_CAPS, virtio_gpu_get_caps_ioctl,
 -                        DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 +                        DRM_AUTH | DRM_UNLOCKED | DRM_RENDER_ALLOW),
  };
@@@ -301,8 -301,6 +301,8 @@@ static void vmw_print_capabilities(uint
                DRM_INFO("  Guest Backed Resources.\n");
        if (capabilities & SVGA_CAP_DX)
                DRM_INFO("  DX Features.\n");
 +      if (capabilities & SVGA_CAP_HP_CMD_QUEUE)
 +              DRM_INFO("  HP Command Queue.\n");
  }
  
  /**
@@@ -1279,7 -1277,8 +1279,7 @@@ static void vmw_master_drop(struct drm_
        ttm_lock_set_kill(&dev_priv->fbdev_master.lock, false, SIGTERM);
        ttm_vt_unlock(&dev_priv->fbdev_master.lock);
  
 -      if (dev_priv->enable_fb)
 -              vmw_fb_on(dev_priv);
 +      vmw_fb_refresh(dev_priv);
  }
  
  /**
@@@ -1338,6 -1337,19 +1338,19 @@@ static void __vmw_svga_disable(struct v
   */
  void vmw_svga_disable(struct vmw_private *dev_priv)
  {
+       /*
+        * Disabling SVGA will turn off device modesetting capabilities, so
+        * notify KMS about that so that it doesn't cache atomic state that
+        * isn't valid anymore, for example crtcs turned on.
+        * Strictly we'd want to do this under the SVGA lock (or an SVGA mutex),
+        * but vmw_kms_lost_device() takes the reservation sem and thus we'll
+        * end up with lock order reversal. Thus, a master may actually perform
+        * a new modeset just after we call vmw_kms_lost_device() and race with
+        * vmw_svga_disable(), but that should at worst cause atomic KMS state
+        * to be inconsistent with the device, causing modesetting problems.
+        *
+        */
+       vmw_kms_lost_device(dev_priv->dev);
        ttm_write_lock(&dev_priv->reservation_sem, false);
        spin_lock(&dev_priv->svga_lock);
        if (dev_priv->bdev.man[TTM_PL_VRAM].use_type) {
@@@ -1369,23 -1381,28 +1382,23 @@@ static int vmwgfx_pm_notifier(struct no
  
        switch (val) {
        case PM_HIBERNATION_PREPARE:
 -              if (dev_priv->enable_fb)
 -                      vmw_fb_off(dev_priv);
 -              ttm_suspend_lock(&dev_priv->reservation_sem);
 -
                /*
 -               * This empties VRAM and unbinds all GMR bindings.
 -               * Buffer contents is moved to swappable memory.
 +               * Take the reservation sem in write mode, which will make sure
 +               * there are no other processes holding a buffer object
 +               * reservation, meaning we should be able to evict all buffer
 +               * objects if needed.
 +               * Once user-space processes have been frozen, we can release
 +               * the lock again.
                 */
 -              vmw_execbuf_release_pinned_bo(dev_priv);
 -              vmw_resource_evict_all(dev_priv);
 -              vmw_release_device_early(dev_priv);
 -              ttm_bo_swapout_all(&dev_priv->bdev);
 -              vmw_fence_fifo_down(dev_priv->fman);
 +              ttm_suspend_lock(&dev_priv->reservation_sem);
 +              dev_priv->suspend_locked = true;
                break;
        case PM_POST_HIBERNATION:
        case PM_POST_RESTORE:
 -              vmw_fence_fifo_up(dev_priv->fman);
 -              ttm_suspend_unlock(&dev_priv->reservation_sem);
 -              if (dev_priv->enable_fb)
 -                      vmw_fb_on(dev_priv);
 -              break;
 -      case PM_RESTORE_PREPARE:
 +              if (READ_ONCE(dev_priv->suspend_locked)) {
 +                      dev_priv->suspend_locked = false;
 +                      ttm_suspend_unlock(&dev_priv->reservation_sem);
 +              }
                break;
        default:
                break;
@@@ -1436,48 -1453,25 +1449,48 @@@ static int vmw_pm_freeze(struct device 
        struct pci_dev *pdev = to_pci_dev(kdev);
        struct drm_device *dev = pci_get_drvdata(pdev);
        struct vmw_private *dev_priv = vmw_priv(dev);
 +      int ret;
  
 -      dev_priv->suspended = true;
 +      /*
 +       * Unlock for vmw_kms_suspend.
 +       * No user-space processes should be running now.
 +       */
 +      ttm_suspend_unlock(&dev_priv->reservation_sem);
 +      ret = vmw_kms_suspend(dev_priv->dev);
 +      if (ret) {
 +              ttm_suspend_lock(&dev_priv->reservation_sem);
 +              DRM_ERROR("Failed to freeze modesetting.\n");
 +              return ret;
 +      }
        if (dev_priv->enable_fb)
 -              vmw_fifo_resource_dec(dev_priv);
 +              vmw_fb_off(dev_priv);
  
 +      ttm_suspend_lock(&dev_priv->reservation_sem);
 +      vmw_execbuf_release_pinned_bo(dev_priv);
 +      vmw_resource_evict_all(dev_priv);
 +      vmw_release_device_early(dev_priv);
 +      ttm_bo_swapout_all(&dev_priv->bdev);
 +      if (dev_priv->enable_fb)
 +              vmw_fifo_resource_dec(dev_priv);
        if (atomic_read(&dev_priv->num_fifo_resources) != 0) {
                DRM_ERROR("Can't hibernate while 3D resources are active.\n");
                if (dev_priv->enable_fb)
                        vmw_fifo_resource_inc(dev_priv);
                WARN_ON(vmw_request_device_late(dev_priv));
 -              dev_priv->suspended = false;
 +              dev_priv->suspend_locked = false;
 +              ttm_suspend_unlock(&dev_priv->reservation_sem);
 +              if (dev_priv->suspend_state)
 +                      vmw_kms_resume(dev);
 +              if (dev_priv->enable_fb)
 +                      vmw_fb_on(dev_priv);
 +              vmw_fb_refresh(dev_priv);
                return -EBUSY;
        }
  
 -      if (dev_priv->enable_fb)
 -              __vmw_svga_disable(dev_priv);
 +      vmw_fence_fifo_down(dev_priv->fman);
 +      __vmw_svga_disable(dev_priv);
        
        vmw_release_device_late(dev_priv);
 -
        return 0;
  }
  
@@@ -1501,16 -1495,7 +1514,16 @@@ static int vmw_pm_restore(struct devic
        if (dev_priv->enable_fb)
                __vmw_svga_enable(dev_priv);
  
 -      dev_priv->suspended = false;
 +      vmw_fence_fifo_up(dev_priv->fman);
 +      dev_priv->suspend_locked = false;
 +      ttm_suspend_unlock(&dev_priv->reservation_sem);
 +      if (dev_priv->suspend_state)
 +              vmw_kms_resume(dev_priv->dev);
 +
 +      if (dev_priv->enable_fb)
 +              vmw_fb_on(dev_priv);
 +
 +      vmw_fb_refresh(dev_priv);
  
        return 0;
  }
  #include <linux/sync_file.h>
  
  #define VMWGFX_DRIVER_NAME "vmwgfx"
 -#define VMWGFX_DRIVER_DATE "20170612"
 +#define VMWGFX_DRIVER_DATE "20180322"
  #define VMWGFX_DRIVER_MAJOR 2
  #define VMWGFX_DRIVER_MINOR 14
 -#define VMWGFX_DRIVER_PATCHLEVEL 0
 +#define VMWGFX_DRIVER_PATCHLEVEL 1
  #define VMWGFX_FILE_PAGE_OFFSET 0x00100000
  #define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
  #define VMWGFX_MAX_RELOCATIONS 2048
@@@ -92,8 -92,6 +92,8 @@@ struct vmw_dma_buffer 
        s32 pin_count;
        /* Not ref-counted.  Protected by binding_mutex */
        struct vmw_resource *dx_query_ctx;
 +      /* Protected by reservation */
 +      struct ttm_bo_kmap_obj map;
  };
  
  /**
@@@ -425,7 -423,6 +425,7 @@@ struct vmw_private 
        struct vmw_framebuffer *implicit_fb;
        struct mutex global_kms_state_mutex;
        spinlock_t cursor_lock;
 +      struct drm_atomic_state *suspend_state;
  
        /*
         * Context and surface management.
        struct vmw_master *active_master;
        struct vmw_master fbdev_master;
        struct notifier_block pm_nb;
 -      bool suspended;
        bool refuse_hibernation;
 +      bool suspend_locked;
  
        struct mutex release_mutex;
        atomic_t num_fifo_resources;
@@@ -676,13 -673,11 +676,13 @@@ extern void vmw_resource_move_notify(st
                                     struct ttm_mem_reg *mem);
  extern void vmw_query_move_notify(struct ttm_buffer_object *bo,
                                  struct ttm_mem_reg *mem);
 +extern void vmw_resource_swap_notify(struct ttm_buffer_object *bo);
  extern int vmw_query_readback_all(struct vmw_dma_buffer *dx_query_mob);
  extern void vmw_fence_single_bo(struct ttm_buffer_object *bo,
                                struct vmw_fence_obj *fence);
  extern void vmw_resource_evict_all(struct vmw_private *dev_priv);
  
 +
  /**
   * DMA buffer helper routines - vmwgfx_dmabuf.c
   */
@@@ -705,8 -700,6 +705,8 @@@ extern int vmw_dmabuf_unpin(struct vmw_
  extern void vmw_bo_get_guest_ptr(const struct ttm_buffer_object *buf,
                                 SVGAGuestPtr *ptr);
  extern void vmw_bo_pin_reserved(struct vmw_dma_buffer *bo, bool pin);
 +extern void *vmw_dma_buffer_map_and_cache(struct vmw_dma_buffer *vbo);
 +extern void vmw_dma_buffer_unmap(struct vmw_dma_buffer *vbo);
  
  /**
   * Misc Ioctl functionality - vmwgfx_ioctl.c
@@@ -773,7 -766,6 +773,7 @@@ extern struct ttm_placement vmw_evictab
  extern struct ttm_placement vmw_srf_placement;
  extern struct ttm_placement vmw_mob_placement;
  extern struct ttm_placement vmw_mob_ne_placement;
 +extern struct ttm_placement vmw_nonfixed_placement;
  extern struct ttm_bo_driver vmw_bo_driver;
  extern int vmw_dma_quiescent(struct drm_device *dev);
  extern int vmw_bo_map_dma(struct ttm_buffer_object *bo);
@@@ -910,7 -902,6 +910,7 @@@ int vmw_fb_init(struct vmw_private *vmw
  int vmw_fb_close(struct vmw_private *dev_priv);
  int vmw_fb_off(struct vmw_private *vmw_priv);
  int vmw_fb_on(struct vmw_private *vmw_priv);
 +void vmw_fb_refresh(struct vmw_private *vmw_priv);
  
  /**
   * Kernel modesetting - vmwgfx_kms.c
@@@ -947,8 -938,7 +947,9 @@@ int vmw_kms_present(struct vmw_private 
  int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data,
                                struct drm_file *file_priv);
  void vmw_kms_legacy_hotspot_clear(struct vmw_private *dev_priv);
 +int vmw_kms_suspend(struct drm_device *dev);
 +int vmw_kms_resume(struct drm_device *dev);
+ void vmw_kms_lost_device(struct drm_device *dev);
  
  int vmw_dumb_create(struct drm_file *file_priv,
                    struct drm_device *dev,
@@@ -1176,53 -1166,6 +1177,53 @@@ extern int vmw_cmdbuf_cur_flush(struct 
                                bool interruptible);
  extern void vmw_cmdbuf_irqthread(struct vmw_cmdbuf_man *man);
  
 +/* CPU blit utilities - vmwgfx_blit.c */
 +
 +/**
 + * struct vmw_diff_cpy - CPU blit information structure
 + *
 + * @rect: The output bounding box rectangle.
 + * @line: The current line of the blit.
 + * @line_offset: Offset of the current line segment.
 + * @cpp: Bytes per pixel (granularity information).
 + * @memcpy: Which memcpy function to use.
 + */
 +struct vmw_diff_cpy {
 +      struct drm_rect rect;
 +      size_t line;
 +      size_t line_offset;
 +      int cpp;
 +      void (*do_cpy)(struct vmw_diff_cpy *diff, u8 *dest, const u8 *src,
 +                     size_t n);
 +};
 +
 +#define VMW_CPU_BLIT_INITIALIZER {    \
 +      .do_cpy = vmw_memcpy,           \
 +}
 +
 +#define VMW_CPU_BLIT_DIFF_INITIALIZER(_cpp) {   \
 +      .line = 0,                                \
 +      .line_offset = 0,                         \
 +      .rect = { .x1 = INT_MAX/2,                \
 +                .y1 = INT_MAX/2,                \
 +                .x2 = INT_MIN/2,                \
 +                .y2 = INT_MIN/2                 \
 +      },                                        \
 +      .cpp = _cpp,                              \
 +      .do_cpy = vmw_diff_memcpy,                \
 +}
 +
 +void vmw_diff_memcpy(struct vmw_diff_cpy *diff, u8 *dest, const u8 *src,
 +                   size_t n);
 +
 +void vmw_memcpy(struct vmw_diff_cpy *diff, u8 *dest, const u8 *src, size_t n);
 +
 +int vmw_bo_cpu_blit(struct ttm_buffer_object *dst,
 +                  u32 dst_offset, u32 dst_stride,
 +                  struct ttm_buffer_object *src,
 +                  u32 src_offset, u32 src_stride,
 +                  u32 w, u32 h,
 +                  struct vmw_diff_cpy *diff);
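A brief usage sketch, not taken from this patch, of the CPU blit helpers declared above; dst_bo, src_bo, the strides and the w/h dimensions are placeholder names, and the 4-byte cpp assumes a 32bpp format:

	/* Hypothetical example: a 32bpp CPU blit that tracks the damaged area. */
	struct vmw_diff_cpy diff = VMW_CPU_BLIT_DIFF_INITIALIZER(4);
	int ret;

	/* Copy w x h pixels; per the @rect documentation above, diff.rect
	 * ends up bounding the bytes that actually differed. */
	ret = vmw_bo_cpu_blit(dst_bo, 0, dst_stride,
			      src_bo, 0, src_stride,
			      w, h, &diff);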
  
  /**
   * Inline helper functions
@@@ -31,7 -31,6 +31,6 @@@
  #include <drm/drm_atomic_helper.h>
  #include <drm/drm_rect.h>
  
  /* Might need a hrtimer here? */
  #define VMWGFX_PRESENT_RATE ((HZ / 60 > 0) ? HZ / 60 : 1)
  
@@@ -393,13 -392,13 +392,13 @@@ vmw_du_cursor_plane_atomic_update(struc
        du->cursor_surface = vps->surf;
        du->cursor_dmabuf = vps->dmabuf;
  
 -      /* setup new image */
        if (vps->surf) {
                du->cursor_age = du->cursor_surface->snooper.age;
  
                ret = vmw_cursor_update_image(dev_priv,
                                              vps->surf->snooper.image,
 -                                            64, 64, hotspot_x, hotspot_y);
 +                                            64, 64, hotspot_x,
 +                                            hotspot_y);
        } else if (vps->dmabuf) {
                ret = vmw_cursor_update_dmabuf(dev_priv, vps->dmabuf,
                                               plane->state->crtc_w,
@@@ -443,12 -442,18 +442,12 @@@ int vmw_du_primary_plane_atomic_check(s
  {
        struct drm_crtc_state *crtc_state = NULL;
        struct drm_framebuffer *new_fb = state->fb;
 -      struct drm_rect clip = {};
        int ret;
  
        if (state->crtc)
                crtc_state = drm_atomic_get_new_crtc_state(state->state, state->crtc);
  
 -      if (crtc_state && crtc_state->enable) {
 -              clip.x2 = crtc_state->adjusted_mode.hdisplay;
 -              clip.y2 = crtc_state->adjusted_mode.vdisplay;
 -      }
 -
 -      ret = drm_atomic_helper_check_plane_state(state, crtc_state, &clip,
 +      ret = drm_atomic_helper_check_plane_state(state, crtc_state,
                                                  DRM_PLANE_HELPER_NO_SCALING,
                                                  DRM_PLANE_HELPER_NO_SCALING,
                                                  false, true);
@@@ -497,22 -502,11 +496,22 @@@ int vmw_du_cursor_plane_atomic_check(st
        struct vmw_surface *surface = NULL;
        struct drm_framebuffer *fb = new_state->fb;
  
 +      struct drm_rect src = drm_plane_state_src(new_state);
 +      struct drm_rect dest = drm_plane_state_dest(new_state);
  
        /* Turning off */
        if (!fb)
                return ret;
  
 +      ret = drm_plane_helper_check_update(plane, new_state->crtc, fb,
 +                                          &src, &dest,
 +                                          DRM_MODE_ROTATE_0,
 +                                          DRM_PLANE_HELPER_NO_SCALING,
 +                                          DRM_PLANE_HELPER_NO_SCALING,
 +                                          true, true, &new_state->visible);
 +      if (!ret)
 +              return ret;
 +
        /* A lot of the code assumes this */
        if (new_state->crtc_w != 64 || new_state->crtc_h != 64) {
                DRM_ERROR("Invalid cursor dimensions (%d, %d)\n",
@@@ -577,9 -571,13 +576,9 @@@ void vmw_du_crtc_atomic_flush(struct dr
                crtc->state->event = NULL;
  
                spin_lock_irq(&crtc->dev->event_lock);
 -              if (drm_crtc_vblank_get(crtc) == 0)
 -                      drm_crtc_arm_vblank_event(crtc, event);
 -              else
 -                      drm_crtc_send_vblank_event(crtc, event);
 +              drm_crtc_send_vblank_event(crtc, event);
                spin_unlock_irq(&crtc->dev->event_lock);
        }
 -
  }
  
  
@@@ -682,6 -680,9 +681,6 @@@ vmw_du_plane_duplicate_state(struct drm
                return NULL;
  
        vps->pinned = 0;
 -
 -      /* Mapping is managed by prepare_fb/cleanup_fb */
 -      memset(&vps->host_map, 0, sizeof(vps->host_map));
        vps->cpp = 0;
  
        /* Each ref counted resource needs to be acquired again */
@@@ -743,6 -744,11 +742,6 @@@ vmw_du_plane_destroy_state(struct drm_p
  
  
        /* Should have been freed by cleanup_fb */
 -      if (vps->host_map.virtual) {
 -              DRM_ERROR("Host mapping not freed\n");
 -              ttm_bo_kunmap(&vps->host_map);
 -      }
 -
        if (vps->surf)
                vmw_surface_unreference(&vps->surf);
  
@@@ -887,11 -893,11 +886,11 @@@ static int vmw_framebuffer_surface_dirt
        if (dev_priv->active_display_unit == vmw_du_screen_object)
                ret = vmw_kms_sou_do_surface_dirty(dev_priv, &vfbs->base,
                                                   clips, NULL, NULL, 0, 0,
 -                                                 num_clips, inc, NULL);
 +                                                 num_clips, inc, NULL, NULL);
        else
                ret = vmw_kms_stdu_surface_dirty(dev_priv, &vfbs->base,
                                                 clips, NULL, NULL, 0, 0,
 -                                               num_clips, inc, NULL);
 +                                               num_clips, inc, NULL, NULL);
  
        vmw_fifo_flush(dev_priv, false);
        ttm_read_unlock(&dev_priv->reservation_sem);
@@@ -927,12 -933,11 +926,12 @@@ int vmw_kms_readback(struct vmw_privat
        switch (dev_priv->active_display_unit) {
        case vmw_du_screen_object:
                return vmw_kms_sou_readback(dev_priv, file_priv, vfb,
 -                                          user_fence_rep, vclips, num_clips);
 +                                          user_fence_rep, vclips, num_clips,
 +                                          NULL);
        case vmw_du_screen_target:
                return vmw_kms_stdu_dma(dev_priv, file_priv, vfb,
                                        user_fence_rep, NULL, vclips, num_clips,
 -                                      1, false, true);
 +                                      1, false, true, NULL);
        default:
                WARN_ONCE(true,
                          "Readback called with invalid display system.\n");
@@@ -1090,12 -1095,12 +1089,12 @@@ static int vmw_framebuffer_dmabuf_dirty
        case vmw_du_screen_target:
                ret = vmw_kms_stdu_dma(dev_priv, NULL, &vfbd->base, NULL,
                                       clips, NULL, num_clips, increment,
 -                                     true, true);
 +                                     true, true, NULL);
                break;
        case vmw_du_screen_object:
                ret = vmw_kms_sou_do_dmabuf_dirty(dev_priv, &vfbd->base,
                                                  clips, NULL, num_clips,
 -                                                increment, true, NULL);
 +                                                increment, true, NULL, NULL);
                break;
        case vmw_du_legacy:
                ret = vmw_kms_ldu_do_dmabuf_dirty(dev_priv, &vfbd->base, 0, 0,
@@@ -1121,14 -1126,12 +1120,14 @@@ static const struct drm_framebuffer_fun
  };
  
  /**
 - * Pin the dmabuffer to the start of vram.
 + * Pin the dmabuffer in a location suitable for access by the
 + * display system.
   */
  static int vmw_framebuffer_pin(struct vmw_framebuffer *vfb)
  {
        struct vmw_private *dev_priv = vmw_priv(vfb->base.dev);
        struct vmw_dma_buffer *buf;
 +      struct ttm_placement *placement;
        int ret;
  
        buf = vfb->dmabuf ?  vmw_framebuffer_to_vfbd(&vfb->base)->buffer :
                break;
        case vmw_du_screen_object:
        case vmw_du_screen_target:
 -              if (vfb->dmabuf)
 -                      return vmw_dmabuf_pin_in_vram_or_gmr(dev_priv, buf,
 -                                                           false);
 +              if (vfb->dmabuf) {
 +                      if (dev_priv->capabilities & SVGA_CAP_3D) {
 +                              /*
 +                               * Use surface DMA to get content to
 +                               * the screen target surface.
 +                               */
 +                              placement = &vmw_vram_gmr_placement;
 +                      } else {
 +                              /* Use CPU blit. */
 +                              placement = &vmw_sys_placement;
 +                      }
 +              } else {
 +                      /* Use surface / image update */
 +                      placement = &vmw_mob_placement;
 +              }
  
 -              return vmw_dmabuf_pin_in_placement(dev_priv, buf,
 -                                                 &vmw_mob_placement, false);
 +              return vmw_dmabuf_pin_in_placement(dev_priv, buf, placement,
 +                                                 false);
        default:
                return -EINVAL;
        }
@@@ -1553,10 -1544,35 +1552,10 @@@ vmw_kms_atomic_check_modeset(struct drm
        return drm_atomic_helper_check(dev, state);
  }
  
 -
 -/**
 - * vmw_kms_atomic_commit - Perform an atomic state commit
 - *
 - * @dev: DRM device
 - * @state: the driver state object
 - * @nonblock: Whether nonblocking behaviour is requested
 - *
 - * This is a simple wrapper around drm_atomic_helper_commit() for
 - * us to clear the nonblocking value.
 - *
 - * Nonblocking commits currently cause synchronization issues
 - * for vmwgfx.
 - *
 - * RETURNS
 - * Zero for success or negative error code on failure.
 - */
 -int vmw_kms_atomic_commit(struct drm_device *dev,
 -                        struct drm_atomic_state *state,
 -                        bool nonblock)
 -{
 -      return drm_atomic_helper_commit(dev, state, false);
 -}
 -
 -
  static const struct drm_mode_config_funcs vmw_kms_funcs = {
        .fb_create = vmw_kms_fb_create,
        .atomic_check = vmw_kms_atomic_check_modeset,
 -      .atomic_commit = vmw_kms_atomic_commit,
 +      .atomic_commit = drm_atomic_helper_commit,
  };
  
  static int vmw_kms_generic_present(struct vmw_private *dev_priv,
  {
        return vmw_kms_sou_do_surface_dirty(dev_priv, vfb, NULL, clips,
                                            &surface->res, destX, destY,
 -                                          num_clips, 1, NULL);
 +                                          num_clips, 1, NULL, NULL);
  }
  
  
@@@ -1589,7 -1605,7 +1588,7 @@@ int vmw_kms_present(struct vmw_private 
        case vmw_du_screen_target:
                ret = vmw_kms_stdu_surface_dirty(dev_priv, vfb, NULL, clips,
                                                 &surface->res, destX, destY,
 -                                               num_clips, 1, NULL);
 +                                               num_clips, 1, NULL, NULL);
                break;
        case vmw_du_screen_object:
                ret = vmw_kms_generic_present(dev_priv, file_priv, vfb, surface,
@@@ -2317,16 -2333,10 +2316,16 @@@ int vmw_kms_helper_dirty(struct vmw_pri
  
        dirty->dev_priv = dev_priv;
  
 -      list_for_each_entry(crtc, &dev_priv->dev->mode_config.crtc_list, head) {
 -              if (crtc->primary->fb != &framebuffer->base)
 -                      continue;
 -              units[num_units++] = vmw_crtc_to_du(crtc);
 +      /* If crtc is passed, no need to iterate over other display units */
 +      if (dirty->crtc) {
 +              units[num_units++] = vmw_crtc_to_du(dirty->crtc);
 +      } else {
 +              list_for_each_entry(crtc, &dev_priv->dev->mode_config.crtc_list,
 +                                  head) {
 +                      if (crtc->primary->fb != &framebuffer->base)
 +                              continue;
 +                      units[num_units++] = vmw_crtc_to_du(crtc);
 +              }
        }
  
        for (k = 0; k < num_units; k++) {
  int vmw_kms_helper_buffer_prepare(struct vmw_private *dev_priv,
                                  struct vmw_dma_buffer *buf,
                                  bool interruptible,
 -                                bool validate_as_mob)
 +                                bool validate_as_mob,
 +                                bool for_cpu_blit)
  {
 +      struct ttm_operation_ctx ctx = {
 +              .interruptible = interruptible,
 +              .no_wait_gpu = false};
        struct ttm_buffer_object *bo = &buf->base;
        int ret;
  
        ttm_bo_reserve(bo, false, false, NULL);
 -      ret = vmw_validate_single_buffer(dev_priv, bo, interruptible,
 -                                       validate_as_mob);
 +      if (for_cpu_blit)
 +              ret = ttm_bo_validate(bo, &vmw_nonfixed_placement, &ctx);
 +      else
 +              ret = vmw_validate_single_buffer(dev_priv, bo, interruptible,
 +                                               validate_as_mob);
        if (ret)
                ttm_bo_unreserve(bo);
  
@@@ -2513,9 -2516,12 +2512,12 @@@ void vmw_kms_helper_buffer_finish(struc
   * Helper to be used if an error forces the caller to undo the actions of
   * vmw_kms_helper_resource_prepare.
   */
- void vmw_kms_helper_resource_revert(struct vmw_resource *res)
+ void vmw_kms_helper_resource_revert(struct vmw_validation_ctx *ctx)
  {
-       vmw_kms_helper_buffer_revert(res->backup);
+       struct vmw_resource *res = ctx->res;
+       vmw_kms_helper_buffer_revert(ctx->buf);
+       vmw_dmabuf_unreference(&ctx->buf);
        vmw_resource_unreserve(res, false, NULL, 0);
        mutex_unlock(&res->dev_priv->cmdbuf_mutex);
  }
   * interrupted by a signal.
   */
  int vmw_kms_helper_resource_prepare(struct vmw_resource *res,
-                                   bool interruptible)
+                                   bool interruptible,
+                                   struct vmw_validation_ctx *ctx)
  {
        int ret = 0;
  
+       ctx->buf = NULL;
+       ctx->res = res;
        if (interruptible)
                ret = mutex_lock_interruptible(&res->dev_priv->cmdbuf_mutex);
        else
        if (res->backup) {
                ret = vmw_kms_helper_buffer_prepare(res->dev_priv, res->backup,
                                                    interruptible,
 -                                                  res->dev_priv->has_mob);
 +                                                  res->dev_priv->has_mob,
 +                                                  false);
                if (ret)
                        goto out_unreserve;
+               ctx->buf = vmw_dmabuf_reference(res->backup);
        }
        ret = vmw_resource_validate(res);
        if (ret)
        return 0;
  
  out_revert:
-       vmw_kms_helper_buffer_revert(res->backup);
+       vmw_kms_helper_buffer_revert(ctx->buf);
  out_unreserve:
        vmw_resource_unreserve(res, false, NULL, 0);
  out_unlock:
   * @out_fence: Optional pointer to a fence pointer. If non-NULL, a
   * ref-counted fence pointer is returned here.
   */
- void vmw_kms_helper_resource_finish(struct vmw_resource *res,
-                            struct vmw_fence_obj **out_fence)
+ void vmw_kms_helper_resource_finish(struct vmw_validation_ctx *ctx,
+                                   struct vmw_fence_obj **out_fence)
  {
-       if (res->backup || out_fence)
-               vmw_kms_helper_buffer_finish(res->dev_priv, NULL, res->backup,
+       struct vmw_resource *res = ctx->res;
+       if (ctx->buf || out_fence)
+               vmw_kms_helper_buffer_finish(res->dev_priv, NULL, ctx->buf,
                                             out_fence, NULL);
  
        vmw_resource_unreserve(res, false, NULL, 0);
@@@ -2851,48 -2864,11 +2861,58 @@@ int vmw_kms_set_config(struct drm_mode_
  
  
  /**
 + * vmw_kms_suspend - Save modesetting state and turn modesetting off.
 + *
 + * @dev: Pointer to the drm device
 + * Return: 0 on success. Negative error code on failure.
 + */
 +int vmw_kms_suspend(struct drm_device *dev)
 +{
 +      struct vmw_private *dev_priv = vmw_priv(dev);
 +
 +      dev_priv->suspend_state = drm_atomic_helper_suspend(dev);
 +      if (IS_ERR(dev_priv->suspend_state)) {
 +              int ret = PTR_ERR(dev_priv->suspend_state);
 +
 +              DRM_ERROR("Failed kms suspend: %d\n", ret);
 +              dev_priv->suspend_state = NULL;
 +
 +              return ret;
 +      }
 +
 +      return 0;
 +}
 +
 +
 +/**
 + * vmw_kms_resume - Re-enable modesetting and restore state
 + *
 + * @dev: Pointer to the drm device
 + * Return: 0 on success. Negative error code on failure.
 + *
 + * State is resumed from a previous vmw_kms_suspend(). It's illegal
 + * to call this function without a previous vmw_kms_suspend().
 + */
 +int vmw_kms_resume(struct drm_device *dev)
 +{
 +      struct vmw_private *dev_priv = vmw_priv(dev);
 +      int ret;
 +
 +      if (WARN_ON(!dev_priv->suspend_state))
 +              return 0;
 +
 +      ret = drm_atomic_helper_resume(dev, dev_priv->suspend_state);
 +      dev_priv->suspend_state = NULL;
 +
 +      return ret;
 +}
++
++/**
+  * vmw_kms_lost_device - Notify kms that modesetting capabilities will be lost
+  *
+  * @dev: Pointer to the drm device
+  */
+ void vmw_kms_lost_device(struct drm_device *dev)
+ {
+       drm_atomic_helper_shutdown(dev);
+ }
@@@ -50,7 -50,6 +50,7 @@@
   * @unit: The current display unit. Set up by the helper before a call to @clip.
   * @cmd: The allocated fifo space. Set up by the helper before the first @clip
   * call.
 + * @crtc: The crtc for which to build dirty commands.
   * @num_hits: Number of clip rect commands for this display unit.
   * Cleared by the helper before the first @clip call. Updated by the @clip
   * callback.
@@@ -72,7 -71,6 +72,7 @@@ struct vmw_kms_dirty 
        struct vmw_private *dev_priv;
        struct vmw_display_unit *unit;
        void *cmd;
 +      struct drm_crtc *crtc;
        u32 num_hits;
        s32 fb_x;
        s32 fb_y;
@@@ -177,6 -175,7 +177,6 @@@ struct vmw_plane_state 
        int pinned;
  
        /* For CPU Blit */
 -      struct ttm_bo_kmap_obj host_map;
        unsigned int cpp;
  };
  
@@@ -241,6 -240,11 +241,11 @@@ struct vmw_display_unit 
        int set_gui_y;
  };
  
+ struct vmw_validation_ctx {
+       struct vmw_resource *res;
+       struct vmw_dma_buffer *buf;
+ };
  #define vmw_crtc_to_du(x) \
        container_of(x, struct vmw_display_unit, crtc)
  #define vmw_connector_to_du(x) \
@@@ -288,8 -292,7 +293,8 @@@ int vmw_kms_helper_dirty(struct vmw_pri
  int vmw_kms_helper_buffer_prepare(struct vmw_private *dev_priv,
                                  struct vmw_dma_buffer *buf,
                                  bool interruptible,
 -                                bool validate_as_mob);
 +                                bool validate_as_mob,
 +                                bool for_cpu_blit);
  void vmw_kms_helper_buffer_revert(struct vmw_dma_buffer *buf);
  void vmw_kms_helper_buffer_finish(struct vmw_private *dev_priv,
                                  struct drm_file *file_priv,
                                  struct drm_vmw_fence_rep __user *
                                  user_fence_rep);
  int vmw_kms_helper_resource_prepare(struct vmw_resource *res,
-                                   bool interruptible);
- void vmw_kms_helper_resource_revert(struct vmw_resource *res);
- void vmw_kms_helper_resource_finish(struct vmw_resource *res,
+                                   bool interruptible,
+                                   struct vmw_validation_ctx *ctx);
+ void vmw_kms_helper_resource_revert(struct vmw_validation_ctx *ctx);
+ void vmw_kms_helper_resource_finish(struct vmw_validation_ctx *ctx,
                                    struct vmw_fence_obj **out_fence);
  int vmw_kms_readback(struct vmw_private *dev_priv,
                     struct drm_file *file_priv,
@@@ -400,23 -404,20 +406,23 @@@ int vmw_kms_sou_do_surface_dirty(struc
                                 s32 dest_x,
                                 s32 dest_y,
                                 unsigned num_clips, int inc,
 -                               struct vmw_fence_obj **out_fence);
 +                               struct vmw_fence_obj **out_fence,
 +                               struct drm_crtc *crtc);
  int vmw_kms_sou_do_dmabuf_dirty(struct vmw_private *dev_priv,
                                struct vmw_framebuffer *framebuffer,
                                struct drm_clip_rect *clips,
                                struct drm_vmw_rect *vclips,
                                unsigned num_clips, int increment,
                                bool interruptible,
 -                              struct vmw_fence_obj **out_fence);
 +                              struct vmw_fence_obj **out_fence,
 +                              struct drm_crtc *crtc);
  int vmw_kms_sou_readback(struct vmw_private *dev_priv,
                         struct drm_file *file_priv,
                         struct vmw_framebuffer *vfb,
                         struct drm_vmw_fence_rep __user *user_fence_rep,
                         struct drm_vmw_rect *vclips,
 -                       uint32_t num_clips);
 +                       uint32_t num_clips,
 +                       struct drm_crtc *crtc);
  
  /*
   * Screen Target Display Unit functions - vmwgfx_stdu.c
@@@ -430,8 -431,7 +436,8 @@@ int vmw_kms_stdu_surface_dirty(struct v
                               s32 dest_x,
                               s32 dest_y,
                               unsigned num_clips, int inc,
 -                             struct vmw_fence_obj **out_fence);
 +                             struct vmw_fence_obj **out_fence,
 +                             struct drm_crtc *crtc);
  int vmw_kms_stdu_dma(struct vmw_private *dev_priv,
                     struct drm_file *file_priv,
                     struct vmw_framebuffer *vfb,
                     uint32_t num_clips,
                     int increment,
                     bool to_surface,
 -                   bool interruptible);
 +                   bool interruptible,
 +                   struct drm_crtc *crtc);
  
  int vmw_kms_set_config(struct drm_mode_set *set,
                       struct drm_modeset_acquire_ctx *ctx);
  #endif
@@@ -316,21 -316,69 +316,21 @@@ static int vmw_sou_crtc_page_flip(struc
                                  struct drm_modeset_acquire_ctx *ctx)
  {
        struct vmw_private *dev_priv = vmw_priv(crtc->dev);
 -      struct drm_framebuffer *old_fb = crtc->primary->fb;
 -      struct vmw_framebuffer *vfb = vmw_framebuffer_to_vfb(new_fb);
 -      struct vmw_fence_obj *fence = NULL;
 -      struct drm_vmw_rect vclips;
        int ret;
  
        if (!vmw_kms_crtc_flippable(dev_priv, crtc))
                return -EINVAL;
  
 -      flags &= ~DRM_MODE_PAGE_FLIP_ASYNC;
 -      ret = drm_atomic_helper_page_flip(crtc, new_fb, NULL, flags, ctx);
 +      ret = drm_atomic_helper_page_flip(crtc, new_fb, event, flags, ctx);
        if (ret) {
                DRM_ERROR("Page flip error %d.\n", ret);
                return ret;
        }
  
 -      /* do a full screen dirty update */
 -      vclips.x = crtc->x;
 -      vclips.y = crtc->y;
 -      vclips.w = crtc->mode.hdisplay;
 -      vclips.h = crtc->mode.vdisplay;
 -
 -      if (vfb->dmabuf)
 -              ret = vmw_kms_sou_do_dmabuf_dirty(dev_priv, vfb,
 -                                                NULL, &vclips, 1, 1,
 -                                                true, &fence);
 -      else
 -              ret = vmw_kms_sou_do_surface_dirty(dev_priv, vfb,
 -                                                 NULL, &vclips, NULL,
 -                                                 0, 0, 1, 1, &fence);
 -
 -
 -      if (ret != 0)
 -              goto out_no_fence;
 -      if (!fence) {
 -              ret = -EINVAL;
 -              goto out_no_fence;
 -      }
 -
 -      if (event) {
 -              struct drm_file *file_priv = event->base.file_priv;
 -
 -              ret = vmw_event_fence_action_queue(file_priv, fence,
 -                                                 &event->base,
 -                                                 &event->event.vbl.tv_sec,
 -                                                 &event->event.vbl.tv_usec,
 -                                                 true);
 -      }
 -
 -      /*
 -       * No need to hold on to this now. The only cleanup
 -       * we need to do if we fail is unref the fence.
 -       */
 -      vmw_fence_obj_unreference(&fence);
 -
        if (vmw_crtc_to_du(crtc)->is_implicit)
                vmw_kms_update_implicit_fb(dev_priv, crtc);
  
        return ret;
 -
 -out_no_fence:
 -      drm_atomic_set_fb_for_plane(crtc->primary->state, old_fb);
 -      return ret;
  }
  
  static const struct drm_crtc_funcs vmw_screen_object_crtc_funcs = {
@@@ -405,11 -453,7 +405,11 @@@ vmw_sou_primary_plane_cleanup_fb(struc
                                 struct drm_plane_state *old_state)
  {
        struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state);
 +      struct drm_crtc *crtc = plane->state->crtc ?
 +              plane->state->crtc : old_state->crtc;
  
 +      if (vps->dmabuf)
 +              vmw_dmabuf_unpin(vmw_priv(crtc->dev), vps->dmabuf, false);
        vmw_dmabuf_unreference(&vps->dmabuf);
        vps->dmabuf_size = 0;
  
@@@ -447,17 -491,10 +447,17 @@@ vmw_sou_primary_plane_prepare_fb(struc
        }
  
        size = new_state->crtc_w * new_state->crtc_h * 4;
 +      dev_priv = vmw_priv(crtc->dev);
  
        if (vps->dmabuf) {
 -              if (vps->dmabuf_size == size)
 -                      return 0;
 +              if (vps->dmabuf_size == size) {
 +                      /*
 +                       * Note that this might temporarily up the pin-count
 +                       * to 2, until cleanup_fb() is called.
 +                       */
 +                      return vmw_dmabuf_pin_in_vram(dev_priv, vps->dmabuf,
 +                                                    true);
 +              }
  
                vmw_dmabuf_unreference(&vps->dmabuf);
                vps->dmabuf_size = 0;
        if (!vps->dmabuf)
                return -ENOMEM;
  
 -      dev_priv = vmw_priv(crtc->dev);
        vmw_svga_enable(dev_priv);
  
        /* After we have alloced the backing store might not be able to
                              &vmw_vram_ne_placement,
                              false, &vmw_dmabuf_bo_free);
        vmw_overlay_resume_all(dev_priv);
 -
 -      if (ret != 0)
 +      if (ret) {
                vps->dmabuf = NULL; /* vmw_dmabuf_init frees on error */
 -      else
 -              vps->dmabuf_size = size;
 +              return ret;
 +      }
  
 -      return ret;
 +      /*
 +       * TTM already thinks the buffer is pinned, but make sure the
 +       * pin_count is upped.
 +       */
 +      return vmw_dmabuf_pin_in_vram(dev_priv, vps->dmabuf, true);
  }
  
  
@@@ -495,71 -530,9 +495,71 @@@ vmw_sou_primary_plane_atomic_update(str
                                    struct drm_plane_state *old_state)
  {
        struct drm_crtc *crtc = plane->state->crtc;
 +      struct drm_pending_vblank_event *event = NULL;
 +      struct vmw_fence_obj *fence = NULL;
 +      int ret;
 +
 +      if (crtc && plane->state->fb) {
 +              struct vmw_private *dev_priv = vmw_priv(crtc->dev);
 +              struct vmw_framebuffer *vfb =
 +                      vmw_framebuffer_to_vfb(plane->state->fb);
 +              struct drm_vmw_rect vclips;
 +
 +              vclips.x = crtc->x;
 +              vclips.y = crtc->y;
 +              vclips.w = crtc->mode.hdisplay;
 +              vclips.h = crtc->mode.vdisplay;
 +
 +              if (vfb->dmabuf)
 +                      ret = vmw_kms_sou_do_dmabuf_dirty(dev_priv, vfb, NULL,
 +                                                        &vclips, 1, 1, true,
 +                                                        &fence, crtc);
 +              else
 +                      ret = vmw_kms_sou_do_surface_dirty(dev_priv, vfb, NULL,
 +                                                         &vclips, NULL, 0, 0,
 +                                                         1, 1, &fence, crtc);
 +
 +              /*
 +               * We cannot really fail this function, so if we do, then output
 +               * an error and maintain consistent atomic state.
 +               */
 +              if (ret != 0)
 +                      DRM_ERROR("Failed to update screen.\n");
  
 -      if (crtc)
                crtc->primary->fb = plane->state->fb;
 +      } else {
 +              /*
 +               * When disabling a plane, CRTC and FB should always be NULL
 +               * together, otherwise it's an error.
 +               * Here the primary plane is being disabled, so we should really
 +               * blank the screen object display unit, if not already done.
 +               */
 +              return;
 +      }
 +
 +      event = crtc->state->event;
 +      /*
 +       * In case of failure (and in other cases), the vblank event will be
 +       * sent in vmw_du_crtc_atomic_flush.
 +       */
 +      if (event && fence) {
 +              struct drm_file *file_priv = event->base.file_priv;
 +
 +              ret = vmw_event_fence_action_queue(file_priv,
 +                                                 fence,
 +                                                 &event->base,
 +                                                 &event->event.vbl.tv_sec,
 +                                                 &event->event.vbl.tv_usec,
 +                                                 true);
 +
 +              if (unlikely(ret != 0))
 +                      DRM_ERROR("Failed to queue event on fence.\n");
 +              else
 +                      crtc->state->event = NULL;
 +      }
 +
 +      if (fence)
 +              vmw_fence_obj_unreference(&fence);
  }
  
  
@@@ -919,7 -892,6 +919,7 @@@ static void vmw_sou_surface_clip(struc
   * @out_fence: If non-NULL, will return a ref-counted pointer to a
   * struct vmw_fence_obj. The returned fence pointer may be NULL in which
   * case the device has already synchronized.
 + * @crtc: If crtc is passed, perform surface dirty on that crtc only.
   *
   * Returns 0 on success, negative error code on failure. -ERESTARTSYS if
   * interrupted.
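A minimal caller sketch of the @out_fence contract documented above (illustrative only, not part of this patch; dev_priv, vfb, vclips and crtc are assumed to be set up as in the atomic update path earlier in this file):

	struct vmw_fence_obj *fence = NULL;
	int ret;

	/* Dirty the full crtc area and request a fence. */
	ret = vmw_kms_sou_do_surface_dirty(dev_priv, vfb, NULL, &vclips,
					   NULL, 0, 0, 1, 1, &fence, crtc);
	if (ret)
		return ret;

	/* A NULL fence means the device has already synchronized. */
	if (fence)
		vmw_fence_obj_unreference(&fence);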
@@@ -932,18 -904,18 +932,19 @@@ int vmw_kms_sou_do_surface_dirty(struc
                                 s32 dest_x,
                                 s32 dest_y,
                                 unsigned num_clips, int inc,
 -                               struct vmw_fence_obj **out_fence)
 +                               struct vmw_fence_obj **out_fence,
 +                               struct drm_crtc *crtc)
  {
        struct vmw_framebuffer_surface *vfbs =
                container_of(framebuffer, typeof(*vfbs), base);
        struct vmw_kms_sou_surface_dirty sdirty;
+       struct vmw_validation_ctx ctx;
        int ret;
  
        if (!srf)
                srf = &vfbs->surface->res;
  
-       ret = vmw_kms_helper_resource_prepare(srf, true);
+       ret = vmw_kms_helper_resource_prepare(srf, true, &ctx);
        if (ret)
                return ret;
  
        sdirty.base.dev_priv = dev_priv;
        sdirty.base.fifo_reserve_size = sizeof(struct vmw_kms_sou_dirty_cmd) +
          sizeof(SVGASignedRect) * num_clips;
 +      sdirty.base.crtc = crtc;
  
        sdirty.sid = srf->id;
        sdirty.left = sdirty.top = S32_MAX;
        ret = vmw_kms_helper_dirty(dev_priv, framebuffer, clips, vclips,
                                   dest_x, dest_y, num_clips, inc,
                                   &sdirty.base);
-       vmw_kms_helper_resource_finish(srf, out_fence);
+       vmw_kms_helper_resource_finish(&ctx, out_fence);
  
        return ret;
  }
@@@ -1024,7 -995,6 +1025,7 @@@ static void vmw_sou_dmabuf_clip(struct 
   * @out_fence: If non-NULL, will return a ref-counted pointer to a
   * struct vmw_fence_obj. The returned fence pointer may be NULL in which
   * case the device has already synchronized.
 + * @crtc: If crtc is passed, perform dmabuf dirty on that crtc only.
   *
   * Returns 0 on success, negative error code on failure. -ERESTARTSYS if
   * interrupted.
@@@ -1035,8 -1005,7 +1036,8 @@@ int vmw_kms_sou_do_dmabuf_dirty(struct 
                                struct drm_vmw_rect *vclips,
                                unsigned num_clips, int increment,
                                bool interruptible,
 -                              struct vmw_fence_obj **out_fence)
 +                              struct vmw_fence_obj **out_fence,
 +                              struct drm_crtc *crtc)
  {
        struct vmw_dma_buffer *buf =
                container_of(framebuffer, struct vmw_framebuffer_dmabuf,
        int ret;
  
        ret = vmw_kms_helper_buffer_prepare(dev_priv, buf, interruptible,
 -                                          false);
 +                                          false, false);
        if (ret)
                return ret;
  
        if (unlikely(ret != 0))
                goto out_revert;
  
 +      dirty.crtc = crtc;
        dirty.fifo_commit = vmw_sou_dmabuf_fifo_commit;
        dirty.clip = vmw_sou_dmabuf_clip;
        dirty.fifo_reserve_size = sizeof(struct vmw_kms_sou_dmabuf_blit) *
@@@ -1125,7 -1093,6 +1126,7 @@@ static void vmw_sou_readback_clip(struc
   * Must be set to non-NULL if @file_priv is non-NULL.
   * @vclips: Array of clip rects.
   * @num_clips: Number of clip rects in @vclips.
 + * @crtc: If crtc is passed, readback on that crtc only.
   *
   * Returns 0 on success, negative error code on failure. -ERESTARTSYS if
   * interrupted.
@@@ -1135,16 -1102,14 +1136,16 @@@ int vmw_kms_sou_readback(struct vmw_pri
                         struct vmw_framebuffer *vfb,
                         struct drm_vmw_fence_rep __user *user_fence_rep,
                         struct drm_vmw_rect *vclips,
 -                       uint32_t num_clips)
 +                       uint32_t num_clips,
 +                       struct drm_crtc *crtc)
  {
        struct vmw_dma_buffer *buf =
                container_of(vfb, struct vmw_framebuffer_dmabuf, base)->buffer;
        struct vmw_kms_dirty dirty;
        int ret;
  
 -      ret = vmw_kms_helper_buffer_prepare(dev_priv, buf, true, false);
 +      ret = vmw_kms_helper_buffer_prepare(dev_priv, buf, true, false,
 +                                          false);
        if (ret)
                return ret;
  
        if (unlikely(ret != 0))
                goto out_revert;
  
 +      dirty.crtc = crtc;
        dirty.fifo_commit = vmw_sou_readback_fifo_commit;
        dirty.clip = vmw_sou_readback_clip;
        dirty.fifo_reserve_size = sizeof(struct vmw_kms_sou_readback_blit) *
@@@ -114,6 -114,7 +114,6 @@@ struct vmw_screen_target_display_unit 
        bool defined;
  
        /* For CPU Blit */
 -      struct ttm_bo_kmap_obj host_map;
        unsigned int cpp;
  };
  
@@@ -491,17 -492,71 +491,17 @@@ static int vmw_stdu_crtc_page_flip(stru
  {
        struct vmw_private *dev_priv = vmw_priv(crtc->dev);
        struct vmw_screen_target_display_unit *stdu = vmw_crtc_to_stdu(crtc);
 -      struct vmw_framebuffer *vfb = vmw_framebuffer_to_vfb(new_fb);
 -      struct drm_vmw_rect vclips;
        int ret;
  
 -      dev_priv          = vmw_priv(crtc->dev);
 -      stdu              = vmw_crtc_to_stdu(crtc);
 -
        if (!stdu->defined || !vmw_kms_crtc_flippable(dev_priv, crtc))
                return -EINVAL;
  
 -      /*
 -       * We're always async, but the helper doesn't know how to set async
 -       * so lie to the helper. Also, the helper expects someone
 -       * to pick the event up from the crtc state, and if nobody does,
 -       * it will free it. Since we handle the event in this function,
 -       * don't hand it to the helper.
 -       */
 -      flags &= ~DRM_MODE_PAGE_FLIP_ASYNC;
 -      ret = drm_atomic_helper_page_flip(crtc, new_fb, NULL, flags, ctx);
 +      ret = drm_atomic_helper_page_flip(crtc, new_fb, event, flags, ctx);
        if (ret) {
                DRM_ERROR("Page flip error %d.\n", ret);
                return ret;
        }
  
 -      if (stdu->base.is_implicit)
 -              vmw_kms_update_implicit_fb(dev_priv, crtc);
 -
 -      /*
 -       * Now that we've bound a new surface to the screen target,
 -       * update the contents.
 -       */
 -      vclips.x = crtc->x;
 -      vclips.y = crtc->y;
 -      vclips.w = crtc->mode.hdisplay;
 -      vclips.h = crtc->mode.vdisplay;
 -
 -      if (vfb->dmabuf)
 -              ret = vmw_kms_stdu_dma(dev_priv, NULL, vfb, NULL, NULL, &vclips,
 -                                     1, 1, true, false);
 -      else
 -              ret = vmw_kms_stdu_surface_dirty(dev_priv, vfb, NULL, &vclips,
 -                                               NULL, 0, 0, 1, 1, NULL);
 -      if (ret) {
 -              DRM_ERROR("Page flip update error %d.\n", ret);
 -              return ret;
 -      }
 -
 -      if (event) {
 -              struct vmw_fence_obj *fence = NULL;
 -              struct drm_file *file_priv = event->base.file_priv;
 -
 -              vmw_execbuf_fence_commands(NULL, dev_priv, &fence, NULL);
 -              if (!fence)
 -                      return -ENOMEM;
 -
 -              ret = vmw_event_fence_action_queue(file_priv, fence,
 -                                                 &event->base,
 -                                                 &event->event.vbl.tv_sec,
 -                                                 &event->event.vbl.tv_usec,
 -                                                 true);
 -              vmw_fence_obj_unreference(&fence);
 -      } else {
 -              (void) vmw_fifo_flush(dev_priv, false);
 -      }
 -
        return 0;
  }
  
@@@ -638,9 -693,10 +638,9 @@@ static void vmw_stdu_dmabuf_cpu_commit(
                container_of(dirty->unit, typeof(*stdu), base);
        s32 width, height;
        s32 src_pitch, dst_pitch;
 -      u8 *src, *dst;
 -      bool not_used;
 -      struct ttm_bo_kmap_obj guest_map;
 -      int ret;
 +      struct ttm_buffer_object *src_bo, *dst_bo;
 +      u32 src_offset, dst_offset;
 +      struct vmw_diff_cpy diff = VMW_CPU_BLIT_DIFF_INITIALIZER(stdu->cpp);
  
        if (!dirty->num_hits)
                return;
        if (width == 0 || height == 0)
                return;
  
 -      ret = ttm_bo_kmap(&ddirty->buf->base, 0, ddirty->buf->base.num_pages,
 -                        &guest_map);
 -      if (ret) {
 -              DRM_ERROR("Failed mapping framebuffer for blit: %d\n",
 -                        ret);
 -              goto out_cleanup;
 -      }
 -
 -      /* Assume we are blitting from Host (display_srf) to Guest (dmabuf) */
 -      src_pitch = stdu->display_srf->base_size.width * stdu->cpp;
 -      src = ttm_kmap_obj_virtual(&stdu->host_map, &not_used);
 -      src += ddirty->top * src_pitch + ddirty->left * stdu->cpp;
 -
 -      dst_pitch = ddirty->pitch;
 -      dst = ttm_kmap_obj_virtual(&guest_map, &not_used);
 -      dst += ddirty->fb_top * dst_pitch + ddirty->fb_left * stdu->cpp;
 -
 -
 -      /* Figure out the real direction */
 -      if (ddirty->transfer == SVGA3D_WRITE_HOST_VRAM) {
 -              u8 *tmp;
 -              s32 tmp_pitch;
 -
 -              tmp = src;
 -              tmp_pitch = src_pitch;
 +      /* Assume we are blitting from Guest (dmabuf) to Host (display_srf) */
 +      dst_pitch = stdu->display_srf->base_size.width * stdu->cpp;
 +      dst_bo = &stdu->display_srf->res.backup->base;
 +      dst_offset = ddirty->top * dst_pitch + ddirty->left * stdu->cpp;
  
 -              src = dst;
 -              src_pitch = dst_pitch;
 +      src_pitch = ddirty->pitch;
 +      src_bo = &ddirty->buf->base;
 +      src_offset = ddirty->fb_top * src_pitch + ddirty->fb_left * stdu->cpp;
  
 -              dst = tmp;
 -              dst_pitch = tmp_pitch;
 +      /* Swap src and dst if the assumption was wrong. */
 +      if (ddirty->transfer != SVGA3D_WRITE_HOST_VRAM) {
 +              swap(dst_pitch, src_pitch);
 +              swap(dst_bo, src_bo);
 +              swap(src_offset, dst_offset);
        }
  
 -      /* CPU Blit */
 -      while (height-- > 0) {
 -              memcpy(dst, src, width * stdu->cpp);
 -              dst += dst_pitch;
 -              src += src_pitch;
 -      }
 +      (void) vmw_bo_cpu_blit(dst_bo, dst_offset, dst_pitch,
 +                             src_bo, src_offset, src_pitch,
 +                             width * stdu->cpp, height, &diff);
  
 -      if (ddirty->transfer == SVGA3D_WRITE_HOST_VRAM) {
 +      if (ddirty->transfer == SVGA3D_WRITE_HOST_VRAM &&
 +          drm_rect_visible(&diff.rect)) {
                struct vmw_private *dev_priv;
                struct vmw_stdu_update *cmd;
                struct drm_clip_rect region;
                int ret;
  
                /* We are updating the actual surface, not a proxy */
 -              region.x1 = ddirty->left;
 -              region.x2 = ddirty->right;
 -              region.y1 = ddirty->top;
 -              region.y2 = ddirty->bottom;
 +              region.x1 = diff.rect.x1;
 +              region.x2 = diff.rect.x2;
 +              region.y1 = diff.rect.y1;
 +              region.y2 = diff.rect.y2;
                ret = vmw_kms_update_proxy(
                        (struct vmw_resource *) &stdu->display_srf->res,
                        (const struct drm_clip_rect *) &region, 1, 1);
                }
  
                vmw_stdu_populate_update(cmd, stdu->base.unit,
 -                                       ddirty->left, ddirty->right,
 -                                       ddirty->top, ddirty->bottom);
 +                                       region.x1, region.x2,
 +                                       region.y1, region.y2);
  
                vmw_fifo_commit(dev_priv, sizeof(*cmd));
        }
  
 -      ttm_bo_kunmap(&guest_map);
  out_cleanup:
        ddirty->left = ddirty->top = ddirty->fb_left = ddirty->fb_top = S32_MAX;
        ddirty->right = ddirty->bottom = S32_MIN;
   * @to_surface: Whether to DMA to the screen target system as opposed to
   * from the screen target system.
  * @interruptible: Whether to perform waits interruptibly if possible.
 + * @crtc: If crtc is passed, perform stdu dma on that crtc only.
   *
  * If DMA-ing to the screen target system, the function will also notify
   * the screen target system that a bounding box of the cliprects has been
@@@ -743,22 -818,15 +743,22 @@@ int vmw_kms_stdu_dma(struct vmw_privat
                     uint32_t num_clips,
                     int increment,
                     bool to_surface,
 -                   bool interruptible)
 +                   bool interruptible,
 +                   struct drm_crtc *crtc)
  {
        struct vmw_dma_buffer *buf =
                container_of(vfb, struct vmw_framebuffer_dmabuf, base)->buffer;
        struct vmw_stdu_dirty ddirty;
        int ret;
 +      bool cpu_blit = !(dev_priv->capabilities & SVGA_CAP_3D);
  
 +      /*
 +       * VMs without 3D support don't have the surface DMA command, so
 +       * we'll be using a CPU blit; the framebuffer should therefore be
 +       * moved out of VRAM.
 +       */
        ret = vmw_kms_helper_buffer_prepare(dev_priv, buf, interruptible,
 -                                          false);
 +                                          false, cpu_blit);
        if (ret)
                return ret;
  
        if (to_surface)
                ddirty.base.fifo_reserve_size += sizeof(struct vmw_stdu_update);
  
 -      /* 2D VMs cannot use SVGA_3D_CMD_SURFACE_DMA so do CPU blit instead */
 -      if (!(dev_priv->capabilities & SVGA_CAP_3D)) {
 +
 +      if (cpu_blit) {
                ddirty.base.fifo_commit = vmw_stdu_dmabuf_cpu_commit;
                ddirty.base.clip = vmw_stdu_dmabuf_cpu_clip;
                ddirty.base.fifo_reserve_size = 0;
        }
  
 +      ddirty.base.crtc = crtc;
 +
        ret = vmw_kms_helper_dirty(dev_priv, vfb, clips, vclips,
                                   0, 0, num_clips, increment, &ddirty.base);
        vmw_kms_helper_buffer_finish(dev_priv, file_priv, buf, NULL,
@@@ -897,7 -963,6 +897,7 @@@ static void vmw_kms_stdu_surface_fifo_c
   * @out_fence: If non-NULL, will return a ref-counted pointer to a
   * struct vmw_fence_obj. The returned fence pointer may be NULL in which
   * case the device has already synchronized.
 + * @crtc: If crtc is passed, perform surface dirty on that crtc only.
   *
   * Returns 0 on success, negative error code on failure. -ERESTARTSYS if
   * interrupted.
@@@ -910,18 -975,18 +910,19 @@@ int vmw_kms_stdu_surface_dirty(struct v
                               s32 dest_x,
                               s32 dest_y,
                               unsigned num_clips, int inc,
 -                             struct vmw_fence_obj **out_fence)
 +                             struct vmw_fence_obj **out_fence,
 +                             struct drm_crtc *crtc)
  {
        struct vmw_framebuffer_surface *vfbs =
                container_of(framebuffer, typeof(*vfbs), base);
        struct vmw_stdu_dirty sdirty;
+       struct vmw_validation_ctx ctx;
        int ret;
  
        if (!srf)
                srf = &vfbs->surface->res;
  
-       ret = vmw_kms_helper_resource_prepare(srf, true);
+       ret = vmw_kms_helper_resource_prepare(srf, true, &ctx);
        if (ret)
                return ret;
  
        sdirty.base.fifo_reserve_size = sizeof(struct vmw_stdu_surface_copy) +
                sizeof(SVGA3dCopyBox) * num_clips +
                sizeof(struct vmw_stdu_update);
 +      sdirty.base.crtc = crtc;
        sdirty.sid = srf->id;
        sdirty.left = sdirty.top = S32_MAX;
        sdirty.right = sdirty.bottom = S32_MIN;
                                   dest_x, dest_y, num_clips, inc,
                                   &sdirty.base);
  out_finish:
-       vmw_kms_helper_resource_finish(srf, out_fence);
+       vmw_kms_helper_resource_finish(&ctx, out_fence);
  
        return ret;
  }
@@@ -1055,6 -1119,9 +1056,6 @@@ vmw_stdu_primary_plane_cleanup_fb(struc
  {
        struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state);
  
 -      if (vps->host_map.virtual)
 -              ttm_bo_kunmap(&vps->host_map);
 -
        if (vps->surf)
                WARN_ON(!vps->pinned);
  
@@@ -1216,11 -1283,24 +1217,11 @@@ vmw_stdu_primary_plane_prepare_fb(struc
         * so cache these mappings
         */
        if (vps->content_fb_type == SEPARATE_DMA &&
 -          !(dev_priv->capabilities & SVGA_CAP_3D)) {
 -              ret = ttm_bo_kmap(&vps->surf->res.backup->base, 0,
 -                                vps->surf->res.backup->base.num_pages,
 -                                &vps->host_map);
 -              if (ret) {
 -                      DRM_ERROR("Failed to map display buffer to CPU\n");
 -                      goto out_srf_unpin;
 -              }
 -
 +          !(dev_priv->capabilities & SVGA_CAP_3D))
                vps->cpp = new_fb->pitches[0] / new_fb->width;
 -      }
  
        return 0;
  
 -out_srf_unpin:
 -      vmw_resource_unpin(&vps->surf->res);
 -      vps->pinned--;
 -
  out_srf_unref:
        vmw_surface_unreference(&vps->surf);
        return ret;
@@@ -1243,104 -1323,41 +1244,104 @@@ static voi
  vmw_stdu_primary_plane_atomic_update(struct drm_plane *plane,
                                     struct drm_plane_state *old_state)
  {
 -      struct vmw_private *dev_priv;
 -      struct vmw_screen_target_display_unit *stdu;
        struct vmw_plane_state *vps = vmw_plane_state_to_vps(plane->state);
 -      struct drm_crtc *crtc = plane->state->crtc ?: old_state->crtc;
 +      struct drm_crtc *crtc = plane->state->crtc;
 +      struct vmw_screen_target_display_unit *stdu;
 +      struct drm_pending_vblank_event *event;
 +      struct vmw_private *dev_priv;
        int ret;
  
 -      stdu     = vmw_crtc_to_stdu(crtc);
 -      dev_priv = vmw_priv(crtc->dev);
 +      /*
 +       * We cannot really fail this function, so if we do, then output an
 +       * error and maintain consistent atomic state.
 +       */
 +      if (crtc && plane->state->fb) {
 +              struct vmw_framebuffer *vfb =
 +                      vmw_framebuffer_to_vfb(plane->state->fb);
 +              struct drm_vmw_rect vclips;
 +              stdu = vmw_crtc_to_stdu(crtc);
 +              dev_priv = vmw_priv(crtc->dev);
 +
 +              stdu->display_srf = vps->surf;
 +              stdu->content_fb_type = vps->content_fb_type;
 +              stdu->cpp = vps->cpp;
 +
 +              vclips.x = crtc->x;
 +              vclips.y = crtc->y;
 +              vclips.w = crtc->mode.hdisplay;
 +              vclips.h = crtc->mode.vdisplay;
 +
 +              ret = vmw_stdu_bind_st(dev_priv, stdu, &stdu->display_srf->res);
 +              if (ret)
 +                      DRM_ERROR("Failed to bind surface to STDU.\n");
 +
 +              if (vfb->dmabuf)
 +                      ret = vmw_kms_stdu_dma(dev_priv, NULL, vfb, NULL, NULL,
 +                                             &vclips, 1, 1, true, false,
 +                                             crtc);
 +              else
 +                      ret = vmw_kms_stdu_surface_dirty(dev_priv, vfb, NULL,
 +                                                       &vclips, NULL, 0, 0,
 +                                                       1, 1, NULL, crtc);
 +              if (ret)
 +                      DRM_ERROR("Failed to update STDU.\n");
  
 -      stdu->display_srf = vps->surf;
 -      stdu->content_fb_type = vps->content_fb_type;
 -      stdu->cpp = vps->cpp;
 -      memcpy(&stdu->host_map, &vps->host_map, sizeof(vps->host_map));
 +              crtc->primary->fb = plane->state->fb;
 +      } else {
 +              crtc = old_state->crtc;
 +              stdu = vmw_crtc_to_stdu(crtc);
 +              dev_priv = vmw_priv(crtc->dev);
  
 -      if (!stdu->defined)
 -              return;
 +              /*
 +               * When disabling a plane, CRTC and FB should always be NULL
 +               * together, otherwise it's an error.
 +               * Here the primary plane is being disabled, so blank the
 +               * screen target display unit, if not already done.
 +               */
 +              if (!stdu->defined)
 +                      return;
  
 -      if (plane->state->fb)
 -              ret = vmw_stdu_bind_st(dev_priv, stdu, &stdu->display_srf->res);
 -      else
                ret = vmw_stdu_bind_st(dev_priv, stdu, NULL);
 +              if (ret)
 +                      DRM_ERROR("Failed to blank STDU\n");
 +
 +              ret = vmw_stdu_update_st(dev_priv, stdu);
 +              if (ret)
 +                      DRM_ERROR("Failed to update STDU.\n");
 +
 +              return;
 +      }
  
 +      event = crtc->state->event;
        /*
 -       * We cannot really fail this function, so if we do, then output an
 -       * error and quit
 +       * In case of failure (and in other cases), the vblank event will be
 +       * sent in vmw_du_crtc_atomic_flush.
         */
 -      if (ret)
 -              DRM_ERROR("Failed to bind surface to STDU.\n");
 -      else
 -              crtc->primary->fb = plane->state->fb;
 +      if (event && (ret == 0)) {
 +              struct vmw_fence_obj *fence = NULL;
 +              struct drm_file *file_priv = event->base.file_priv;
  
 -      ret = vmw_stdu_update_st(dev_priv, stdu);
 +              vmw_execbuf_fence_commands(NULL, dev_priv, &fence, NULL);
  
 -      if (ret)
 -              DRM_ERROR("Failed to update STDU.\n");
 +              /*
 +               * If fence is NULL, the device has already synchronized.
 +               */
 +              if (fence) {
 +                      ret = vmw_event_fence_action_queue(
 +                              file_priv, fence, &event->base,
 +                              &event->event.vbl.tv_sec,
 +                              &event->event.vbl.tv_usec,
 +                              true);
 +                      if (ret)
 +                              DRM_ERROR("Failed to queue event on fence.\n");
 +                      else
 +                              crtc->state->event = NULL;
 +
 +                      vmw_fence_obj_unreference(&fence);
 +              }
 +      } else {
 +              (void) vmw_fifo_flush(dev_priv, false);
 +      }
  }
  
  
diff --combined drivers/pci/quirks.c
@@@ -26,7 -26,6 +26,7 @@@
  #include <linux/ktime.h>
  #include <linux/mm.h>
  #include <linux/platform_data/x86/apple.h>
 +#include <linux/pm_runtime.h>
  #include <asm/dma.h>  /* isa_dma_bridge_buggy */
  #include "pci.h"
  
@@@ -3420,22 -3419,29 +3420,29 @@@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_I
  
  static void quirk_chelsio_extend_vpd(struct pci_dev *dev)
  {
-       pci_set_vpd_size(dev, 8192);
- }
- DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x20, quirk_chelsio_extend_vpd);
- DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x21, quirk_chelsio_extend_vpd);
- DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x22, quirk_chelsio_extend_vpd);
- DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x23, quirk_chelsio_extend_vpd);
- DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x24, quirk_chelsio_extend_vpd);
- DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x25, quirk_chelsio_extend_vpd);
- DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x26, quirk_chelsio_extend_vpd);
- DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x30, quirk_chelsio_extend_vpd);
- DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x31, quirk_chelsio_extend_vpd);
- DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x32, quirk_chelsio_extend_vpd);
- DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x35, quirk_chelsio_extend_vpd);
- DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x36, quirk_chelsio_extend_vpd);
- DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x37, quirk_chelsio_extend_vpd);
+       int chip = (dev->device & 0xf000) >> 12;
+       int func = (dev->device & 0x0f00) >>  8;
+       int prod = (dev->device & 0x00ff) >>  0;
+       /*
+        * If this is a T3-based adapter, there's a 1KB VPD area at offset
+        * 0xc00 which contains the preferred VPD values.  If this is a T4 or
+        * later based adapter, the special VPD is at offset 0x400 for the
+        * Physical Functions (the SR-IOV Virtual Functions have no VPD
+        * Capabilities).  The PCI VPD Access core routines will normally
+        * compute the size of the VPD by parsing the VPD Data Structure at
+        * offset 0x000.  This will result in silent failures when attempting
+        * to access these other VPD areas which are beyond those computed
+        * limits.
+        */
+       if (chip == 0x0 && prod >= 0x20)
+               pci_set_vpd_size(dev, 8192);
+       else if (chip >= 0x4 && func < 0x8)
+               pci_set_vpd_size(dev, 2048);
+ }
+ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, PCI_ANY_ID,
+                       quirk_chelsio_extend_vpd);
  
  #ifdef CONFIG_ACPI
  /*
@@@ -3902,6 -3908,8 +3909,8 @@@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_
                         quirk_dma_func1_alias);
  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TTI, 0x0642,
                         quirk_dma_func1_alias);
+ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TTI, 0x0645,
+                        quirk_dma_func1_alias);
  /* https://bugs.gentoo.org/show_bug.cgi?id=497630 */
  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_JMICRON,
                         PCI_DEVICE_ID_JMICRON_JMB388_ESD,
@@@ -4833,41 -4841,3 +4842,41 @@@ static void quirk_fsl_no_msi(struct pci
                pdev->no_msi = 1;
  }
  DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_FREESCALE, PCI_ANY_ID, quirk_fsl_no_msi);
 +
 +/*
 + * GPUs with integrated HDA controller for streaming audio to attached displays
 + * need a device link from the HDA controller (consumer) to the GPU (supplier)
 + * so that the GPU is powered up whenever the HDA controller is accessed.
 + * The GPU and HDA controller are functions 0 and 1 of the same PCI device.
 + * The device link stays in place until shutdown (or removal of the PCI device
 + * if it's hotplugged).  Runtime PM is allowed by default on the HDA controller
 + * to prevent it from permanently keeping the GPU awake.
 + */
 +static void quirk_gpu_hda(struct pci_dev *hda)
 +{
 +      struct pci_dev *gpu;
 +
 +      if (PCI_FUNC(hda->devfn) != 1)
 +              return;
 +
 +      gpu = pci_get_domain_bus_and_slot(pci_domain_nr(hda->bus),
 +                                        hda->bus->number,
 +                                        PCI_DEVFN(PCI_SLOT(hda->devfn), 0));
 +      if (!gpu || (gpu->class >> 16) != PCI_BASE_CLASS_DISPLAY) {
 +              pci_dev_put(gpu);
 +              return;
 +      }
 +
 +      if (!device_link_add(&hda->dev, &gpu->dev,
 +                           DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME))
 +              pci_err(hda, "cannot link HDA to GPU %s\n", pci_name(gpu));
 +
 +      pm_runtime_allow(&hda->dev);
 +      pci_dev_put(gpu);
 +}
 +DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_ATI, PCI_ANY_ID,
 +                            PCI_CLASS_MULTIMEDIA_HD_AUDIO, 8, quirk_gpu_hda);
 +DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_AMD, PCI_ANY_ID,
 +                            PCI_CLASS_MULTIMEDIA_HD_AUDIO, 8, quirk_gpu_hda);
 +DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
 +                            PCI_CLASS_MULTIMEDIA_HD_AUDIO, 8, quirk_gpu_hda);
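The device-link idiom in quirk_gpu_hda() above generalizes: any consumer device whose accesses require its supplier to be powered up can be tied to that supplier the same way. A hedged sketch with hypothetical consumer/supplier PCI devices (placeholder names, not part of this patch):

	/* Hypothetical: keep 'supplier' runtime-resumed whenever 'consumer' is in use. */
	if (!device_link_add(&consumer->dev, &supplier->dev,
			     DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME))
		pci_err(consumer, "cannot link to %s\n", pci_name(supplier));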
@@@ -186,6 -186,10 +186,10 @@@ module_param(power_save, xint, 0644)
  MODULE_PARM_DESC(power_save, "Automatic power-saving timeout "
                 "(in seconds, 0 = disable).");
  
+ static bool pm_blacklist = true;
+ module_param(pm_blacklist, bool, 0644);
+ MODULE_PARM_DESC(pm_blacklist, "Enable power-management blacklist");
  /* reset the HD-audio controller in power save mode.
   * this may give more power-saving, but will take longer time to
   * wake up.
@@@ -371,6 -375,7 +375,7 @@@ enum 
                                        ((pci)->device == 0x160c))
  
  #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98)
+ #define IS_CFL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa348)
  
  static char *driver_short_names[] = {
        [AZX_DRIVER_ICH] = "HDA Intel",
@@@ -1227,7 -1232,6 +1232,7 @@@ static void azx_vs_set_state(struct pci
        struct snd_card *card = pci_get_drvdata(pci);
        struct azx *chip = card->private_data;
        struct hda_intel *hda = container_of(chip, struct hda_intel, chip);
 +      struct hda_codec *codec;
        bool disabled;
  
        wait_for_completion(&hda->probe_wait);
                dev_info(chip->card->dev, "%s via vga_switcheroo\n",
                         disabled ? "Disabling" : "Enabling");
                if (disabled) {
 -                      pm_runtime_put_sync_suspend(card->dev);
 -                      azx_suspend(card->dev);
 +                      list_for_each_codec(codec, &chip->bus) {
 +                              pm_runtime_suspend(hda_codec_dev(codec));
 +                              pm_runtime_disable(hda_codec_dev(codec));
 +                      }
 +                      pm_runtime_suspend(card->dev);
 +                      pm_runtime_disable(card->dev);
                        /* when we get suspended by vga_switcheroo we end up in D3cold,
                         * however we have no ACPI handle, so pci/acpi can't put us there,
                         * put ourselves there */
                                         "Cannot lock devices!\n");
                } else {
                        snd_hda_unlock_devices(&chip->bus);
 -                      pm_runtime_get_noresume(card->dev);
                        chip->disabled = false;
 -                      azx_resume(card->dev);
 +                      pm_runtime_enable(card->dev);
 +                      list_for_each_codec(codec, &chip->bus) {
 +                              pm_runtime_enable(hda_codec_dev(codec));
 +                              pm_runtime_resume(hda_codec_dev(codec));
 +                      }
                }
        }
  }
@@@ -1303,7 -1300,6 +1308,7 @@@ static void init_vga_switcheroo(struct 
                dev_info(chip->card->dev,
                         "Handle vga_switcheroo audio client\n");
                hda->use_vga_switcheroo = 1;
 +              chip->driver_caps |= AZX_DCAPS_PM_RUNTIME;
                pci_dev_put(p);
        }
  }
@@@ -1329,6 -1325,9 +1334,6 @@@ static int register_vga_switcheroo(stru
                return err;
        hda->vga_switcheroo_registered = 1;
  
 -      /* register as an optimus hdmi audio power domain */
 -      vga_switcheroo_init_domain_pm_optimus_hdmi_audio(chip->card->dev,
 -                                                       &hda->hdmi_pm_domain);
        return 0;
  }
  #else
@@@ -1357,8 -1356,10 +1362,8 @@@ static int azx_free(struct azx *chip
        if (use_vga_switcheroo(hda)) {
                if (chip->disabled && hda->probe_continued)
                        snd_hda_unlock_devices(&chip->bus);
 -              if (hda->vga_switcheroo_registered) {
 +              if (hda->vga_switcheroo_registered)
                        vga_switcheroo_unregister_client(chip->pci);
 -                      vga_switcheroo_fini_domain_pm_ops(chip->card->dev);
 -              }
        }
  
        if (bus->chip_init) {
@@@ -1744,6 -1745,10 +1749,10 @@@ static int azx_create(struct snd_card *
        else
                chip->bdl_pos_adj = bdl_pos_adj[dev];
  
+       /* Workaround for a communication error on CFL (bko#199007) */
+       if (IS_CFL(pci))
+               chip->polling_mode = 1;
        err = azx_bus_init(chip, model[dev], &pci_hda_io_ops);
        if (err < 0) {
                kfree(hda);
@@@ -2190,6 -2195,24 +2199,24 @@@ out_free
        return err;
  }
  
+ #ifdef CONFIG_PM
+ /* On some boards, setting power_save to a non-zero value leads to clicking /
+  * popping sounds whenever we enter/leave powersaving mode. Ideally we would
+  * figure out how to avoid these sounds, but that is not always feasible.
+  * So we keep a list of devices where we disable powersaving, as it is known
+  * to cause problems on these devices.
+  */
+ static struct snd_pci_quirk power_save_blacklist[] = {
+       /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
+       SND_PCI_QUIRK(0x1849, 0x0c0c, "Asrock B85M-ITX", 0),
+       /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
+       SND_PCI_QUIRK(0x1043, 0x8733, "Asus Prime X370-Pro", 0),
+       /* https://bugzilla.kernel.org/show_bug.cgi?id=198611 */
+       SND_PCI_QUIRK(0x17aa, 0x2227, "Lenovo X1 Carbon 3rd Gen", 0),
+       {}
+ };
+ #endif /* CONFIG_PM */
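Extending the blacklist only requires the board's PCI subsystem IDs; a hypothetical example entry (the 0x1234/0x5678 values are placeholders, not a real board):

	/* Hypothetical entry: force power_save off for this board. */
	SND_PCI_QUIRK(0x1234, 0x5678, "Example Vendor Board", 0),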
  /* number of codec slots for each chipset: 0 = default slots (i.e. 4) */
  static unsigned int azx_max_codecs[AZX_NUM_DRIVERS] = {
        [AZX_DRIVER_NVIDIA] = 8,
@@@ -2201,8 -2224,8 +2228,9 @@@ static int azx_probe_continue(struct az
        struct hda_intel *hda = container_of(chip, struct hda_intel, chip);
        struct hdac_bus *bus = azx_bus(chip);
        struct pci_dev *pci = chip->pci;
 +      struct hda_codec *codec;
        int dev = chip->dev_index;
+       int val;
        int err;
  
        hda->probe_continued = 1;
        chip->running = 1;
        azx_add_card_list(chip);
  
-       snd_hda_set_power_save(&chip->bus, power_save * 1000);
+       val = power_save;
+ #ifdef CONFIG_PM
+       if (pm_blacklist) {
+               const struct snd_pci_quirk *q;
+               q = snd_pci_quirk_lookup(chip->pci, power_save_blacklist);
+               if (q && val) {
+                       dev_info(chip->card->dev, "device %04x:%04x is on the power_save blacklist, forcing power_save to 0\n",
+                                q->subvendor, q->subdevice);
+                       val = 0;
+               }
+       }
+ #endif /* CONFIG_PM */
 +      /*
 +       * The discrete GPU cannot power down unless the HDA controller runtime
 +       * suspends, so activate runtime PM on codecs even if power_save == 0.
 +       */
 +      if (use_vga_switcheroo(hda))
 +              list_for_each_codec(codec, &chip->bus)
 +                      codec->auto_runtime_pm = 1;
 +
 -      if (azx_has_pm_runtime(chip) || hda->use_vga_switcheroo)
+       snd_hda_set_power_save(&chip->bus, val * 1000);
 +      if (azx_has_pm_runtime(chip))
                pm_runtime_put_autosuspend(&pci->dev);
  
  out_free: