98 files changed, 995 insertions, 650 deletions
diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index a2cc6be97983..b792194e0d9c 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c
@@ -67,6 +67,7 @@ static int ast_detect_chip(struct drm_device *dev)
 {
 	struct ast_private *ast = dev->dev_private;
 	uint32_t data, jreg;
+	ast_open_key(ast);
 
 	if (dev->pdev->device == PCI_CHIP_AST1180) {
 		ast->chip = AST1100;
@@ -104,7 +105,7 @@ static int ast_detect_chip(struct drm_device *dev)
 			}
 			ast->vga2_clone = false;
 		} else {
-			ast->chip = 2000;
+			ast->chip = AST2000;
 			DRM_INFO("AST 2000 detected\n");
 		}
 	}
diff --git a/drivers/gpu/drm/ast/ast_tables.h b/drivers/gpu/drm/ast/ast_tables.h
index 4c761dcea972..05c01ea85294 100644
--- a/drivers/gpu/drm/ast/ast_tables.h
+++ b/drivers/gpu/drm/ast/ast_tables.h
@@ -99,6 +99,7 @@ static struct ast_vbios_dclk_info dclk_table[] = {
 	{0x25, 0x65, 0x80},					/* 16: VCLK88.75    */
 	{0x77, 0x58, 0x80},					/* 17: VCLK119      */
 	{0x32, 0x67, 0x80},				    /* 18: VCLK85_5     */
+	{0x6a, 0x6d, 0x80},					/* 19: VCLK97_75	*/
 };
 
 static struct ast_vbios_stdtable vbios_stdtable[] = {
diff --git a/drivers/gpu/drm/bochs/bochs_kms.c b/drivers/gpu/drm/bochs/bochs_kms.c
index 9d7346b92653..6b7efcf363d6 100644
--- a/drivers/gpu/drm/bochs/bochs_kms.c
+++ b/drivers/gpu/drm/bochs/bochs_kms.c
@@ -250,6 +250,7 @@ static void bochs_connector_init(struct drm_device *dev)
 			   DRM_MODE_CONNECTOR_VIRTUAL);
 	drm_connector_helper_add(connector,
 				 &bochs_connector_connector_helper_funcs);
+	drm_connector_register(connector);
 }
 
 
diff --git a/drivers/gpu/drm/cirrus/cirrus_mode.c b/drivers/gpu/drm/cirrus/cirrus_mode.c
index e1c5c3222129..c7c5a9d91fa0 100644
--- a/drivers/gpu/drm/cirrus/cirrus_mode.c
+++ b/drivers/gpu/drm/cirrus/cirrus_mode.c
@@ -555,6 +555,7 @@ static struct drm_connector *cirrus_vga_init(struct drm_device *dev)
 
 	drm_connector_helper_add(connector, &cirrus_vga_connector_helper_funcs);
 
+	drm_connector_register(connector);
 	return connector;
 }
 
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index fa2be249999c..90e773019eac 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -4696,8 +4696,9 @@ int drm_mode_create_dumb_ioctl(struct drm_device *dev,
 		return -EINVAL;
 
 	/* overflow checks for 32bit size calculations */
+	/* NOTE: DIV_ROUND_UP() can overflow */
 	cpp = DIV_ROUND_UP(args->bpp, 8);
-	if (cpp > 0xffffffffU / args->width)
+	if (!cpp || cpp > 0xffffffffU / args->width)
 		return -EINVAL;
 	stride = cpp * args->width;
 	if (args->height > 0xffffffffU / stride)
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 2e7f03ad5ee2..9933c26017ed 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1336,12 +1336,17 @@ static int i915_load_modeset_init(struct drm_device *dev)
 
 	intel_power_domains_init_hw(dev_priv);
 
+	/*
+	 * We enable some interrupt sources in our postinstall hooks, so mark
+	 * interrupts as enabled _before_ actually enabling them to avoid
+	 * special cases in our ordering checks.
+	 */
+	dev_priv->pm._irqs_disabled = false;
+
 	ret = drm_irq_install(dev, dev->pdev->irq);
 	if (ret)
 		goto cleanup_gem_stolen;
 
-	dev_priv->pm._irqs_disabled = false;
-
 	/* Important: The output setup functions called by modeset_init need
 	 * working irqs for e.g. gmbus and dp aux transfers. */
 	intel_modeset_init(dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index ec96f9a9724c..e27cdbe9d524 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -494,6 +494,36 @@ bool i915_semaphore_is_enabled(struct drm_device *dev)
 	return true;
 }
 
+void intel_hpd_cancel_work(struct drm_i915_private *dev_priv)
+{
+	spin_lock_irq(&dev_priv->irq_lock);
+
+	dev_priv->long_hpd_port_mask = 0;
+	dev_priv->short_hpd_port_mask = 0;
+	dev_priv->hpd_event_bits = 0;
+
+	spin_unlock_irq(&dev_priv->irq_lock);
+
+	cancel_work_sync(&dev_priv->dig_port_work);
+	cancel_work_sync(&dev_priv->hotplug_work);
+	cancel_delayed_work_sync(&dev_priv->hotplug_reenable_work);
+}
+
+static void intel_suspend_encoders(struct drm_i915_private *dev_priv)
+{
+	struct drm_device *dev = dev_priv->dev;
+	struct drm_encoder *encoder;
+
+	drm_modeset_lock_all(dev);
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+		struct intel_encoder *intel_encoder = to_intel_encoder(encoder);
+
+		if (intel_encoder->suspend)
+			intel_encoder->suspend(intel_encoder);
+	}
+	drm_modeset_unlock_all(dev);
+}
+
 static int i915_drm_freeze(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -538,6 +568,9 @@ static int i915_drm_freeze(struct drm_device *dev)
 		flush_delayed_work(&dev_priv->rps.delayed_resume_work);
 
 		intel_runtime_pm_disable_interrupts(dev);
+		intel_hpd_cancel_work(dev_priv);
+
+		intel_suspend_encoders(dev_priv);
 
 		intel_suspend_gt_powersave(dev);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4412f6a4383b..3524306d8cfb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -184,6 +184,7 @@ enum hpd_pin {
 		if ((1 << (domain)) & (mask))
 
 struct drm_i915_private;
+struct i915_mm_struct;
 struct i915_mmu_object;
 
 enum intel_dpll_id {
@@ -1458,7 +1459,7 @@ struct drm_i915_private {
 		} hpd_mark;
 	} hpd_stats[HPD_NUM_PINS];
 	u32 hpd_event_bits;
-	struct timer_list hotplug_reenable_timer;
+	struct delayed_work hotplug_reenable_work;
 
 	struct i915_fbc fbc;
 	struct i915_drrs drrs;
@@ -1506,9 +1507,8 @@ struct drm_i915_private {
 	struct i915_gtt gtt; /* VM representing the global address space */
 
 	struct i915_gem_mm mm;
-#if defined(CONFIG_MMU_NOTIFIER)
-	DECLARE_HASHTABLE(mmu_notifiers, 7);
-#endif
+	DECLARE_HASHTABLE(mm_structs, 7);
+	struct mutex mm_lock;
 
 	/* Kernel Modesetting */
 
@@ -1814,8 +1814,8 @@ struct drm_i915_gem_object {
 			unsigned workers :4;
 #define I915_GEM_USERPTR_MAX_WORKERS 15
 
-			struct mm_struct *mm;
-			struct i915_mmu_object *mn;
+			struct i915_mm_struct *mm;
+			struct i915_mmu_object *mmu_object;
 			struct work_struct *work;
 		} userptr;
 	};
@@ -2178,6 +2178,7 @@ extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
 extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
 extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
 int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
+void intel_hpd_cancel_work(struct drm_i915_private *dev_priv);
 
 extern void intel_console_resume(struct work_struct *work);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ba7f5c6bb50d..ad55b06a3cb1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1590,10 +1590,13 @@ unlock:
 out:
 	switch (ret) {
 	case -EIO:
-		/* If this -EIO is due to a gpu hang, give the reset code a
-		 * chance to clean up the mess. Otherwise return the proper
-		 * SIGBUS. */
-		if (i915_terminally_wedged(&dev_priv->gpu_error)) {
+		/*
+		 * We eat errors when the gpu is terminally wedged to avoid
+		 * userspace unduly crashing (gl has no provisions for mmaps to
+		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
+		 * and so needs to be reported.
+		 */
+		if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
 			ret = VM_FAULT_SIGBUS;
 			break;
 		}
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index fe69fc837d9e..d38413997379 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -32,6 +32,15 @@
 #include <linux/mempolicy.h>
 #include <linux/swap.h>
 
+struct i915_mm_struct {
+	struct mm_struct *mm;
+	struct drm_device *dev;
+	struct i915_mmu_notifier *mn;
+	struct hlist_node node;
+	struct kref kref;
+	struct work_struct work;
+};
+
 #if defined(CONFIG_MMU_NOTIFIER)
 #include <linux/interval_tree.h>
 
@@ -41,16 +50,12 @@ struct i915_mmu_notifier {
 	struct mmu_notifier mn;
 	struct rb_root objects;
 	struct list_head linear;
-	struct drm_device *dev;
-	struct mm_struct *mm;
-	struct work_struct work;
-	unsigned long count;
 	unsigned long serial;
 	bool has_linear;
 };
 
 struct i915_mmu_object {
-	struct i915_mmu_notifier *mmu;
+	struct i915_mmu_notifier *mn;
 	struct interval_tree_node it;
 	struct list_head link;
 	struct drm_i915_gem_object *obj;
@@ -96,18 +101,18 @@ static void *invalidate_range__linear(struct i915_mmu_notifier *mn,
 				      unsigned long start,
 				      unsigned long end)
 {
-	struct i915_mmu_object *mmu;
+	struct i915_mmu_object *mo;
 	unsigned long serial;
 
 restart:
 	serial = mn->serial;
-	list_for_each_entry(mmu, &mn->linear, link) {
+	list_for_each_entry(mo, &mn->linear, link) {
 		struct drm_i915_gem_object *obj;
 
-		if (mmu->it.last < start || mmu->it.start > end)
+		if (mo->it.last < start || mo->it.start > end)
 			continue;
 
-		obj = mmu->obj;
+		obj = mo->obj;
 		drm_gem_object_reference(&obj->base);
 		spin_unlock(&mn->lock);
 
@@ -160,130 +165,47 @@ static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
 };
 
 static struct i915_mmu_notifier *
-__i915_mmu_notifier_lookup(struct drm_device *dev, struct mm_struct *mm)
-{
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct i915_mmu_notifier *mmu;
-
-	/* Protected by dev->struct_mutex */
-	hash_for_each_possible(dev_priv->mmu_notifiers, mmu, node, (unsigned long)mm)
-		if (mmu->mm == mm)
-			return mmu;
-
-	return NULL;
-}
-
-static struct i915_mmu_notifier *
-i915_mmu_notifier_get(struct drm_device *dev, struct mm_struct *mm)
+i915_mmu_notifier_create(struct mm_struct *mm)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct i915_mmu_notifier *mmu;
+	struct i915_mmu_notifier *mn;
 	int ret;
 
-	lockdep_assert_held(&dev->struct_mutex);
-
-	mmu = __i915_mmu_notifier_lookup(dev, mm);
-	if (mmu)
-		return mmu;
-
-	mmu = kmalloc(sizeof(*mmu), GFP_KERNEL);
-	if (mmu == NULL)
+	mn = kmalloc(sizeof(*mn), GFP_KERNEL);
+	if (mn == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	spin_lock_init(&mmu->lock);
-	mmu->dev = dev;
-	mmu->mn.ops = &i915_gem_userptr_notifier;
-	mmu->mm = mm;
-	mmu->objects = RB_ROOT;
-	mmu->count = 0;
-	mmu->serial = 1;
-	INIT_LIST_HEAD(&mmu->linear);
-	mmu->has_linear = false;
-
-	/* Protected by mmap_sem (write-lock) */
-	ret = __mmu_notifier_register(&mmu->mn, mm);
+	spin_lock_init(&mn->lock);
+	mn->mn.ops = &i915_gem_userptr_notifier;
+	mn->objects = RB_ROOT;
+	mn->serial = 1;
+	INIT_LIST_HEAD(&mn->linear);
+	mn->has_linear = false;
+
+	 /* Protected by mmap_sem (write-lock) */
+	ret = __mmu_notifier_register(&mn->mn, mm);
 	if (ret) {
-		kfree(mmu);
+		kfree(mn);
 		return ERR_PTR(ret);
 	}
 
-	/* Protected by dev->struct_mutex */
-	hash_add(dev_priv->mmu_notifiers, &mmu->node, (unsigned long)mm);
-	return mmu;
+	return mn;
 }
 
-static void
-__i915_mmu_notifier_destroy_worker(struct work_struct *work)
+static void __i915_mmu_notifier_update_serial(struct i915_mmu_notifier *mn)
 {
-	struct i915_mmu_notifier *mmu = container_of(work, typeof(*mmu), work);
-	mmu_notifier_unregister(&mmu->mn, mmu->mm);
-	kfree(mmu);
-}
-
-static void
-__i915_mmu_notifier_destroy(struct i915_mmu_notifier *mmu)
-{
-	lockdep_assert_held(&mmu->dev->struct_mutex);
-
-	/* Protected by dev->struct_mutex */
-	hash_del(&mmu->node);
-
-	/* Our lock ordering is: mmap_sem, mmu_notifier_scru, struct_mutex.
-	 * We enter the function holding struct_mutex, therefore we need
-	 * to drop our mutex prior to calling mmu_notifier_unregister in
-	 * order to prevent lock inversion (and system-wide deadlock)
-	 * between the mmap_sem and struct-mutex. Hence we defer the
-	 * unregistration to a workqueue where we hold no locks.
-	 */
-	INIT_WORK(&mmu->work, __i915_mmu_notifier_destroy_worker);
-	schedule_work(&mmu->work);
-}
-
-static void __i915_mmu_notifier_update_serial(struct i915_mmu_notifier *mmu)
-{
-	if (++mmu->serial == 0)
-		mmu->serial = 1;
-}
-
-static bool i915_mmu_notifier_has_linear(struct i915_mmu_notifier *mmu)
-{
-	struct i915_mmu_object *mn;
-
-	list_for_each_entry(mn, &mmu->linear, link)
-		if (mn->is_linear)
-			return true;
-
-	return false;
-}
-
-static void
-i915_mmu_notifier_del(struct i915_mmu_notifier *mmu,
-		      struct i915_mmu_object *mn)
-{
-	lockdep_assert_held(&mmu->dev->struct_mutex);
-
-	spin_lock(&mmu->lock);
-	list_del(&mn->link);
-	if (mn->is_linear)
-		mmu->has_linear = i915_mmu_notifier_has_linear(mmu);
-	else
-		interval_tree_remove(&mn->it, &mmu->objects);
-	__i915_mmu_notifier_update_serial(mmu);
-	spin_unlock(&mmu->lock);
-
-	/* Protected against _add() by dev->struct_mutex */
-	if (--mmu->count == 0)
-		__i915_mmu_notifier_destroy(mmu);
+	if (++mn->serial == 0)
+		mn->serial = 1;
 }
 
 static int
-i915_mmu_notifier_add(struct i915_mmu_notifier *mmu,
-		      struct i915_mmu_object *mn)
+i915_mmu_notifier_add(struct drm_device *dev,
+		      struct i915_mmu_notifier *mn,
+		      struct i915_mmu_object *mo)
 {
 	struct interval_tree_node *it;
 	int ret;
 
-	ret = i915_mutex_lock_interruptible(mmu->dev);
+	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
 		return ret;
 
@@ -291,11 +213,11 @@ i915_mmu_notifier_add(struct i915_mmu_notifier *mmu,
 	 * remove the objects from the interval tree) before we do
 	 * the check for overlapping objects.
 	 */
-	i915_gem_retire_requests(mmu->dev);
+	i915_gem_retire_requests(dev);
 
-	spin_lock(&mmu->lock);
-	it = interval_tree_iter_first(&mmu->objects,
-				      mn->it.start, mn->it.last);
+	spin_lock(&mn->lock);
+	it = interval_tree_iter_first(&mn->objects,
+				      mo->it.start, mo->it.last);
 	if (it) {
 		struct drm_i915_gem_object *obj;
 
@@ -312,86 +234,122 @@ i915_mmu_notifier_add(struct i915_mmu_notifier *mmu,
 
 		obj = container_of(it, struct i915_mmu_object, it)->obj;
 		if (!obj->userptr.workers)
-			mmu->has_linear = mn->is_linear = true;
+			mn->has_linear = mo->is_linear = true;
 		else
 			ret = -EAGAIN;
 	} else
-		interval_tree_insert(&mn->it, &mmu->objects);
+		interval_tree_insert(&mo->it, &mn->objects);
 
 	if (ret == 0) {
-		list_add(&mn->link, &mmu->linear);
-		__i915_mmu_notifier_update_serial(mmu);
+		list_add(&mo->link, &mn->linear);
+		__i915_mmu_notifier_update_serial(mn);
 	}
-	spin_unlock(&mmu->lock);
-	mutex_unlock(&mmu->dev->struct_mutex);
+	spin_unlock(&mn->lock);
+	mutex_unlock(&dev->struct_mutex);
 
 	return ret;
 }
 
+static bool i915_mmu_notifier_has_linear(struct i915_mmu_notifier *mn)
+{
+	struct i915_mmu_object *mo;
+
+	list_for_each_entry(mo, &mn->linear, link)
+		if (mo->is_linear)
+			return true;
+
+	return false;
+}
+
+static void
+i915_mmu_notifier_del(struct i915_mmu_notifier *mn,
+		      struct i915_mmu_object *mo)
+{
+	spin_lock(&mn->lock);
+	list_del(&mo->link);
+	if (mo->is_linear)
+		mn->has_linear = i915_mmu_notifier_has_linear(mn);
+	else
+		interval_tree_remove(&mo->it, &mn->objects);
+	__i915_mmu_notifier_update_serial(mn);
+	spin_unlock(&mn->lock);
+}
+
 static void
 i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
 {
-	struct i915_mmu_object *mn;
+	struct i915_mmu_object *mo;
 
-	mn = obj->userptr.mn;
-	if (mn == NULL)
+	mo = obj->userptr.mmu_object;
+	if (mo == NULL)
 		return;
 
-	i915_mmu_notifier_del(mn->mmu, mn);
-	obj->userptr.mn = NULL;
+	i915_mmu_notifier_del(mo->mn, mo);
+	kfree(mo);
+
+	obj->userptr.mmu_object = NULL;
+}
+
+static struct i915_mmu_notifier *
+i915_mmu_notifier_find(struct i915_mm_struct *mm)
+{
+	if (mm->mn == NULL) {
+		down_write(&mm->mm->mmap_sem);
+		mutex_lock(&to_i915(mm->dev)->mm_lock);
+		if (mm->mn == NULL)
+			mm->mn = i915_mmu_notifier_create(mm->mm);
+		mutex_unlock(&to_i915(mm->dev)->mm_lock);
+		up_write(&mm->mm->mmap_sem);
+	}
+	return mm->mn;
 }
 
 static int
 i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
 				    unsigned flags)
 {
-	struct i915_mmu_notifier *mmu;
-	struct i915_mmu_object *mn;
+	struct i915_mmu_notifier *mn;
+	struct i915_mmu_object *mo;
 	int ret;
 
 	if (flags & I915_USERPTR_UNSYNCHRONIZED)
 		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
 
-	down_write(&obj->userptr.mm->mmap_sem);
-	ret = i915_mutex_lock_interruptible(obj->base.dev);
-	if (ret == 0) {
-		mmu = i915_mmu_notifier_get(obj->base.dev, obj->userptr.mm);
-		if (!IS_ERR(mmu))
-			mmu->count++; /* preemptive add to act as a refcount */
-		else
-			ret = PTR_ERR(mmu);
-		mutex_unlock(&obj->base.dev->struct_mutex);
-	}
-	up_write(&obj->userptr.mm->mmap_sem);
-	if (ret)
-		return ret;
+	if (WARN_ON(obj->userptr.mm == NULL))
+		return -EINVAL;
 
-	mn = kzalloc(sizeof(*mn), GFP_KERNEL);
-	if (mn == NULL) {
-		ret = -ENOMEM;
-		goto destroy_mmu;
-	}
+	mn = i915_mmu_notifier_find(obj->userptr.mm);
+	if (IS_ERR(mn))
+		return PTR_ERR(mn);
 
-	mn->mmu = mmu;
-	mn->it.start = obj->userptr.ptr;
-	mn->it.last = mn->it.start + obj->base.size - 1;
-	mn->obj = obj;
+	mo = kzalloc(sizeof(*mo), GFP_KERNEL);
+	if (mo == NULL)
+		return -ENOMEM;
 
-	ret = i915_mmu_notifier_add(mmu, mn);
-	if (ret)
-		goto free_mn;
+	mo->mn = mn;
+	mo->it.start = obj->userptr.ptr;
+	mo->it.last = mo->it.start + obj->base.size - 1;
+	mo->obj = obj;
 
-	obj->userptr.mn = mn;
+	ret = i915_mmu_notifier_add(obj->base.dev, mn, mo);
+	if (ret) {
+		kfree(mo);
+		return ret;
+	}
+
+	obj->userptr.mmu_object = mo;
 	return 0;
+}
+
+static void
+i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
+		       struct mm_struct *mm)
+{
+	if (mn == NULL)
+		return;
 
-free_mn:
+	mmu_notifier_unregister(&mn->mn, mm);
 	kfree(mn);
-destroy_mmu:
-	mutex_lock(&obj->base.dev->struct_mutex);
-	if (--mmu->count == 0)
-		__i915_mmu_notifier_destroy(mmu);
-	mutex_unlock(&obj->base.dev->struct_mutex);
-	return ret;
 }
 
 #else
@@ -413,15 +371,114 @@ i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
 
 	return 0;
 }
+
+static void
+i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
+		       struct mm_struct *mm)
+{
+}
+
 #endif
 
+static struct i915_mm_struct *
+__i915_mm_struct_find(struct drm_i915_private *dev_priv, struct mm_struct *real)
+{
+	struct i915_mm_struct *mm;
+
+	/* Protected by dev_priv->mm_lock */
+	hash_for_each_possible(dev_priv->mm_structs, mm, node, (unsigned long)real)
+		if (mm->mm == real)
+			return mm;
+
+	return NULL;
+}
+
+static int
+i915_gem_userptr_init__mm_struct(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+	struct i915_mm_struct *mm;
+	int ret = 0;
+
+	/* During release of the GEM object we hold the struct_mutex. This
+	 * precludes us from calling mmput() at that time as that may be
+	 * the last reference and so call exit_mmap(). exit_mmap() will
+	 * attempt to reap the vma, and if we were holding a GTT mmap
+	 * would then call drm_gem_vm_close() and attempt to reacquire
+	 * the struct mutex. So in order to avoid that recursion, we have
+	 * to defer releasing the mm reference until after we drop the
+	 * struct_mutex, i.e. we need to schedule a worker to do the clean
+	 * up.
+	 */
+	mutex_lock(&dev_priv->mm_lock);
+	mm = __i915_mm_struct_find(dev_priv, current->mm);
+	if (mm == NULL) {
+		mm = kmalloc(sizeof(*mm), GFP_KERNEL);
+		if (mm == NULL) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		kref_init(&mm->kref);
+		mm->dev = obj->base.dev;
+
+		mm->mm = current->mm;
+		atomic_inc(&current->mm->mm_count);
+
+		mm->mn = NULL;
+
+		/* Protected by dev_priv->mm_lock */
+		hash_add(dev_priv->mm_structs,
+			 &mm->node, (unsigned long)mm->mm);
+	} else
+		kref_get(&mm->kref);
+
+	obj->userptr.mm = mm;
+out:
+	mutex_unlock(&dev_priv->mm_lock);
+	return ret;
+}
+
+static void
+__i915_mm_struct_free__worker(struct work_struct *work)
+{
+	struct i915_mm_struct *mm = container_of(work, typeof(*mm), work);
+	i915_mmu_notifier_free(mm->mn, mm->mm);
+	mmdrop(mm->mm);
+	kfree(mm);
+}
+
+static void
+__i915_mm_struct_free(struct kref *kref)
+{
+	struct i915_mm_struct *mm = container_of(kref, typeof(*mm), kref);
+
+	/* Protected by dev_priv->mm_lock */
+	hash_del(&mm->node);
+	mutex_unlock(&to_i915(mm->dev)->mm_lock);
+
+	INIT_WORK(&mm->work, __i915_mm_struct_free__worker);
+	schedule_work(&mm->work);
+}
+
+static void
+i915_gem_userptr_release__mm_struct(struct drm_i915_gem_object *obj)
+{
+	if (obj->userptr.mm == NULL)
+		return;
+
+	kref_put_mutex(&obj->userptr.mm->kref,
+		       __i915_mm_struct_free,
+		       &to_i915(obj->base.dev)->mm_lock);
+	obj->userptr.mm = NULL;
+}
+
 struct get_pages_work {
 	struct work_struct work;
 	struct drm_i915_gem_object *obj;
 	struct task_struct *task;
 };
 
-
 #if IS_ENABLED(CONFIG_SWIOTLB)
 #define swiotlb_active() swiotlb_nr_tbl()
 #else
@@ -479,7 +536,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 	if (pvec == NULL)
 		pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
 	if (pvec != NULL) {
-		struct mm_struct *mm = obj->userptr.mm;
+		struct mm_struct *mm = obj->userptr.mm->mm;
 
 		down_read(&mm->mmap_sem);
 		while (pinned < num_pages) {
@@ -545,7 +602,7 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
 
 	pvec = NULL;
 	pinned = 0;
-	if (obj->userptr.mm == current->mm) {
+	if (obj->userptr.mm->mm == current->mm) {
 		pvec = kmalloc(num_pages*sizeof(struct page *),
 			       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
 		if (pvec == NULL) {
@@ -651,17 +708,13 @@ static void
 i915_gem_userptr_release(struct drm_i915_gem_object *obj)
 {
 	i915_gem_userptr_release__mmu_notifier(obj);
-
-	if (obj->userptr.mm) {
-		mmput(obj->userptr.mm);
-		obj->userptr.mm = NULL;
-	}
+	i915_gem_userptr_release__mm_struct(obj);
 }
 
 static int
 i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
 {
-	if (obj->userptr.mn)
+	if (obj->userptr.mmu_object)
 		return 0;
 
 	return i915_gem_userptr_init__mmu_notifier(obj, 0);
@@ -736,7 +789,6 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file
 		return -ENODEV;
 	}
 
-	/* Allocate the new object */
 	obj = i915_gem_object_alloc(dev);
 	if (obj == NULL)
 		return -ENOMEM;
@@ -754,8 +806,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file
 	 * at binding. This means that we need to hook into the mmu_notifier
 	 * in order to detect if the mmu is destroyed.
 	 */
-	ret = -ENOMEM;
-	if ((obj->userptr.mm = get_task_mm(current)))
+	ret = i915_gem_userptr_init__mm_struct(obj);
+	if (ret == 0)
 		ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
 	if (ret == 0)
 		ret = drm_gem_handle_create(file, &obj->base, &handle);
@@ -772,9 +824,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file
 int
 i915_gem_init_userptr(struct drm_device *dev)
 {
-#if defined(CONFIG_MMU_NOTIFIER)
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	hash_init(dev_priv->mmu_notifiers);
-#endif
+	mutex_init(&dev_priv->mm_lock);
+	hash_init(dev_priv->mm_structs);
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 390ccc2a3096..0050ee9470f1 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1189,8 +1189,8 @@ static void i915_hotplug_work_func(struct work_struct *work)
 	  * some connectors */
 	if (hpd_disabled) {
 		drm_kms_helper_poll_enable(dev);
-		mod_timer(&dev_priv->hotplug_reenable_timer,
-			  jiffies + msecs_to_jiffies(I915_REENABLE_HOTPLUG_DELAY));
+		mod_delayed_work(system_wq, &dev_priv->hotplug_reenable_work,
+				 msecs_to_jiffies(I915_REENABLE_HOTPLUG_DELAY));
 	}
 
 	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
@@ -1213,11 +1213,6 @@ static void i915_hotplug_work_func(struct work_struct *work)
 		drm_kms_helper_hotplug_event(dev);
 }
 
-static void intel_hpd_irq_uninstall(struct drm_i915_private *dev_priv)
-{
-	del_timer_sync(&dev_priv->hotplug_reenable_timer);
-}
-
 static void ironlake_rps_change_irq_handler(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3892,8 +3887,6 @@ static void gen8_irq_uninstall(struct drm_device *dev)
 	if (!dev_priv)
 		return;
 
-	intel_hpd_irq_uninstall(dev_priv);
-
 	gen8_irq_reset(dev);
 }
 
@@ -3908,8 +3901,6 @@ static void valleyview_irq_uninstall(struct drm_device *dev)
 
 	I915_WRITE(VLV_MASTER_IER, 0);
 
-	intel_hpd_irq_uninstall(dev_priv);
-
 	for_each_pipe(pipe)
 		I915_WRITE(PIPESTAT(pipe), 0xffff);
 
@@ -3988,8 +3979,6 @@ static void ironlake_irq_uninstall(struct drm_device *dev)
 	if (!dev_priv)
 		return;
 
-	intel_hpd_irq_uninstall(dev_priv);
-
 	ironlake_irq_reset(dev);
 }
 
@@ -4360,8 +4349,6 @@ static void i915_irq_uninstall(struct drm_device * dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int pipe;
 
-	intel_hpd_irq_uninstall(dev_priv);
-
 	if (I915_HAS_HOTPLUG(dev)) {
 		I915_WRITE(PORT_HOTPLUG_EN, 0);
 		I915_WRITE(PORT_HOTPLUG_STAT, I915_READ(PORT_HOTPLUG_STAT));
@@ -4598,8 +4585,6 @@ static void i965_irq_uninstall(struct drm_device * dev)
 	if (!dev_priv)
 		return;
 
-	intel_hpd_irq_uninstall(dev_priv);
-
 	I915_WRITE(PORT_HOTPLUG_EN, 0);
 	I915_WRITE(PORT_HOTPLUG_STAT, I915_READ(PORT_HOTPLUG_STAT));
 
@@ -4615,14 +4600,18 @@ static void i965_irq_uninstall(struct drm_device * dev)
 	I915_WRITE(IIR, I915_READ(IIR));
 }
 
-static void intel_hpd_irq_reenable(unsigned long data)
+static void intel_hpd_irq_reenable(struct work_struct *work)
 {
-	struct drm_i915_private *dev_priv = (struct drm_i915_private *)data;
+	struct drm_i915_private *dev_priv =
+		container_of(work, typeof(*dev_priv),
+			     hotplug_reenable_work.work);
 	struct drm_device *dev = dev_priv->dev;
 	struct drm_mode_config *mode_config = &dev->mode_config;
 	unsigned long irqflags;
 	int i;
 
+	intel_runtime_pm_get(dev_priv);
+
 	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
 	for (i = (HPD_NONE + 1); i < HPD_NUM_PINS; i++) {
 		struct drm_connector *connector;
@@ -4648,6 +4637,8 @@ static void intel_hpd_irq_reenable(unsigned long data)
 	if (dev_priv->display.hpd_irq_setup)
 		dev_priv->display.hpd_irq_setup(dev);
 	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
+
+	intel_runtime_pm_put(dev_priv);
 }
 
 void intel_irq_init(struct drm_device *dev)
@@ -4670,8 +4661,8 @@ void intel_irq_init(struct drm_device *dev)
 	setup_timer(&dev_priv->gpu_error.hangcheck_timer,
 		    i915_hangcheck_elapsed,
 		    (unsigned long) dev);
-	setup_timer(&dev_priv->hotplug_reenable_timer, intel_hpd_irq_reenable,
-		    (unsigned long) dev_priv);
+	INIT_DELAYED_WORK(&dev_priv->hotplug_reenable_work,
+			  intel_hpd_irq_reenable);
 
 	pm_qos_add_request(&dev_priv->pm_qos, PM_QOS_CPU_DMA_LATENCY, PM_QOS_DEFAULT_VALUE);
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index e4d7607da2c4..f29b44c86a2f 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -334,16 +334,20 @@
 #define GFX_OP_DESTBUFFER_INFO	 ((0x3<<29)|(0x1d<<24)|(0x8e<<16)|1)
 #define GFX_OP_DRAWRECT_INFO     ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3))
 #define GFX_OP_DRAWRECT_INFO_I965  ((0x7900<<16)|0x2)
-#define SRC_COPY_BLT_CMD                ((2<<29)|(0x43<<22)|4)
+
+#define COLOR_BLT_CMD			(2<<29 | 0x40<<22 | (5-2))
+#define SRC_COPY_BLT_CMD		((2<<29)|(0x43<<22)|4)
 #define XY_SRC_COPY_BLT_CMD		((2<<29)|(0x53<<22)|6)
 #define XY_MONO_SRC_COPY_IMM_BLT	((2<<29)|(0x71<<22)|5)
-#define XY_SRC_COPY_BLT_WRITE_ALPHA	(1<<21)
-#define XY_SRC_COPY_BLT_WRITE_RGB	(1<<20)
+#define   BLT_WRITE_A			(2<<20)
+#define   BLT_WRITE_RGB			(1<<20)
+#define   BLT_WRITE_RGBA		(BLT_WRITE_RGB | BLT_WRITE_A)
 #define   BLT_DEPTH_8			(0<<24)
 #define   BLT_DEPTH_16_565		(1<<24)
 #define   BLT_DEPTH_16_1555		(2<<24)
 #define   BLT_DEPTH_32			(3<<24)
-#define   BLT_ROP_GXCOPY		(0xcc<<16)
+#define   BLT_ROP_SRC_COPY		(0xcc<<16)
+#define   BLT_ROP_COLOR_COPY		(0xf0<<16)
 #define XY_SRC_COPY_BLT_SRC_TILED	(1<<15) /* 965+ only */
 #define XY_SRC_COPY_BLT_DST_TILED	(1<<11) /* 965+ only */
 #define CMD_OP_DISPLAYBUFFER_INFO ((0x0<<29)|(0x14<<23)|2)
diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index a66955037e4e..eee79e1c3222 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -1123,7 +1123,7 @@ init_vbt_defaults(struct drm_i915_private *dev_priv)
 	}
 }
 
-static int __init intel_no_opregion_vbt_callback(const struct dmi_system_id *id)
+static int intel_no_opregion_vbt_callback(const struct dmi_system_id *id)
 {
 	DRM_DEBUG_KMS("Falling back to manually reading VBT from "
 		      "VBIOS ROM for %s\n",
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 2efaf8e8d9c4..9212e6504e0f 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -699,16 +699,21 @@ intel_crt_detect(struct drm_connector *connector, bool force)
 		goto out;
 	}
 
+	drm_modeset_acquire_init(&ctx, 0);
+
 	/* for pre-945g platforms use load detect */
 	if (intel_get_load_detect_pipe(connector, NULL, &tmp, &ctx)) {
 		if (intel_crt_detect_ddc(connector))
 			status = connector_status_connected;
 		else
 			status = intel_crt_load_detect(crt);
-		intel_release_load_detect_pipe(connector, &tmp, &ctx);
+		intel_release_load_detect_pipe(connector, &tmp);
 	} else
 		status = connector_status_unknown;
 
+	drm_modeset_drop_locks(&ctx);
+	drm_modeset_acquire_fini(&ctx);
+
 out:
 	intel_display_power_put(dev_priv, power_domain);
 	return status;
@@ -799,7 +804,7 @@ static const struct drm_encoder_funcs intel_crt_enc_funcs = {
 	.destroy = intel_encoder_destroy,
 };
 
-static int __init intel_no_crt_dmi_callback(const struct dmi_system_id *id)
+static int intel_no_crt_dmi_callback(const struct dmi_system_id *id)
 {
 	DRM_INFO("Skipping CRT initialization for %s\n", id->ident);
 	return 1;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 018fb7222f60..d8324c69fa86 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2233,6 +2233,15 @@ intel_pin_and_fence_fb_obj(struct drm_device *dev,
 	if (need_vtd_wa(dev) && alignment < 256 * 1024)
 		alignment = 256 * 1024;
 
+	/*
+	 * Global gtt pte registers are special registers which actually forward
+	 * writes to a chunk of system memory. Which means that there is no risk
+	 * that the register values disappear as soon as we call
+	 * intel_runtime_pm_put(), so it is correct to wrap only the
+	 * pin/unpin/fence and not more.
+	 */
+	intel_runtime_pm_get(dev_priv);
+
 	dev_priv->mm.interruptible = false;
 	ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined);
 	if (ret)
@@ -2250,12 +2259,14 @@ intel_pin_and_fence_fb_obj(struct drm_device *dev,
 	i915_gem_object_pin_fence(obj);
 
 	dev_priv->mm.interruptible = true;
+	intel_runtime_pm_put(dev_priv);
 	return 0;
 
 err_unpin:
 	i915_gem_object_unpin_from_display_plane(obj);
 err_interruptible:
 	dev_priv->mm.interruptible = true;
+	intel_runtime_pm_put(dev_priv);
 	return ret;
 }
 
@@ -4188,10 +4199,6 @@ static void ironlake_crtc_disable(struct drm_crtc *crtc)
 		intel_set_pch_fifo_underrun_reporting(dev, pipe, false);
 
 	intel_disable_pipe(dev_priv, pipe);
-
-	if (intel_crtc->config.dp_encoder_is_mst)
-		intel_ddi_set_vc_payload_alloc(crtc, false);
-
 	ironlake_pfit_disable(intel_crtc);
 
 	for_each_encoder_on_crtc(dev, crtc, encoder)
@@ -4256,6 +4263,9 @@ static void haswell_crtc_disable(struct drm_crtc *crtc)
 		intel_set_pch_fifo_underrun_reporting(dev, TRANSCODER_A, false);
 	intel_disable_pipe(dev_priv, pipe);
 
+	if (intel_crtc->config.dp_encoder_is_mst)
+		intel_ddi_set_vc_payload_alloc(crtc, false);
+
 	intel_ddi_disable_transcoder_func(dev_priv, cpu_transcoder);
 
 	ironlake_pfit_disable(intel_crtc);
@@ -8240,6 +8250,15 @@ static int intel_crtc_cursor_set_obj(struct drm_crtc *crtc,
 			goto fail_locked;
 		}
 
+		/*
+		 * Global gtt pte registers are special registers which actually
+		 * forward writes to a chunk of system memory. Which means that
+		 * there is no risk that the register values disappear as soon
+		 * as we call intel_runtime_pm_put(), so it is correct to wrap
+		 * only the pin/unpin/fence and not more.
+		 */
+		intel_runtime_pm_get(dev_priv);
+
 		/* Note that the w/a also requires 2 PTE of padding following
 		 * the bo. We currently fill all unused PTE with the shadow
 		 * page and so we should always have valid PTE following the
@@ -8252,16 +8271,20 @@ static int intel_crtc_cursor_set_obj(struct drm_crtc *crtc,
 		ret = i915_gem_object_pin_to_display_plane(obj, alignment, NULL);
 		if (ret) {
 			DRM_DEBUG_KMS("failed to move cursor bo into the GTT\n");
+			intel_runtime_pm_put(dev_priv);
 			goto fail_locked;
 		}
 
 		ret = i915_gem_object_put_fence(obj);
 		if (ret) {
 			DRM_DEBUG_KMS("failed to release fence for cursor");
+			intel_runtime_pm_put(dev_priv);
 			goto fail_unpin;
 		}
 
 		addr = i915_gem_obj_ggtt_offset(obj);
+
+		intel_runtime_pm_put(dev_priv);
 	} else {
 		int align = IS_I830(dev) ? 16 * 1024 : 256;
 		ret = i915_gem_object_attach_phys(obj, align);
@@ -8462,8 +8485,6 @@ bool intel_get_load_detect_pipe(struct drm_connector *connector,
 		      connector->base.id, connector->name,
 		      encoder->base.id, encoder->name);
 
-	drm_modeset_acquire_init(ctx, 0);
-
 retry:
 	ret = drm_modeset_lock(&config->connection_mutex, ctx);
 	if (ret)
@@ -8502,10 +8523,14 @@ retry:
 		i++;
 		if (!(encoder->possible_crtcs & (1 << i)))
 			continue;
-		if (!possible_crtc->enabled) {
-			crtc = possible_crtc;
-			break;
-		}
+		if (possible_crtc->enabled)
+			continue;
+		/* This can occur when applying the pipe A quirk on resume. */
+		if (to_intel_crtc(possible_crtc)->new_enabled)
+			continue;
+
+		crtc = possible_crtc;
+		break;
 	}
 
 	/*
@@ -8574,15 +8599,11 @@ fail_unlock:
 		goto retry;
 	}
 
-	drm_modeset_drop_locks(ctx);
-	drm_modeset_acquire_fini(ctx);
-
 	return false;
 }
 
 void intel_release_load_detect_pipe(struct drm_connector *connector,
-				    struct intel_load_detect_pipe *old,
-				    struct drm_modeset_acquire_ctx *ctx)
+				    struct intel_load_detect_pipe *old)
 {
 	struct intel_encoder *intel_encoder =
 		intel_attached_encoder(connector);
@@ -8606,17 +8627,12 @@ void intel_release_load_detect_pipe(struct drm_connector *connector,
 			drm_framebuffer_unreference(old->release_fb);
 		}
 
-		goto unlock;
 		return;
 	}
 
 	/* Switch crtc and encoder back off if necessary */
 	if (old->dpms_mode != DRM_MODE_DPMS_ON)
 		connector->funcs->dpms(connector, old->dpms_mode);
-
-unlock:
-	drm_modeset_drop_locks(ctx);
-	drm_modeset_acquire_fini(ctx);
 }
 
 static int i9xx_pll_refclk(struct drm_device *dev,
@@ -11700,8 +11716,8 @@ intel_cursor_plane_update(struct drm_plane *plane, struct drm_crtc *crtc,
 	};
 	const struct drm_rect clip = {
 		/* integer pixels */
-		.x2 = intel_crtc->config.pipe_src_w,
-		.y2 = intel_crtc->config.pipe_src_h,
+		.x2 = intel_crtc->active ? intel_crtc->config.pipe_src_w : 0,
+		.y2 = intel_crtc->active ? intel_crtc->config.pipe_src_h : 0,
 	};
 	bool visible;
 	int ret;
@@ -12488,6 +12504,9 @@ static struct intel_quirk intel_quirks[] = {
 	/* Acer C720 and C720P Chromebooks (Celeron 2955U) have backlights */
 	{ 0x0a06, 0x1025, 0x0a11, quirk_backlight_present },
 
+	/* Acer C720 Chromebook (Core i3 4005U) */
+	{ 0x0a16, 0x1025, 0x0a11, quirk_backlight_present },
+
 	/* Toshiba CB35 Chromebook (Celeron 2955U) */
 	{ 0x0a06, 0x1179, 0x0a88, quirk_backlight_present },
 
@@ -12659,7 +12678,7 @@ static void intel_enable_pipe_a(struct drm_device *dev)
 	struct intel_connector *connector;
 	struct drm_connector *crt = NULL;
 	struct intel_load_detect_pipe load_detect_temp;
-	struct drm_modeset_acquire_ctx ctx;
+	struct drm_modeset_acquire_ctx *ctx = dev->mode_config.acquire_ctx;
 
 	/* We can't just switch on the pipe A, we need to set things up with a
 	 * proper mode and output configuration. As a gross hack, enable pipe A
@@ -12676,10 +12695,8 @@ static void intel_enable_pipe_a(struct drm_device *dev)
 	if (!crt)
 		return;
 
-	if (intel_get_load_detect_pipe(crt, NULL, &load_detect_temp, &ctx))
-		intel_release_load_detect_pipe(crt, &load_detect_temp, &ctx);
-
-
+	if (intel_get_load_detect_pipe(crt, NULL, &load_detect_temp, ctx))
+		intel_release_load_detect_pipe(crt, &load_detect_temp);
 }
 
 static bool
@@ -13112,7 +13129,7 @@ void intel_modeset_cleanup(struct drm_device *dev)
 	 * experience fancy races otherwise.
 	 */
 	drm_irq_uninstall(dev);
-	cancel_work_sync(&dev_priv->hotplug_work);
+	intel_hpd_cancel_work(dev_priv);
 	dev_priv->pm._irqs_disabled = true;
 
 	/*
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index ee3942f0b068..fdff1d420c14 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1631,6 +1631,10 @@ static void intel_dp_get_config(struct intel_encoder *encoder,
 
 	pipe_config->adjusted_mode.flags |= flags;
 
+	if (!HAS_PCH_SPLIT(dev) && !IS_VALLEYVIEW(dev) &&
+	    tmp & DP_COLOR_RANGE_16_235)
+		pipe_config->limited_color_range = true;
+
 	pipe_config->has_dp_encoder = true;
 
 	intel_dp_get_m_n(crtc, pipe_config);
@@ -3553,6 +3557,9 @@ intel_dp_check_link_status(struct intel_dp *intel_dp)
 	if (WARN_ON(!intel_encoder->base.crtc))
 		return;
 
+	if (!to_intel_crtc(intel_encoder->base.crtc)->active)
+		return;
+
 	/* Try to read receiver status if the link appears to be up */
 	if (!intel_dp_get_link_status(intel_dp, link_status)) {
 		return;
@@ -3658,24 +3665,12 @@ ironlake_dp_detect(struct intel_dp *intel_dp)
 	return intel_dp_detect_dpcd(intel_dp);
 }
 
-static enum drm_connector_status
-g4x_dp_detect(struct intel_dp *intel_dp)
+static int g4x_digital_port_connected(struct drm_device *dev,
+				       struct intel_digital_port *intel_dig_port)
 {
-	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
 	uint32_t bit;
 
-	/* Can't disconnect eDP, but you can close the lid... */
-	if (is_edp(intel_dp)) {
-		enum drm_connector_status status;
-
-		status = intel_panel_detect(dev);
-		if (status == connector_status_unknown)
-			status = connector_status_connected;
-		return status;
-	}
-
 	if (IS_VALLEYVIEW(dev)) {
 		switch (intel_dig_port->port) {
 		case PORT_B:
@@ -3688,7 +3683,7 @@ g4x_dp_detect(struct intel_dp *intel_dp)
 			bit = PORTD_HOTPLUG_LIVE_STATUS_VLV;
 			break;
 		default:
-			return connector_status_unknown;
+			return -EINVAL;
 		}
 	} else {
 		switch (intel_dig_port->port) {
@@ -3702,11 +3697,36 @@ g4x_dp_detect(struct intel_dp *intel_dp)
 			bit = PORTD_HOTPLUG_LIVE_STATUS_G4X;
 			break;
 		default:
-			return connector_status_unknown;
+			return -EINVAL;
 		}
 	}
 
 	if ((I915_READ(PORT_HOTPLUG_STAT) & bit) == 0)
+		return 0;
+	return 1;
+}
+
+static enum drm_connector_status
+g4x_dp_detect(struct intel_dp *intel_dp)
+{
+	struct drm_device *dev = intel_dp_to_dev(intel_dp);
+	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+	int ret;
+
+	/* Can't disconnect eDP, but you can close the lid... */
+	if (is_edp(intel_dp)) {
+		enum drm_connector_status status;
+
+		status = intel_panel_detect(dev);
+		if (status == connector_status_unknown)
+			status = connector_status_connected;
+		return status;
+	}
+
+	ret = g4x_digital_port_connected(dev, intel_dig_port);
+	if (ret == -EINVAL)
+		return connector_status_unknown;
+	else if (ret == 0)
 		return connector_status_disconnected;
 
 	return intel_dp_detect_dpcd(intel_dp);
@@ -4003,6 +4023,16 @@ void intel_dp_encoder_destroy(struct drm_encoder *encoder)
 	kfree(intel_dig_port);
 }
 
+static void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder)
+{
+	struct intel_dp *intel_dp = enc_to_intel_dp(&intel_encoder->base);
+
+	if (!is_edp(intel_dp))
+		return;
+
+	edp_panel_vdd_off_sync(intel_dp);
+}
+
 static void intel_dp_encoder_reset(struct drm_encoder *encoder)
 {
 	intel_edp_panel_vdd_sanitize(to_intel_encoder(encoder));
@@ -4037,18 +4067,30 @@ bool
 intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd)
 {
 	struct intel_dp *intel_dp = &intel_dig_port->dp;
+	struct intel_encoder *intel_encoder = &intel_dig_port->base;
 	struct drm_device *dev = intel_dig_port->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int ret;
+	enum intel_display_power_domain power_domain;
+	bool ret = true;
+
 	if (intel_dig_port->base.type != INTEL_OUTPUT_EDP)
 		intel_dig_port->base.type = INTEL_OUTPUT_DISPLAYPORT;
 
 	DRM_DEBUG_KMS("got hpd irq on port %d - %s\n", intel_dig_port->port,
 		      long_hpd ? "long" : "short");
 
+	power_domain = intel_display_port_power_domain(intel_encoder);
+	intel_display_power_get(dev_priv, power_domain);
+
 	if (long_hpd) {
-		if (!ibx_digital_port_connected(dev_priv, intel_dig_port))
-			goto mst_fail;
+
+		if (HAS_PCH_SPLIT(dev)) {
+			if (!ibx_digital_port_connected(dev_priv, intel_dig_port))
+				goto mst_fail;
+		} else {
+			if (g4x_digital_port_connected(dev, intel_dig_port) != 1)
+				goto mst_fail;
+		}
 
 		if (!intel_dp_get_dpcd(intel_dp)) {
 			goto mst_fail;
@@ -4061,8 +4103,7 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd)
 
 	} else {
 		if (intel_dp->is_mst) {
-			ret = intel_dp_check_mst_status(intel_dp);
-			if (ret == -EINVAL)
+			if (intel_dp_check_mst_status(intel_dp) == -EINVAL)
 				goto mst_fail;
 		}
 
@@ -4076,7 +4117,8 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd)
 			drm_modeset_unlock(&dev->mode_config.connection_mutex);
 		}
 	}
-	return false;
+	ret = false;
+	goto put_power;
 mst_fail:
 	/* if we were in MST mode, and device is not there get out of MST mode */
 	if (intel_dp->is_mst) {
@@ -4084,7 +4126,10 @@ mst_fail:
 		intel_dp->is_mst = false;
 		drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr, intel_dp->is_mst);
 	}
-	return true;
+put_power:
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
 }
 
 /* Return which DP Port should be selected for Transcoder DP control */
@@ -4722,6 +4767,7 @@ intel_dp_init(struct drm_device *dev, int output_reg, enum port port)
 	intel_encoder->disable = intel_disable_dp;
 	intel_encoder->get_hw_state = intel_dp_get_hw_state;
 	intel_encoder->get_config = intel_dp_get_config;
+	intel_encoder->suspend = intel_dp_encoder_suspend;
 	if (IS_CHERRYVIEW(dev)) {
 		intel_encoder->pre_pll_enable = chv_dp_pre_pll_enable;
 		intel_encoder->pre_enable = chv_pre_enable_dp;
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 4b2664bd5b81..b8c8bbd8e5f9 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -153,6 +153,12 @@ struct intel_encoder {
 	 * be set correctly before calling this function. */
 	void (*get_config)(struct intel_encoder *,
 			   struct intel_crtc_config *pipe_config);
+	/*
+	 * Called during system suspend after all pending requests for the
+	 * encoder are flushed (for example for DP AUX transactions) and
+	 * device interrupts are disabled.
+	 */
+	void (*suspend)(struct intel_encoder *);
 	int crtc_mask;
 	enum hpd_pin hpd_pin;
 };
@@ -830,8 +836,7 @@ bool intel_get_load_detect_pipe(struct drm_connector *connector,
 				struct intel_load_detect_pipe *old,
 				struct drm_modeset_acquire_ctx *ctx);
 void intel_release_load_detect_pipe(struct drm_connector *connector,
-				    struct intel_load_detect_pipe *old,
-				    struct drm_modeset_acquire_ctx *ctx);
+				    struct intel_load_detect_pipe *old);
 int intel_pin_and_fence_fb_obj(struct drm_device *dev,
 			       struct drm_i915_gem_object *obj,
 			       struct intel_engine_cs *pipelined);
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index f9151f6641d9..ca34de7f6a7b 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -712,7 +712,8 @@ static void intel_hdmi_get_config(struct intel_encoder *encoder,
 				  struct intel_crtc_config *pipe_config)
 {
 	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
-	struct drm_i915_private *dev_priv = encoder->base.dev->dev_private;
+	struct drm_device *dev = encoder->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 tmp, flags = 0;
 	int dotclock;
 
@@ -734,6 +735,10 @@ static void intel_hdmi_get_config(struct intel_encoder *encoder,
 	if (tmp & HDMI_MODE_SELECT_HDMI)
 		pipe_config->has_audio = true;
 
+	if (!HAS_PCH_SPLIT(dev) &&
+	    tmp & HDMI_COLOR_RANGE_16_235)
+		pipe_config->limited_color_range = true;
+
 	pipe_config->adjusted_mode.flags |= flags;
 
 	if ((tmp & SDVO_COLOR_FORMAT_MASK) == HDMI_COLOR_FORMAT_12bpc)
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index 881361c0f27e..fdf40267249c 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -538,7 +538,7 @@ static const struct drm_encoder_funcs intel_lvds_enc_funcs = {
 	.destroy = intel_encoder_destroy,
 };
 
-static int __init intel_no_lvds_dmi_callback(const struct dmi_system_id *id)
+static int intel_no_lvds_dmi_callback(const struct dmi_system_id *id)
 {
 	DRM_INFO("Skipping LVDS initialization for %s\n", id->ident);
 	return 1;
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index 59b028f0b1e8..8e374449c6b5 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -801,7 +801,7 @@ static void pch_enable_backlight(struct intel_connector *connector)
 
 	cpu_ctl2 = I915_READ(BLC_PWM_CPU_CTL2);
 	if (cpu_ctl2 & BLM_PWM_ENABLE) {
-		WARN(1, "cpu backlight already enabled\n");
+		DRM_DEBUG_KMS("cpu backlight already enabled\n");
 		cpu_ctl2 &= ~BLM_PWM_ENABLE;
 		I915_WRITE(BLC_PWM_CPU_CTL2, cpu_ctl2);
 	}
@@ -845,7 +845,7 @@ static void i9xx_enable_backlight(struct intel_connector *connector)
 
 	ctl = I915_READ(BLC_PWM_CTL);
 	if (ctl & BACKLIGHT_DUTY_CYCLE_MASK_PNV) {
-		WARN(1, "backlight already enabled\n");
+		DRM_DEBUG_KMS("backlight already enabled\n");
 		I915_WRITE(BLC_PWM_CTL, 0);
 	}
 
@@ -876,7 +876,7 @@ static void i965_enable_backlight(struct intel_connector *connector)
 
 	ctl2 = I915_READ(BLC_PWM_CTL2);
 	if (ctl2 & BLM_PWM_ENABLE) {
-		WARN(1, "backlight already enabled\n");
+		DRM_DEBUG_KMS("backlight already enabled\n");
 		ctl2 &= ~BLM_PWM_ENABLE;
 		I915_WRITE(BLC_PWM_CTL2, ctl2);
 	}
@@ -910,7 +910,7 @@ static void vlv_enable_backlight(struct intel_connector *connector)
 
 	ctl2 = I915_READ(VLV_BLC_PWM_CTL2(pipe));
 	if (ctl2 & BLM_PWM_ENABLE) {
-		WARN(1, "backlight already enabled\n");
+		DRM_DEBUG_KMS("backlight already enabled\n");
 		ctl2 &= ~BLM_PWM_ENABLE;
 		I915_WRITE(VLV_BLC_PWM_CTL2(pipe), ctl2);
 	}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 16371a444426..47a126a0493f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1363,54 +1363,66 @@ i965_dispatch_execbuffer(struct intel_engine_cs *ring,
 
 /* Just userspace ABI convention to limit the wa batch bo to a resonable size */
 #define I830_BATCH_LIMIT (256*1024)
+#define I830_TLB_ENTRIES (2)
+#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
 static int
 i830_dispatch_execbuffer(struct intel_engine_cs *ring,
 				u64 offset, u32 len,
 				unsigned flags)
 {
+	u32 cs_offset = ring->scratch.gtt_offset;
 	int ret;
 
-	if (flags & I915_DISPATCH_PINNED) {
-		ret = intel_ring_begin(ring, 4);
-		if (ret)
-			return ret;
+	ret = intel_ring_begin(ring, 6);
+	if (ret)
+		return ret;
 
-		intel_ring_emit(ring, MI_BATCH_BUFFER);
-		intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
-		intel_ring_emit(ring, offset + len - 8);
-		intel_ring_emit(ring, MI_NOOP);
-		intel_ring_advance(ring);
-	} else {
-		u32 cs_offset = ring->scratch.gtt_offset;
+	/* Evict the invalid PTE TLBs */
+	intel_ring_emit(ring, COLOR_BLT_CMD | BLT_WRITE_RGBA);
+	intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096);
+	intel_ring_emit(ring, I830_TLB_ENTRIES << 16 | 4); /* load each page */
+	intel_ring_emit(ring, cs_offset);
+	intel_ring_emit(ring, 0xdeadbeef);
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
 
+	if ((flags & I915_DISPATCH_PINNED) == 0) {
 		if (len > I830_BATCH_LIMIT)
 			return -ENOSPC;
 
-		ret = intel_ring_begin(ring, 9+3);
+		ret = intel_ring_begin(ring, 6 + 2);
 		if (ret)
 			return ret;
-		/* Blit the batch (which has now all relocs applied) to the stable batch
-		 * scratch bo area (so that the CS never stumbles over its tlb
-		 * invalidation bug) ... */
-		intel_ring_emit(ring, XY_SRC_COPY_BLT_CMD |
-				XY_SRC_COPY_BLT_WRITE_ALPHA |
-				XY_SRC_COPY_BLT_WRITE_RGB);
-		intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_GXCOPY | 4096);
-		intel_ring_emit(ring, 0);
-		intel_ring_emit(ring, (DIV_ROUND_UP(len, 4096) << 16) | 1024);
+
+		/* Blit the batch (which has now all relocs applied) to the
+		 * stable batch scratch bo area (so that the CS never
+		 * stumbles over its tlb invalidation bug) ...
+		 */
+		intel_ring_emit(ring, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA);
+		intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096);
+		intel_ring_emit(ring, DIV_ROUND_UP(len, 4096) << 16 | 4096);
 		intel_ring_emit(ring, cs_offset);
-		intel_ring_emit(ring, 0);
 		intel_ring_emit(ring, 4096);
 		intel_ring_emit(ring, offset);
+
 		intel_ring_emit(ring, MI_FLUSH);
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_advance(ring);
 
 		/* ... and execute it. */
-		intel_ring_emit(ring, MI_BATCH_BUFFER);
-		intel_ring_emit(ring, cs_offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
-		intel_ring_emit(ring, cs_offset + len - 8);
-		intel_ring_advance(ring);
+		offset = cs_offset;
 	}
 
+	ret = intel_ring_begin(ring, 4);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring, MI_BATCH_BUFFER);
+	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
+	intel_ring_emit(ring, offset + len - 8);
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
+
 	return 0;
 }
 
@@ -2200,7 +2212,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 
 	/* Workaround batchbuffer to combat CS tlb bug. */
 	if (HAS_BROKEN_CS_TLB(dev)) {
-		obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT);
+		obj = i915_gem_alloc_object(dev, I830_WA_SIZE);
 		if (obj == NULL) {
 			DRM_ERROR("Failed to allocate batch bo\n");
 			return -ENOMEM;
diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index e211eef4b7e4..c14341ca3ef9 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -854,6 +854,10 @@ intel_enable_tv(struct intel_encoder *encoder)
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
+	/* Prevents vblank waits from timing out in intel_tv_detect_type() */
+	intel_wait_for_vblank(encoder->base.dev,
+			      to_intel_crtc(encoder->base.crtc)->pipe);
+
 	I915_WRITE(TV_CTL, I915_READ(TV_CTL) | TV_ENC_ENABLE);
 }
 
@@ -1311,6 +1315,7 @@ intel_tv_detect(struct drm_connector *connector, bool force)
 {
 	struct drm_display_mode mode;
 	struct intel_tv *intel_tv = intel_attached_tv(connector);
+	enum drm_connector_status status;
 	int type;
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s] force=%d\n",
@@ -1323,16 +1328,24 @@ intel_tv_detect(struct drm_connector *connector, bool force)
 		struct intel_load_detect_pipe tmp;
 		struct drm_modeset_acquire_ctx ctx;
 
+		drm_modeset_acquire_init(&ctx, 0);
+
 		if (intel_get_load_detect_pipe(connector, &mode, &tmp, &ctx)) {
 			type = intel_tv_detect_type(intel_tv, connector);
-			intel_release_load_detect_pipe(connector, &tmp, &ctx);
+			intel_release_load_detect_pipe(connector, &tmp);
+			status = type < 0 ?
+				connector_status_disconnected :
+				connector_status_connected;
 		} else
-			return connector_status_unknown;
+			status = connector_status_unknown;
+
+		drm_modeset_drop_locks(&ctx);
+		drm_modeset_acquire_fini(&ctx);
 	} else
 		return connector->status;
 
-	if (type < 0)
-		return connector_status_disconnected;
+	if (status != connector_status_connected)
+		return status;
 
 	intel_tv->type = type;
 	intel_tv_find_better_format(connector);
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c
index a125a7e32742..c6c9b02e0ada 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.c
@@ -258,28 +258,30 @@ static void set_hdmi_pdev(struct drm_device *dev,
 	priv->hdmi_pdev = pdev;
 }
 
+#ifdef CONFIG_OF
+static int get_gpio(struct device *dev, struct device_node *of_node, const char *name)
+{
+	int gpio = of_get_named_gpio(of_node, name, 0);
+	if (gpio < 0) {
+		char name2[32];
+		snprintf(name2, sizeof(name2), "%s-gpio", name);
+		gpio = of_get_named_gpio(of_node, name2, 0);
+		if (gpio < 0) {
+			dev_err(dev, "failed to get gpio: %s (%d)\n",
+					name, gpio);
+			gpio = -1;
+		}
+	}
+	return gpio;
+}
+#endif
+
 static int hdmi_bind(struct device *dev, struct device *master, void *data)
 {
 	static struct hdmi_platform_config config = {};
 #ifdef CONFIG_OF
 	struct device_node *of_node = dev->of_node;
 
-	int get_gpio(const char *name)
-	{
-		int gpio = of_get_named_gpio(of_node, name, 0);
-		if (gpio < 0) {
-			char name2[32];
-			snprintf(name2, sizeof(name2), "%s-gpio", name);
-			gpio = of_get_named_gpio(of_node, name2, 0);
-			if (gpio < 0) {
-				dev_err(dev, "failed to get gpio: %s (%d)\n",
-						name, gpio);
-				gpio = -1;
-			}
-		}
-		return gpio;
-	}
-
 	if (of_device_is_compatible(of_node, "qcom,hdmi-tx-8074")) {
 		static const char *hpd_reg_names[] = {"hpd-gdsc", "hpd-5v"};
 		static const char *pwr_reg_names[] = {"core-vdda", "core-vcc"};
@@ -312,12 +314,12 @@ static int hdmi_bind(struct device *dev, struct device *master, void *data)
 	}
 
 	config.mmio_name     = "core_physical";
-	config.ddc_clk_gpio  = get_gpio("qcom,hdmi-tx-ddc-clk");
-	config.ddc_data_gpio = get_gpio("qcom,hdmi-tx-ddc-data");
-	config.hpd_gpio      = get_gpio("qcom,hdmi-tx-hpd");
-	config.mux_en_gpio   = get_gpio("qcom,hdmi-tx-mux-en");
-	config.mux_sel_gpio  = get_gpio("qcom,hdmi-tx-mux-sel");
-	config.mux_lpm_gpio  = get_gpio("qcom,hdmi-tx-mux-lpm");
+	config.ddc_clk_gpio  = get_gpio(dev, of_node, "qcom,hdmi-tx-ddc-clk");
+	config.ddc_data_gpio = get_gpio(dev, of_node, "qcom,hdmi-tx-ddc-data");
+	config.hpd_gpio      = get_gpio(dev, of_node, "qcom,hdmi-tx-hpd");
+	config.mux_en_gpio   = get_gpio(dev, of_node, "qcom,hdmi-tx-mux-en");
+	config.mux_sel_gpio  = get_gpio(dev, of_node, "qcom,hdmi-tx-mux-sel");
+	config.mux_lpm_gpio  = get_gpio(dev, of_node, "qcom,hdmi-tx-mux-lpm");
 
 #else
 	static const char *hpd_clk_names[] = {
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8960.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8960.c
index 902d7685d441..f408b69486a8 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8960.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8960.c
@@ -15,19 +15,25 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#ifdef CONFIG_COMMON_CLK
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
+#endif
 
 #include "hdmi.h"
 
 struct hdmi_phy_8960 {
 	struct hdmi_phy base;
 	struct hdmi *hdmi;
+#ifdef CONFIG_COMMON_CLK
 	struct clk_hw pll_hw;
 	struct clk *pll;
 	unsigned long pixclk;
+#endif
 };
 #define to_hdmi_phy_8960(x) container_of(x, struct hdmi_phy_8960, base)
+
+#ifdef CONFIG_COMMON_CLK
 #define clk_to_phy(x) container_of(x, struct hdmi_phy_8960, pll_hw)
 
 /*
@@ -374,7 +380,7 @@ static struct clk_init_data pll_init = {
 	.parent_names = hdmi_pll_parents,
 	.num_parents = ARRAY_SIZE(hdmi_pll_parents),
 };
-
+#endif
 
 /*
  * HDMI Phy:
@@ -480,12 +486,15 @@ struct hdmi_phy *hdmi_phy_8960_init(struct hdmi *hdmi)
 {
 	struct hdmi_phy_8960 *phy_8960;
 	struct hdmi_phy *phy = NULL;
-	int ret, i;
+	int ret;
+#ifdef CONFIG_COMMON_CLK
+	int i;
 
 	/* sanity check: */
 	for (i = 0; i < (ARRAY_SIZE(freqtbl) - 1); i++)
 		if (WARN_ON(freqtbl[i].rate < freqtbl[i+1].rate))
 			return ERR_PTR(-EINVAL);
+#endif
 
 	phy_8960 = kzalloc(sizeof(*phy_8960), GFP_KERNEL);
 	if (!phy_8960) {
@@ -499,6 +508,7 @@ struct hdmi_phy *hdmi_phy_8960_init(struct hdmi *hdmi)
 
 	phy_8960->hdmi = hdmi;
 
+#ifdef CONFIG_COMMON_CLK
 	phy_8960->pll_hw.init = &pll_init;
 	phy_8960->pll = devm_clk_register(hdmi->dev->dev, &phy_8960->pll_hw);
 	if (IS_ERR(phy_8960->pll)) {
@@ -506,6 +516,7 @@ struct hdmi_phy *hdmi_phy_8960_init(struct hdmi *hdmi)
 		phy_8960->pll = NULL;
 		goto fail;
 	}
+#endif
 
 	return phy;
 
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c
index 74cebb51e8c2..c6c80ea28c35 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c
@@ -397,6 +397,7 @@ static void mdp4_crtc_prepare(struct drm_crtc *crtc)
 	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
 	DBG("%s", mdp4_crtc->name);
 	/* make sure we hold a ref to mdp clks while setting up mode: */
+	drm_crtc_vblank_get(crtc);
 	mdp4_enable(get_kms(crtc));
 	mdp4_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
 }
@@ -407,6 +408,7 @@ static void mdp4_crtc_commit(struct drm_crtc *crtc)
 	crtc_flush(crtc);
 	/* drop the ref to mdp clk's that we got in prepare: */
 	mdp4_disable(get_kms(crtc));
+	drm_crtc_vblank_put(crtc);
 }
 
 static int mdp4_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index b447c01ad89c..fcf95680413d 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -52,7 +52,7 @@ module_param(reglog, bool, 0600);
 #define reglog 0
 #endif
 
-static char *vram;
+static char *vram = "16m";
 MODULE_PARM_DESC(vram, "Configure VRAM size (for devices without IOMMU/GPUMMU");
 module_param(vram, charp, 0);
 
@@ -974,12 +974,11 @@ static int msm_pdev_probe(struct platform_device *pdev)
 
 	for (i = 0; i < ARRAY_SIZE(devnames); i++) {
 		struct device *dev;
-		int ret;
 
 		dev = bus_find_device_by_name(&platform_bus_type,
 				NULL, devnames[i]);
 		if (!dev) {
-			dev_info(master, "still waiting for %s\n", devnames[i]);
+			dev_info(&pdev->dev, "still waiting for %s\n", devnames[i]);
 			return -EPROBE_DEFER;
 		}
 
diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c
index 9c5221ce391a..ab5bfd2d0ebf 100644
--- a/drivers/gpu/drm/msm/msm_fbdev.c
+++ b/drivers/gpu/drm/msm/msm_fbdev.c
@@ -143,7 +143,7 @@ static int msm_fbdev_create(struct drm_fb_helper *helper,
 	ret = msm_gem_get_iova_locked(fbdev->bo, 0, &paddr);
 	if (ret) {
 		dev_err(dev->dev, "failed to get buffer obj iova: %d\n", ret);
-		goto fail;
+		goto fail_unlock;
 	}
 
 	fbi = framebuffer_alloc(0, dev->dev);
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
index 099af483fdf0..7acdaa5688b7 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -27,8 +27,8 @@ struct msm_iommu {
 static int msm_fault_handler(struct iommu_domain *iommu, struct device *dev,
 		unsigned long iova, int flags, void *arg)
 {
-	DBG("*** fault: iova=%08lx, flags=%d", iova, flags);
-	return -ENOSYS;
+	pr_warn_ratelimited("*** fault: iova=%08lx, flags=%d\n", iova, flags);
+	return 0;
 }
 
 static int msm_iommu_attach(struct msm_mmu *mmu, const char **names, int cnt)
diff --git a/drivers/gpu/drm/nouveau/core/core/client.c b/drivers/gpu/drm/nouveau/core/core/client.c
index 10598dede9e9..68bf06768123 100644
--- a/drivers/gpu/drm/nouveau/core/core/client.c
+++ b/drivers/gpu/drm/nouveau/core/core/client.c
@@ -132,12 +132,12 @@ nvkm_client_notify_new(struct nouveau_client *client,
 		if (ret == 0) {
 			client->notify[index] = notify;
 			notify->client = client;
-			return 0;
+			return index;
 		}
 	}
 
 	kfree(notify);
-	return 0;
+	return ret;
 }
 
 static int
diff --git a/drivers/gpu/drm/nouveau/core/core/parent.c b/drivers/gpu/drm/nouveau/core/core/parent.c
index 8701968a9743..30a2911878f8 100644
--- a/drivers/gpu/drm/nouveau/core/core/parent.c
+++ b/drivers/gpu/drm/nouveau/core/core/parent.c
@@ -86,7 +86,7 @@ nouveau_parent_lclass(struct nouveau_object *parent, u32 *lclass, int size)
 	sclass = nv_parent(parent)->sclass;
 	while (sclass) {
 		if (++nr < size)
-			lclass[nr] = sclass->oclass->handle;
+			lclass[nr] = sclass->oclass->handle & 0xffff;
 		sclass = sclass->sclass;
 	}
 
@@ -96,7 +96,7 @@ nouveau_parent_lclass(struct nouveau_object *parent, u32 *lclass, int size)
 		if (engine && (oclass = engine->sclass)) {
 			while (oclass->ofuncs) {
 				if (++nr < size)
-					lclass[nr] = oclass->handle;
+					lclass[nr] = oclass->handle & 0xffff;
 				oclass++;
 			}
 		}
diff --git a/drivers/gpu/drm/nouveau/core/engine/device/nve0.c b/drivers/gpu/drm/nouveau/core/engine/device/nve0.c
index 54ec53bc6252..cdf9147f32a1 100644
--- a/drivers/gpu/drm/nouveau/core/engine/device/nve0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/device/nve0.c
@@ -163,6 +163,7 @@ nve0_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_BUS    ] =  nvc0_bus_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &gk20a_timer_oclass;
 		device->oclass[NVDEV_SUBDEV_FB     ] =  gk20a_fb_oclass;
+		device->oclass[NVDEV_SUBDEV_LTC    ] =  gk104_ltc_oclass;
 		device->oclass[NVDEV_SUBDEV_IBUS   ] = &gk20a_ibus_oclass;
 		device->oclass[NVDEV_SUBDEV_INSTMEM] = nv50_instmem_oclass;
 		device->oclass[NVDEV_SUBDEV_VM     ] = &nvc0_vmmgr_oclass;
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c
index db19191176fa..30fd1dc64f93 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c
@@ -68,6 +68,9 @@ nvc0_graph_zbc_color_get(struct nvc0_graph_priv *priv, int format,
 		}
 	}
 
+	if (zbc < 0)
+		return zbc;
+
 	memcpy(priv->zbc_color[zbc].ds, ds, sizeof(priv->zbc_color[zbc].ds));
 	memcpy(priv->zbc_color[zbc].l2, l2, sizeof(priv->zbc_color[zbc].l2));
 	priv->zbc_color[zbc].format = format;
@@ -109,6 +112,9 @@ nvc0_graph_zbc_depth_get(struct nvc0_graph_priv *priv, int format,
 		}
 	}
 
+	if (zbc < 0)
+		return zbc;
+
 	priv->zbc_depth[zbc].format = format;
 	priv->zbc_depth[zbc].ds = ds;
 	priv->zbc_depth[zbc].l2 = l2;
diff --git a/drivers/gpu/drm/nouveau/core/include/core/client.h b/drivers/gpu/drm/nouveau/core/include/core/client.h
index 4fc6ab12382d..1794a05205d8 100644
--- a/drivers/gpu/drm/nouveau/core/include/core/client.h
+++ b/drivers/gpu/drm/nouveau/core/include/core/client.h
@@ -14,7 +14,7 @@ struct nouveau_client {
 	void *data;
 
 	int (*ntfy)(const void *, u32, const void *, u32);
-	struct nvkm_client_notify *notify[8];
+	struct nvkm_client_notify *notify[16];
 };
 
 static inline struct nouveau_client *
diff --git a/drivers/gpu/drm/nouveau/core/subdev/bar/base.c b/drivers/gpu/drm/nouveau/core/subdev/bar/base.c
index 73b1ed20c8d5..8bcbdf39cfb2 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/bar/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/bar/base.c
@@ -99,8 +99,13 @@ nouveau_bar_alloc(struct nouveau_bar *bar, struct nouveau_object *parent,
 		  struct nouveau_mem *mem, struct nouveau_object **pobject)
 {
 	struct nouveau_object *engine = nv_object(bar);
-	return nouveau_object_ctor(parent, engine, &nouveau_barobj_oclass,
-				   mem, 0, pobject);
+	int ret = -ENOMEM;
+	if (bar->iomem) {
+		ret = nouveau_object_ctor(parent, engine,
+					  &nouveau_barobj_oclass,
+					  mem, 0, pobject);
+	}
+	return ret;
 }
 
 int
@@ -118,9 +123,12 @@ nouveau_bar_create_(struct nouveau_object *parent,
 	if (ret)
 		return ret;
 
-	if (nv_device_resource_len(device, 3) != 0)
+	if (nv_device_resource_len(device, 3) != 0) {
 		bar->iomem = ioremap(nv_device_resource_start(device, 3),
 				     nv_device_resource_len(device, 3));
+		if (!bar->iomem)
+			nv_warn(bar, "PRAMIN ioremap failed\n");
+	}
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/core/subdev/bar/nvc0.c b/drivers/gpu/drm/nouveau/core/subdev/bar/nvc0.c
index 0a44459844e3..05a278bab247 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/bar/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/bar/nvc0.c
@@ -200,7 +200,6 @@ nvc0_bar_init(struct nouveau_object *object)
 
 	nv_mask(priv, 0x000200, 0x00000100, 0x00000000);
 	nv_mask(priv, 0x000200, 0x00000100, 0x00000100);
-	nv_mask(priv, 0x100c80, 0x00000001, 0x00000000);
 
 	nv_wr32(priv, 0x001704, 0x80000000 | priv->bar[1].mem->addr >> 12);
 	if (priv->bar[0].mem)
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/nvc0.c b/drivers/gpu/drm/nouveau/core/subdev/fb/nvc0.c
index b19a2b3c1081..32f28dc73ef2 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/nvc0.c
@@ -60,6 +60,7 @@ nvc0_fb_init(struct nouveau_object *object)
 
 	if (priv->r100c10_page)
 		nv_wr32(priv, 0x100c10, priv->r100c10 >> 8);
+	nv_mask(priv, 0x100c80, 0x00000001, 0x00000000); /* 128KiB lpg */
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvc0.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvc0.c
index 946518572346..2b284b192763 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvc0.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvc0.c
@@ -554,13 +554,13 @@ nvc0_ram_create_(struct nouveau_object *parent, struct nouveau_object *engine,
 	} else {
 		/* otherwise, address lowest common amount from 0GiB */
 		ret = nouveau_mm_init(&pfb->vram, rsvd_head,
-				      (bsize << 8) * parts, 1);
+				      (bsize << 8) * parts - rsvd_head, 1);
 		if (ret)
 			return ret;
 
 		/* and the rest starting from (8GiB + common_size) */
 		offset = (0x0200000000ULL >> 12) + (bsize << 8);
-		length = (ram->size >> 12) - (bsize << 8) - rsvd_tail;
+		length = (ram->size >> 12) - ((bsize * parts) << 8) - rsvd_tail;
 
 		ret = nouveau_mm_init(&pfb->vram, offset, length, 0);
 		if (ret)
diff --git a/drivers/gpu/drm/nouveau/core/subdev/ltc/gf100.c b/drivers/gpu/drm/nouveau/core/subdev/ltc/gf100.c
index 9e00a1ede120..d5d65285efe5 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/ltc/gf100.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/ltc/gf100.c
@@ -98,6 +98,7 @@ static int
 gf100_ltc_init(struct nouveau_object *object)
 {
 	struct nvkm_ltc_priv *priv = (void *)object;
+	u32 lpg128 = !(nv_rd32(priv, 0x100c80) & 0x00000001);
 	int ret;
 
 	ret = nvkm_ltc_init(priv);
@@ -107,6 +108,7 @@ gf100_ltc_init(struct nouveau_object *object)
 	nv_mask(priv, 0x17e820, 0x00100000, 0x00000000); /* INTR_EN &= ~0x10 */
 	nv_wr32(priv, 0x17e8d8, priv->ltc_nr);
 	nv_wr32(priv, 0x17e8d4, priv->tag_base);
+	nv_mask(priv, 0x17e8c0, 0x00000002, lpg128 ? 0x00000002 : 0x00000000);
 	return 0;
 }
 
@@ -156,7 +158,7 @@ gf100_ltc_init_tag_ram(struct nouveau_fb *pfb, struct nvkm_ltc_priv *priv)
 	if (ret) {
 		priv->num_tags = 0;
 	} else {
-		u64 tag_base = (priv->tag_ram->offset << 12) + tag_margin;
+		u64 tag_base = ((u64)priv->tag_ram->offset << 12) + tag_margin;
 
 		tag_base += tag_align - 1;
 		ret = do_div(tag_base, tag_align);
diff --git a/drivers/gpu/drm/nouveau/core/subdev/ltc/gk104.c b/drivers/gpu/drm/nouveau/core/subdev/ltc/gk104.c
index ea716569745d..b39b5d0eb8f9 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/ltc/gk104.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/ltc/gk104.c
@@ -28,6 +28,7 @@ static int
 gk104_ltc_init(struct nouveau_object *object)
 {
 	struct nvkm_ltc_priv *priv = (void *)object;
+	u32 lpg128 = !(nv_rd32(priv, 0x100c80) & 0x00000001);
 	int ret;
 
 	ret = nvkm_ltc_init(priv);
@@ -37,6 +38,7 @@ gk104_ltc_init(struct nouveau_object *object)
 	nv_wr32(priv, 0x17e8d8, priv->ltc_nr);
 	nv_wr32(priv, 0x17e000, priv->ltc_nr);
 	nv_wr32(priv, 0x17e8d4, priv->tag_base);
+	nv_mask(priv, 0x17e8c0, 0x00000002, lpg128 ? 0x00000002 : 0x00000000);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/core/subdev/ltc/gm107.c b/drivers/gpu/drm/nouveau/core/subdev/ltc/gm107.c
index 4761b2e9af00..a4de64289762 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/ltc/gm107.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/ltc/gm107.c
@@ -98,6 +98,7 @@ static int
 gm107_ltc_init(struct nouveau_object *object)
 {
 	struct nvkm_ltc_priv *priv = (void *)object;
+	u32 lpg128 = !(nv_rd32(priv, 0x100c80) & 0x00000001);
 	int ret;
 
 	ret = nvkm_ltc_init(priv);
@@ -106,6 +107,7 @@ gm107_ltc_init(struct nouveau_object *object)
 
 	nv_wr32(priv, 0x17e27c, priv->ltc_nr);
 	nv_wr32(priv, 0x17e278, priv->tag_base);
+	nv_mask(priv, 0x17e264, 0x00000002, lpg128 ? 0x00000002 : 0x00000000);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c
index 279206997e5c..622424692b3b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c
@@ -46,7 +46,6 @@ static struct nouveau_dsm_priv {
 	bool dsm_detected;
 	bool optimus_detected;
 	acpi_handle dhandle;
-	acpi_handle other_handle;
 	acpi_handle rom_handle;
 } nouveau_dsm_priv;
 
@@ -222,10 +221,9 @@ static int nouveau_dsm_pci_probe(struct pci_dev *pdev)
 	if (!dhandle)
 		return false;
 
-	if (!acpi_has_method(dhandle, "_DSM")) {
-		nouveau_dsm_priv.other_handle = dhandle;
+	if (!acpi_has_method(dhandle, "_DSM"))
 		return false;
-	}
+
 	if (acpi_check_dsm(dhandle, nouveau_dsm_muid, 0x00000102,
 			   1 << NOUVEAU_DSM_POWER))
 		retval |= NOUVEAU_DSM_HAS_MUX;
@@ -301,16 +299,6 @@ static bool nouveau_dsm_detect(void)
 		printk(KERN_INFO "VGA switcheroo: detected DSM switching method %s handle\n",
 			acpi_method_name);
 		nouveau_dsm_priv.dsm_detected = true;
-		/*
-		 * On some systems hotplug events are generated for the device
-		 * being switched off when _DSM is executed.  They cause ACPI
-		 * hotplug to trigger and attempt to remove the device from
-		 * the system, which causes it to break down.  Prevent that from
-		 * happening by setting the no_hotplug flag for the involved
-		 * ACPI device objects.
-		 */
-		acpi_bus_no_hotplug(nouveau_dsm_priv.dhandle);
-		acpi_bus_no_hotplug(nouveau_dsm_priv.other_handle);
 		ret = true;
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index da5d631aa5b9..01da508625f2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -1228,7 +1228,6 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
 	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
 	struct nouveau_drm *drm = nouveau_bdev(bdev);
 	struct nouveau_mem *node = mem->mm_node;
-	struct drm_device *dev = drm->dev;
 	int ret;
 
 	mem->bus.addr = NULL;
@@ -1247,7 +1246,7 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
 		if (drm->agp.stat == ENABLED) {
 			mem->bus.offset = mem->start << PAGE_SHIFT;
 			mem->bus.base = drm->agp.base;
-			mem->bus.is_iomem = !dev->agp->cant_use_aperture;
+			mem->bus.is_iomem = !drm->dev->agp->cant_use_aperture;
 		}
 #endif
 		if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA || !node->memtype)
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index 1cc7b603c753..65b4fd53dd4e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -592,7 +592,9 @@ nouveau_display_repin(struct drm_device *dev)
 		if (!nouveau_fb || !nouveau_fb->nvbo)
 			continue;
 
-		nouveau_bo_pin(nouveau_fb->nvbo, TTM_PL_FLAG_VRAM);
+		ret = nouveau_bo_pin(nouveau_fb->nvbo, TTM_PL_FLAG_VRAM);
+		if (ret)
+			NV_ERROR(drm, "Could not pin framebuffer\n");
 	}
 
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 250a5e88c751..9c3af96a7153 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -627,6 +627,7 @@ int nouveau_pmops_suspend(struct device *dev)
 
 	pci_save_state(pdev);
 	pci_disable_device(pdev);
+	pci_ignore_hotplug(pdev);
 	pci_set_power_state(pdev, PCI_D3hot);
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index ebfe3180109e..8bdd27091db8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -226,7 +226,7 @@ nouveau_fbcon_accel_restore(struct drm_device *dev)
 	}
 }
 
-void
+static void
 nouveau_fbcon_accel_fini(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
@@ -246,7 +246,7 @@ nouveau_fbcon_accel_fini(struct drm_device *dev)
 	}
 }
 
-void
+static void
 nouveau_fbcon_accel_init(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
diff --git a/drivers/gpu/drm/nouveau/nouveau_platform.c b/drivers/gpu/drm/nouveau/nouveau_platform.c
index 0ffeb50d0088..246a824c16ca 100644
--- a/drivers/gpu/drm/nouveau/nouveau_platform.c
+++ b/drivers/gpu/drm/nouveau/nouveau_platform.c
@@ -149,7 +149,8 @@ power_down:
 static int nouveau_platform_remove(struct platform_device *pdev)
 {
 	struct drm_device *drm_dev = platform_get_drvdata(pdev);
-	struct nouveau_device *device = nouveau_dev(drm_dev);
+	struct nouveau_drm *drm = nouveau_drm(drm_dev);
+	struct nouveau_device *device = nvkm_device(&drm->device);
 	struct nouveau_platform_gpu *gpu = nv_device_to_platform(device)->gpu;
 
 	nouveau_drm_device_remove(drm_dev);
diff --git a/drivers/gpu/drm/nouveau/nouveau_vga.c b/drivers/gpu/drm/nouveau/nouveau_vga.c
index 18d55d447248..c7592ec8ecb8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vga.c
+++ b/drivers/gpu/drm/nouveau/nouveau_vga.c
@@ -108,7 +108,16 @@ void
 nouveau_vga_fini(struct nouveau_drm *drm)
 {
 	struct drm_device *dev = drm->dev;
+	bool runtime = false;
+
+	if (nouveau_runtime_pm == 1)
+		runtime = true;
+	if ((nouveau_runtime_pm == -1) && (nouveau_is_optimus() || nouveau_is_v1_dsm()))
+		runtime = true;
+
 	vga_switcheroo_unregister_client(dev->pdev);
+	if (runtime && nouveau_is_v1_dsm() && !nouveau_is_optimus())
+		vga_switcheroo_fini_domain_pm_ops(drm->dev->dev);
 	vga_client_register(dev->pdev, NULL, NULL, NULL);
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvif/class.h b/drivers/gpu/drm/nouveau/nvif/class.h
index cc81e0e5fd30..573491f84792 100644
--- a/drivers/gpu/drm/nouveau/nvif/class.h
+++ b/drivers/gpu/drm/nouveau/nvif/class.h
@@ -428,8 +428,8 @@ struct nv50_disp_dac_pwr_v0 {
 struct nv50_disp_dac_load_v0 {
 	__u8  version;
 	__u8  load;
-	__u16 data;
-	__u8  pad04[4];
+	__u8  pad02[2];
+	__u32 data;
 };
 
 struct nv50_disp_sor_pwr_v0 {
diff --git a/drivers/gpu/drm/nouveau/nvif/notify.c b/drivers/gpu/drm/nouveau/nvif/notify.c
index 7c06123a559c..0898c3155292 100644
--- a/drivers/gpu/drm/nouveau/nvif/notify.c
+++ b/drivers/gpu/drm/nouveau/nvif/notify.c
@@ -87,12 +87,25 @@ nvif_notify_get(struct nvif_notify *notify)
 	return 0;
 }
 
+static inline int
+nvif_notify_func(struct nvif_notify *notify, bool keep)
+{
+	int ret = notify->func(notify);
+	if (ret == NVIF_NOTIFY_KEEP ||
+	    !test_and_clear_bit(NVKM_NOTIFY_USER, &notify->flags)) {
+		if (!keep)
+			atomic_dec(&notify->putcnt);
+		else
+			nvif_notify_get_(notify);
+	}
+	return ret;
+}
+
 static void
 nvif_notify_work(struct work_struct *work)
 {
 	struct nvif_notify *notify = container_of(work, typeof(*notify), work);
-	if (notify->func(notify) == NVIF_NOTIFY_KEEP)
-		nvif_notify_get_(notify);
+	nvif_notify_func(notify, true);
 }
 
 int
@@ -113,19 +126,15 @@ nvif_notify(const void *header, u32 length, const void *data, u32 size)
 	if (!WARN_ON(notify == NULL)) {
 		struct nvif_client *client = nvif_client(notify->object);
 		if (!WARN_ON(notify->size != size)) {
+			atomic_inc(&notify->putcnt);
 			if (test_bit(NVIF_NOTIFY_WORK, &notify->flags)) {
-				atomic_inc(&notify->putcnt);
 				memcpy((void *)notify->data, data, size);
 				schedule_work(&notify->work);
 				return NVIF_NOTIFY_DROP;
 			}
 			notify->data = data;
-			ret = notify->func(notify);
+			ret = nvif_notify_func(notify, client->driver->keep);
 			notify->data = NULL;
-			if (ret != NVIF_NOTIFY_DROP && client->driver->keep) {
-				atomic_inc(&notify->putcnt);
-				nvif_notify_get_(notify);
-			}
 		}
 	}
 
@@ -228,8 +237,10 @@ nvif_notify_new(struct nvif_object *object, int (*func)(struct nvif_notify *),
 	if (notify) {
 		int ret = nvif_notify_init(object, nvif_notify_del, func, work,
 					   type, data, size, reply, notify);
-		if (ret)
+		if (ret) {
 			kfree(notify);
+			notify = NULL;
+		}
 		*pnotify = notify;
 		return ret;
 	}
diff --git a/drivers/gpu/drm/nouveau/nvif/object.c b/drivers/gpu/drm/nouveau/nvif/object.c
index b0c82206ece2..dd85b56f6aa5 100644
--- a/drivers/gpu/drm/nouveau/nvif/object.c
+++ b/drivers/gpu/drm/nouveau/nvif/object.c
@@ -275,8 +275,10 @@ nvif_object_new(struct nvif_object *parent, u32 handle, u32 oclass,
 	if (object) {
 		int ret = nvif_object_init(parent, nvif_object_del, handle,
 					   oclass, data, size, object);
-		if (ret)
+		if (ret) {
 			kfree(object);
+			object = NULL;
+		}
 		*pobject = object;
 		return ret;
 	}
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index 0013ad0db9ef..f77b7135ee4c 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -76,7 +76,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \
 	evergreen.o evergreen_cs.o evergreen_blit_shaders.o \
 	evergreen_hdmi.o radeon_trace_points.o ni.o cayman_blit_shaders.o \
 	atombios_encoders.o radeon_semaphore.o radeon_sa.o atombios_i2c.o si.o \
-	si_blit_shaders.o radeon_prime.o radeon_uvd.o cik.o cik_blit_shaders.o \
+	si_blit_shaders.o radeon_prime.o cik.o cik_blit_shaders.o \
 	r600_dpm.o rs780_dpm.o rv6xx_dpm.o rv770_dpm.o rv730_dpm.o rv740_dpm.o \
 	rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \
 	trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \
diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c
index b1e11f8434e2..ac14b67621d3 100644
--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c
@@ -405,16 +405,13 @@ bool radeon_dp_getdpcd(struct radeon_connector *radeon_connector)
 	u8 msg[DP_DPCD_SIZE];
 	int ret;
 
-	char dpcd_hex_dump[DP_DPCD_SIZE * 3];
-
 	ret = drm_dp_dpcd_read(&radeon_connector->ddc_bus->aux, DP_DPCD_REV, msg,
 			       DP_DPCD_SIZE);
 	if (ret > 0) {
 		memcpy(dig_connector->dpcd, msg, DP_DPCD_SIZE);
 
-		hex_dump_to_buffer(dig_connector->dpcd, sizeof(dig_connector->dpcd),
-				   32, 1, dpcd_hex_dump, sizeof(dpcd_hex_dump), false);
-		DRM_DEBUG_KMS("DPCD: %s\n", dpcd_hex_dump);
+		DRM_DEBUG_KMS("DPCD: %*ph\n", (int)sizeof(dig_connector->dpcd),
+			      dig_connector->dpcd);
 
 		radeon_dp_probe_oui(radeon_connector);
 
diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c
index 022561e28707..d416bb2ff48d 100644
--- a/drivers/gpu/drm/radeon/ci_dpm.c
+++ b/drivers/gpu/drm/radeon/ci_dpm.c
@@ -869,6 +869,9 @@ static int ci_set_thermal_temperature_range(struct radeon_device *rdev,
 	WREG32_SMC(CG_THERMAL_CTRL, tmp);
 #endif
 
+	rdev->pm.dpm.thermal.min_temp = low_temp;
+	rdev->pm.dpm.thermal.max_temp = high_temp;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index b625646bf3e2..fa9565957f9d 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -3483,7 +3483,7 @@ static void cik_gpu_init(struct radeon_device *rdev)
 	u32 mc_shared_chmap, mc_arb_ramcfg;
 	u32 hdp_host_path_cntl;
 	u32 tmp;
-	int i, j, k;
+	int i, j;
 
 	switch (rdev->family) {
 	case CHIP_BONAIRE:
@@ -3544,6 +3544,7 @@ static void cik_gpu_init(struct radeon_device *rdev)
 			   (rdev->pdev->device == 0x130B) ||
 			   (rdev->pdev->device == 0x130E) ||
 			   (rdev->pdev->device == 0x1315) ||
+			   (rdev->pdev->device == 0x1318) ||
 			   (rdev->pdev->device == 0x131B)) {
 			rdev->config.cik.max_cu_per_sh = 4;
 			rdev->config.cik.max_backends_per_se = 1;
@@ -3672,12 +3673,11 @@ static void cik_gpu_init(struct radeon_device *rdev)
 		     rdev->config.cik.max_sh_per_se,
 		     rdev->config.cik.max_backends_per_se);
 
+	rdev->config.cik.active_cus = 0;
 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
-			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
-				rdev->config.cik.active_cus +=
-					hweight32(cik_get_cu_active_bitmap(rdev, i, j));
-			}
+			rdev->config.cik.active_cus +=
+				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
 		}
 	}
 
@@ -3801,7 +3801,7 @@ int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
 	radeon_ring_write(ring, 0xDEADBEEF);
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 
 	for (i = 0; i < rdev->usec_timeout; i++) {
 		tmp = RREG32(scratch);
@@ -3920,6 +3920,17 @@ void cik_fence_compute_ring_emit(struct radeon_device *rdev,
 	radeon_ring_write(ring, 0);
 }
 
+/**
+ * cik_semaphore_ring_emit - emit a semaphore on the CP ring
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon ring buffer object
+ * @semaphore: radeon semaphore object
+ * @emit_wait: Is this a sempahore wait?
+ *
+ * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
+ * from running ahead of semaphore waits.
+ */
 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
 			     struct radeon_ring *ring,
 			     struct radeon_semaphore *semaphore,
@@ -3932,6 +3943,12 @@ bool cik_semaphore_ring_emit(struct radeon_device *rdev,
 	radeon_ring_write(ring, lower_32_bits(addr));
 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
 
+	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
+		/* Prevent the PFP from running ahead of the semaphore wait */
+		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+		radeon_ring_write(ring, 0x0);
+	}
+
 	return true;
 }
 
@@ -4004,7 +4021,7 @@ int cik_copy_cpdma(struct radeon_device *rdev,
 		return r;
 	}
 
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 	radeon_semaphore_free(rdev, &sem, *fence);
 
 	return r;
@@ -4103,7 +4120,7 @@ int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
 	ib.ptr[2] = 0xDEADBEEF;
 	ib.length_dw = 3;
-	r = radeon_ib_schedule(rdev, &ib, NULL);
+	r = radeon_ib_schedule(rdev, &ib, NULL, false);
 	if (r) {
 		radeon_scratch_free(rdev, scratch);
 		radeon_ib_free(rdev, &ib);
@@ -4324,7 +4341,7 @@ static int cik_cp_gfx_start(struct radeon_device *rdev)
 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
 
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 
 	return 0;
 }
@@ -5732,20 +5749,17 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev)
 	WREG32(0x15D8, 0);
 	WREG32(0x15DC, 0);
 
-	/* empty context1-15 */
-	/* FIXME start with 4G, once using 2 level pt switch to full
-	 * vm size space
-	 */
+	/* restore context1-15 */
 	/* set vm size, must be a multiple of 4 */
 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
 	for (i = 1; i < 16; i++) {
 		if (i < 8)
 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
-			       rdev->gart.table_addr >> 12);
+			       rdev->vm_manager.saved_table_addr[i]);
 		else
 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
-			       rdev->gart.table_addr >> 12);
+			       rdev->vm_manager.saved_table_addr[i]);
 	}
 
 	/* enable context1-15 */
@@ -5810,6 +5824,17 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev)
  */
 static void cik_pcie_gart_disable(struct radeon_device *rdev)
 {
+	unsigned i;
+
+	for (i = 1; i < 16; ++i) {
+		uint32_t reg;
+		if (i < 8)
+			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
+		else
+			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
+		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
+	}
+
 	/* Disable all tables */
 	WREG32(VM_CONTEXT0_CNTL, 0);
 	WREG32(VM_CONTEXT1_CNTL, 0);
@@ -5958,14 +5983,14 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
 
 	/* update SH_MEM_* regs */
 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
 				 WRITE_DATA_DST_SEL(0)));
 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
 	radeon_ring_write(ring, 0);
 	radeon_ring_write(ring, VMID(vm->id));
 
 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
-	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
 				 WRITE_DATA_DST_SEL(0)));
 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
 	radeon_ring_write(ring, 0);
@@ -5976,7 +6001,7 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
 
 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
 				 WRITE_DATA_DST_SEL(0)));
 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
 	radeon_ring_write(ring, 0);
@@ -5987,7 +6012,7 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
 
 	/* bits 0-15 are the VM contexts0-15 */
 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
 				 WRITE_DATA_DST_SEL(0)));
 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
 	radeon_ring_write(ring, 0);
@@ -9538,6 +9563,9 @@ static void cik_pcie_gen3_enable(struct radeon_device *rdev)
 	int ret, i;
 	u16 tmp16;
 
+	if (pci_is_root_bus(rdev->pdev->bus))
+		return;
+
 	if (radeon_pcie_gen2 == 0)
 		return;
 
@@ -9764,7 +9792,8 @@ static void cik_program_aspm(struct radeon_device *rdev)
 			if (orig != data)
 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
 
-			if (!disable_clkreq) {
+			if (!disable_clkreq &&
+			    !pci_is_root_bus(rdev->pdev->bus)) {
 				struct pci_dev *root = rdev->pdev->bus->self;
 				u32 lnkcap;
 
diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c
index bcf480510ac2..c4ffa54b1e3d 100644
--- a/drivers/gpu/drm/radeon/cik_sdma.c
+++ b/drivers/gpu/drm/radeon/cik_sdma.c
@@ -489,13 +489,6 @@ int cik_sdma_resume(struct radeon_device *rdev)
 {
 	int r;
 
-	/* Reset dma */
-	WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
-	RREG32(SRBM_SOFT_RESET);
-	udelay(50);
-	WREG32(SRBM_SOFT_RESET, 0);
-	RREG32(SRBM_SOFT_RESET);
-
 	r = cik_sdma_load_microcode(rdev);
 	if (r)
 		return r;
@@ -596,7 +589,7 @@ int cik_copy_dma(struct radeon_device *rdev,
 		return r;
 	}
 
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 	radeon_semaphore_free(rdev, &sem, *fence);
 
 	return r;
@@ -638,7 +631,7 @@ int cik_sdma_ring_test(struct radeon_device *rdev,
 	radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr));
 	radeon_ring_write(ring, 1); /* number of DWs to follow */
 	radeon_ring_write(ring, 0xDEADBEEF);
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 
 	for (i = 0; i < rdev->usec_timeout; i++) {
 		tmp = readl(ptr);
@@ -695,7 +688,7 @@ int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	ib.ptr[4] = 0xDEADBEEF;
 	ib.length_dw = 5;
 
-	r = radeon_ib_schedule(rdev, &ib, NULL);
+	r = radeon_ib_schedule(rdev, &ib, NULL, false);
 	if (r) {
 		radeon_ib_free(rdev, &ib);
 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 4fedd14e670a..dbca60c7d097 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -2869,7 +2869,7 @@ static int evergreen_cp_start(struct radeon_device *rdev)
 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
 	radeon_ring_write(ring, 0);
 	radeon_ring_write(ring, 0);
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 
 	cp_me = 0xff;
 	WREG32(CP_ME_CNTL, cp_me);
@@ -2912,7 +2912,7 @@ static int evergreen_cp_start(struct radeon_device *rdev)
 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
 	radeon_ring_write(ring, 0x00000010); /*  */
 
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/evergreen_dma.c b/drivers/gpu/drm/radeon/evergreen_dma.c
index 478caefe0fef..afaba388c36d 100644
--- a/drivers/gpu/drm/radeon/evergreen_dma.c
+++ b/drivers/gpu/drm/radeon/evergreen_dma.c
@@ -155,7 +155,7 @@ int evergreen_copy_dma(struct radeon_device *rdev,
 		return r;
 	}
 
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 	radeon_semaphore_free(rdev, &sem, *fence);
 
 	return r;
diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c
index 9ef8c38f2d66..67cb472d188c 100644
--- a/drivers/gpu/drm/radeon/kv_dpm.c
+++ b/drivers/gpu/drm/radeon/kv_dpm.c
@@ -33,6 +33,8 @@
 #define KV_MINIMUM_ENGINE_CLOCK         800
 #define SMC_RAM_END                     0x40000
 
+static int kv_enable_nb_dpm(struct radeon_device *rdev,
+			    bool enable);
 static void kv_init_graphics_levels(struct radeon_device *rdev);
 static int kv_calculate_ds_divider(struct radeon_device *rdev);
 static int kv_calculate_nbps_level_settings(struct radeon_device *rdev);
@@ -1295,6 +1297,9 @@ void kv_dpm_disable(struct radeon_device *rdev)
 {
 	kv_smc_bapm_enable(rdev, false);
 
+	if (rdev->family == CHIP_MULLINS)
+		kv_enable_nb_dpm(rdev, false);
+
 	/* powerup blocks */
 	kv_dpm_powergate_acp(rdev, false);
 	kv_dpm_powergate_samu(rdev, false);
@@ -1438,14 +1443,14 @@ static int kv_update_uvd_dpm(struct radeon_device *rdev, bool gate)
 	return kv_enable_uvd_dpm(rdev, !gate);
 }
 
-static u8 kv_get_vce_boot_level(struct radeon_device *rdev)
+static u8 kv_get_vce_boot_level(struct radeon_device *rdev, u32 evclk)
 {
 	u8 i;
 	struct radeon_vce_clock_voltage_dependency_table *table =
 		&rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table;
 
 	for (i = 0; i < table->count; i++) {
-		if (table->entries[i].evclk >= 0) /* XXX */
+		if (table->entries[i].evclk >= evclk)
 			break;
 	}
 
@@ -1468,7 +1473,7 @@ static int kv_update_vce_dpm(struct radeon_device *rdev,
 		if (pi->caps_stable_p_state)
 			pi->vce_boot_level = table->count - 1;
 		else
-			pi->vce_boot_level = kv_get_vce_boot_level(rdev);
+			pi->vce_boot_level = kv_get_vce_boot_level(rdev, radeon_new_state->evclk);
 
 		ret = kv_copy_bytes_to_smc(rdev,
 					   pi->dpm_table_start +
@@ -1769,15 +1774,24 @@ static int kv_update_dfs_bypass_settings(struct radeon_device *rdev,
 	return ret;
 }
 
-static int kv_enable_nb_dpm(struct radeon_device *rdev)
+static int kv_enable_nb_dpm(struct radeon_device *rdev,
+			    bool enable)
 {
 	struct kv_power_info *pi = kv_get_pi(rdev);
 	int ret = 0;
 
-	if (pi->enable_nb_dpm && !pi->nb_dpm_enabled) {
-		ret = kv_notify_message_to_smu(rdev, PPSMC_MSG_NBDPM_Enable);
-		if (ret == 0)
-			pi->nb_dpm_enabled = true;
+	if (enable) {
+		if (pi->enable_nb_dpm && !pi->nb_dpm_enabled) {
+			ret = kv_notify_message_to_smu(rdev, PPSMC_MSG_NBDPM_Enable);
+			if (ret == 0)
+				pi->nb_dpm_enabled = true;
+		}
+	} else {
+		if (pi->enable_nb_dpm && pi->nb_dpm_enabled) {
+			ret = kv_notify_message_to_smu(rdev, PPSMC_MSG_NBDPM_Disable);
+			if (ret == 0)
+				pi->nb_dpm_enabled = false;
+		}
 	}
 
 	return ret;
@@ -1864,7 +1878,7 @@ int kv_dpm_set_power_state(struct radeon_device *rdev)
 			}
 			kv_update_sclk_t(rdev);
 			if (rdev->family == CHIP_MULLINS)
-				kv_enable_nb_dpm(rdev);
+				kv_enable_nb_dpm(rdev, true);
 		}
 	} else {
 		if (pi->enable_dpm) {
@@ -1889,7 +1903,7 @@ int kv_dpm_set_power_state(struct radeon_device *rdev)
 			}
 			kv_update_acp_boot_level(rdev);
 			kv_update_sclk_t(rdev);
-			kv_enable_nb_dpm(rdev);
+			kv_enable_nb_dpm(rdev, true);
 		}
 	}
 
@@ -2726,7 +2740,10 @@ int kv_dpm_init(struct radeon_device *rdev)
 	pi->caps_sclk_ds = true;
 	pi->enable_auto_thermal_throttling = true;
 	pi->disable_nb_ps3_in_battery = false;
-	pi->bapm_enable = true;
+	if (radeon_bapm == 0)
+		pi->bapm_enable = false;
+	else
+		pi->bapm_enable = true;
 	pi->voltage_drop_t = 0;
 	pi->caps_sclk_throttle_low_notification = false;
 	pi->caps_fps = false; /* true? */
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 327b85f7fd0d..3faee58946dd 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1271,7 +1271,7 @@ static int cayman_pcie_gart_enable(struct radeon_device *rdev)
 		WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (i << 2), 0);
 		WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (i << 2), rdev->vm_manager.max_pfn);
 		WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
-			rdev->gart.table_addr >> 12);
+		       rdev->vm_manager.saved_table_addr[i]);
 	}
 
 	/* enable context1-7 */
@@ -1303,6 +1303,13 @@ static int cayman_pcie_gart_enable(struct radeon_device *rdev)
 
 static void cayman_pcie_gart_disable(struct radeon_device *rdev)
 {
+	unsigned i;
+
+	for (i = 1; i < 8; ++i) {
+		rdev->vm_manager.saved_table_addr[i] = RREG32(
+			VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2));
+	}
+
 	/* Disable all tables */
 	WREG32(VM_CONTEXT0_CNTL, 0);
 	WREG32(VM_CONTEXT1_CNTL, 0);
@@ -1505,7 +1512,7 @@ static int cayman_cp_start(struct radeon_device *rdev)
 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
 	radeon_ring_write(ring, 0);
 	radeon_ring_write(ring, 0);
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 
 	cayman_cp_enable(rdev, true);
 
@@ -1547,7 +1554,7 @@ static int cayman_cp_start(struct radeon_device *rdev)
 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
 	radeon_ring_write(ring, 0x00000010); /*  */
 
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 
 	/* XXX init other rings */
 
diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c
index 8a3e6221cece..f26f0a9fb522 100644
--- a/drivers/gpu/drm/radeon/ni_dma.c
+++ b/drivers/gpu/drm/radeon/ni_dma.c
@@ -191,12 +191,6 @@ int cayman_dma_resume(struct radeon_device *rdev)
 	u32 reg_offset, wb_offset;
 	int i, r;
 
-	/* Reset dma */
-	WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
-	RREG32(SRBM_SOFT_RESET);
-	udelay(50);
-	WREG32(SRBM_SOFT_RESET, 0);
-
 	for (i = 0; i < 2; i++) {
 		if (i == 0) {
 			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 04b5940b8923..b0098e792e62 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -821,6 +821,20 @@ u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc)
 		return RREG32(RADEON_CRTC2_CRNT_FRAME);
 }
 
+/**
+ * r100_ring_hdp_flush - flush Host Data Path via the ring buffer
+ * rdev: radeon device structure
+ * ring: ring buffer struct for emitting packets
+ */
+static void r100_ring_hdp_flush(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
+	radeon_ring_write(ring, rdev->config.r100.hdp_cntl |
+				RADEON_HDP_READ_BUFFER_INVALIDATE);
+	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
+	radeon_ring_write(ring, rdev->config.r100.hdp_cntl);
+}
+
 /* Who ever call radeon_fence_emit should call ring_lock and ask
  * for enough space (today caller are ib schedule and buffer move) */
 void r100_fence_ring_emit(struct radeon_device *rdev,
@@ -925,7 +939,7 @@ int r100_copy_blit(struct radeon_device *rdev,
 	if (fence) {
 		r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX);
 	}
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 	return r;
 }
 
@@ -958,7 +972,7 @@ void r100_ring_start(struct radeon_device *rdev, struct radeon_ring *ring)
 			  RADEON_ISYNC_ANY3D_IDLE2D |
 			  RADEON_ISYNC_WAIT_IDLEGUI |
 			  RADEON_ISYNC_CPSCRATCH_IDLEGUI);
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 }
 
 
@@ -1056,20 +1070,6 @@ void r100_gfx_set_wptr(struct radeon_device *rdev,
 	(void)RREG32(RADEON_CP_RB_WPTR);
 }
 
-/**
- * r100_ring_hdp_flush - flush Host Data Path via the ring buffer
- * rdev: radeon device structure
- * ring: ring buffer struct for emitting packets
- */
-void r100_ring_hdp_flush(struct radeon_device *rdev, struct radeon_ring *ring)
-{
-	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
-	radeon_ring_write(ring, rdev->config.r100.hdp_cntl |
-				RADEON_HDP_READ_BUFFER_INVALIDATE);
-	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
-	radeon_ring_write(ring, rdev->config.r100.hdp_cntl);
-}
-
 static void r100_cp_load_microcode(struct radeon_device *rdev)
 {
 	const __be32 *fw_data;
@@ -3638,7 +3638,7 @@ int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	}
 	radeon_ring_write(ring, PACKET0(scratch, 0));
 	radeon_ring_write(ring, 0xDEADBEEF);
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 	for (i = 0; i < rdev->usec_timeout; i++) {
 		tmp = RREG32(scratch);
 		if (tmp == 0xDEADBEEF) {
@@ -3700,7 +3700,7 @@ int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	ib.ptr[6] = PACKET2(0);
 	ib.ptr[7] = PACKET2(0);
 	ib.length_dw = 8;
-	r = radeon_ib_schedule(rdev, &ib, NULL);
+	r = radeon_ib_schedule(rdev, &ib, NULL, false);
 	if (r) {
 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
 		goto free_ib;
diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c
index 58f0473aa73f..67780374a652 100644
--- a/drivers/gpu/drm/radeon/r200.c
+++ b/drivers/gpu/drm/radeon/r200.c
@@ -121,7 +121,7 @@ int r200_copy_dma(struct radeon_device *rdev,
 	if (fence) {
 		r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX);
 	}
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 	return r;
 }
 
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 75b30338c226..1bc4704034ce 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -295,7 +295,7 @@ void r300_ring_start(struct radeon_device *rdev, struct radeon_ring *ring)
 	radeon_ring_write(ring,
 			  R300_GEOMETRY_ROUND_NEAREST |
 			  R300_COLOR_ROUND_NEAREST);
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 }
 
 static void r300_errata(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c
index 802b19220a21..2828605aef3f 100644
--- a/drivers/gpu/drm/radeon/r420.c
+++ b/drivers/gpu/drm/radeon/r420.c
@@ -219,7 +219,7 @@ static void r420_cp_errata_init(struct radeon_device *rdev)
 	radeon_ring_write(ring, PACKET0(R300_CP_RESYNC_ADDR, 1));
 	radeon_ring_write(ring, rdev->config.r300.resync_scratch);
 	radeon_ring_write(ring, 0xDEADBEEF);
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 }
 
 static void r420_cp_errata_fini(struct radeon_device *rdev)
@@ -232,7 +232,7 @@ static void r420_cp_errata_fini(struct radeon_device *rdev)
 	radeon_ring_lock(rdev, ring, 8);
 	radeon_ring_write(ring, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
 	radeon_ring_write(ring, R300_RB3D_DC_FINISH);
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 	radeon_scratch_free(rdev, rdev->config.r300.resync_scratch);
 }
 
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index c70a504d96af..3cfb50056f7a 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1812,7 +1812,6 @@ static void r600_gpu_init(struct radeon_device *rdev)
 {
 	u32 tiling_config;
 	u32 ramcfg;
-	u32 cc_rb_backend_disable;
 	u32 cc_gc_shader_pipe_config;
 	u32 tmp;
 	int i, j;
@@ -1939,29 +1938,20 @@ static void r600_gpu_init(struct radeon_device *rdev)
 	}
 	tiling_config |= BANK_SWAPS(1);
 
-	cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000;
-	tmp = R6XX_MAX_BACKENDS -
-		r600_count_pipe_bits((cc_rb_backend_disable >> 16) & R6XX_MAX_BACKENDS_MASK);
-	if (tmp < rdev->config.r600.max_backends) {
-		rdev->config.r600.max_backends = tmp;
-	}
-
 	cc_gc_shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0x00ffff00;
-	tmp = R6XX_MAX_PIPES -
-		r600_count_pipe_bits((cc_gc_shader_pipe_config >> 8) & R6XX_MAX_PIPES_MASK);
-	if (tmp < rdev->config.r600.max_pipes) {
-		rdev->config.r600.max_pipes = tmp;
-	}
-	tmp = R6XX_MAX_SIMDS -
-		r600_count_pipe_bits((cc_gc_shader_pipe_config >> 16) & R6XX_MAX_SIMDS_MASK);
-	if (tmp < rdev->config.r600.max_simds) {
-		rdev->config.r600.max_simds = tmp;
-	}
 	tmp = rdev->config.r600.max_simds -
 		r600_count_pipe_bits((cc_gc_shader_pipe_config >> 16) & R6XX_MAX_SIMDS_MASK);
 	rdev->config.r600.active_simds = tmp;
 
 	disabled_rb_mask = (RREG32(CC_RB_BACKEND_DISABLE) >> 16) & R6XX_MAX_BACKENDS_MASK;
+	tmp = 0;
+	for (i = 0; i < rdev->config.r600.max_backends; i++)
+		tmp |= (1 << i);
+	/* if all the backends are disabled, fix it up here */
+	if ((disabled_rb_mask & tmp) == tmp) {
+		for (i = 0; i < rdev->config.r600.max_backends; i++)
+			disabled_rb_mask &= ~(1 << i);
+	}
 	tmp = (tiling_config & PIPE_TILING__MASK) >> PIPE_TILING__SHIFT;
 	tmp = r6xx_remap_render_backend(rdev, tmp, rdev->config.r600.max_backends,
 					R6XX_MAX_BACKENDS, disabled_rb_mask);
@@ -2547,7 +2537,7 @@ int r600_cp_start(struct radeon_device *rdev)
 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
 	radeon_ring_write(ring, 0);
 	radeon_ring_write(ring, 0);
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 
 	cp_me = 0xff;
 	WREG32(R_0086D8_CP_ME_CNTL, cp_me);
@@ -2683,7 +2673,7 @@ int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
 	radeon_ring_write(ring, ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2));
 	radeon_ring_write(ring, 0xDEADBEEF);
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 	for (i = 0; i < rdev->usec_timeout; i++) {
 		tmp = RREG32(scratch);
 		if (tmp == 0xDEADBEEF)
@@ -2753,6 +2743,17 @@ void r600_fence_ring_emit(struct radeon_device *rdev,
 	}
 }
 
+/**
+ * r600_semaphore_ring_emit - emit a semaphore on the CP ring
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon ring buffer object
+ * @semaphore: radeon semaphore object
+ * @emit_wait: Is this a sempahore wait?
+ *
+ * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
+ * from running ahead of semaphore waits.
+ */
 bool r600_semaphore_ring_emit(struct radeon_device *rdev,
 			      struct radeon_ring *ring,
 			      struct radeon_semaphore *semaphore,
@@ -2768,6 +2769,13 @@ bool r600_semaphore_ring_emit(struct radeon_device *rdev,
 	radeon_ring_write(ring, lower_32_bits(addr));
 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel);
 
+	/* PFP_SYNC_ME packet only exists on 7xx+, only enable it on eg+ */
+	if (emit_wait && (rdev->family >= CHIP_CEDAR)) {
+		/* Prevent the PFP from running ahead of the semaphore wait */
+		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+		radeon_ring_write(ring, 0x0);
+	}
+
 	return true;
 }
 
@@ -2845,7 +2853,7 @@ int r600_copy_cpdma(struct radeon_device *rdev,
 		return r;
 	}
 
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 	radeon_semaphore_free(rdev, &sem, *fence);
 
 	return r;
@@ -3165,7 +3173,7 @@ int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	ib.ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
 	ib.ptr[2] = 0xDEADBEEF;
 	ib.length_dw = 3;
-	r = radeon_ib_schedule(rdev, &ib, NULL);
+	r = radeon_ib_schedule(rdev, &ib, NULL, false);
 	if (r) {
 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
 		goto free_ib;
diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c
index 4969cef44a19..a908daa006d2 100644
--- a/drivers/gpu/drm/radeon/r600_dma.c
+++ b/drivers/gpu/drm/radeon/r600_dma.c
@@ -124,15 +124,6 @@ int r600_dma_resume(struct radeon_device *rdev)
 	u32 rb_bufsz;
 	int r;
 
-	/* Reset dma */
-	if (rdev->family >= CHIP_RV770)
-		WREG32(SRBM_SOFT_RESET, RV770_SOFT_RESET_DMA);
-	else
-		WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA);
-	RREG32(SRBM_SOFT_RESET);
-	udelay(50);
-	WREG32(SRBM_SOFT_RESET, 0);
-
 	WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0);
 	WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);
 
@@ -261,7 +252,7 @@ int r600_dma_ring_test(struct radeon_device *rdev,
 	radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
 	radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff);
 	radeon_ring_write(ring, 0xDEADBEEF);
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 
 	for (i = 0; i < rdev->usec_timeout; i++) {
 		tmp = readl(ptr);
@@ -368,7 +359,7 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	ib.ptr[3] = 0xDEADBEEF;
 	ib.length_dw = 4;
 
-	r = radeon_ib_schedule(rdev, &ib, NULL);
+	r = radeon_ib_schedule(rdev, &ib, NULL, false);
 	if (r) {
 		radeon_ib_free(rdev, &ib);
 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
@@ -493,7 +484,7 @@ int r600_copy_dma(struct radeon_device *rdev,
 		return r;
 	}
 
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 	radeon_semaphore_free(rdev, &sem, *fence);
 
 	return r;
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
index f94e7a9afe75..31e1052ad3e3 100644
--- a/drivers/gpu/drm/radeon/r600d.h
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -44,13 +44,6 @@
 #define R6XX_MAX_PIPES				8
 #define R6XX_MAX_PIPES_MASK			0xff
 
-/* PTE flags */
-#define PTE_VALID				(1 << 0)
-#define PTE_SYSTEM				(1 << 1)
-#define PTE_SNOOPED				(1 << 2)
-#define PTE_READABLE				(1 << 5)
-#define PTE_WRITEABLE				(1 << 6)
-
 /* tiling bits */
 #define     ARRAY_LINEAR_GENERAL              0x00000000
 #define     ARRAY_LINEAR_ALIGNED              0x00000001
@@ -1597,6 +1590,7 @@
 		 */
 #              define PACKET3_CP_DMA_CMD_SAIC      (1 << 28)
 #              define PACKET3_CP_DMA_CMD_DAIC      (1 << 29)
+#define	PACKET3_PFP_SYNC_ME				0x42 /* r7xx+ only */
 #define	PACKET3_SURFACE_SYNC				0x43
 #              define PACKET3_CB0_DEST_BASE_ENA    (1 << 6)
 #              define PACKET3_FULL_CACHE_ENA       (1 << 20) /* r7xx+ only */
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 9e1732eb402c..5f05b4c84338 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -105,6 +105,7 @@ extern int radeon_vm_size;
 extern int radeon_vm_block_size;
 extern int radeon_deep_color;
 extern int radeon_use_pflipirq;
+extern int radeon_bapm;
 
 /*
  * Copy from radeon_drv.h so we don't have to include both and have conflicting
@@ -914,6 +915,8 @@ struct radeon_vm_manager {
 	u64				vram_base_offset;
 	/* is vm enabled? */
 	bool				enabled;
+	/* for hw to save the PD addr on suspend/resume */
+	uint32_t			saved_table_addr[RADEON_NUM_VM];
 };
 
 /*
@@ -967,7 +970,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
 		  unsigned size);
 void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib);
 int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
-		       struct radeon_ib *const_ib);
+		       struct radeon_ib *const_ib, bool hdp_flush);
 int radeon_ib_pool_init(struct radeon_device *rdev);
 void radeon_ib_pool_fini(struct radeon_device *rdev);
 int radeon_ib_ring_tests(struct radeon_device *rdev);
@@ -977,8 +980,10 @@ bool radeon_ring_supports_scratch_reg(struct radeon_device *rdev,
 void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *cp);
 int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ndw);
 int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ndw);
-void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *cp);
-void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *cp);
+void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *cp,
+			bool hdp_flush);
+void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *cp,
+			       bool hdp_flush);
 void radeon_ring_undo(struct radeon_ring *ring);
 void radeon_ring_unlock_undo(struct radeon_device *rdev, struct radeon_ring *cp);
 int radeon_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index eeeeabe09758..2dd5847f9b98 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -185,7 +185,6 @@ static struct radeon_asic_ring r100_gfx_ring = {
 	.get_rptr = &r100_gfx_get_rptr,
 	.get_wptr = &r100_gfx_get_wptr,
 	.set_wptr = &r100_gfx_set_wptr,
-	.hdp_flush = &r100_ring_hdp_flush,
 };
 
 static struct radeon_asic r100_asic = {
@@ -332,7 +331,6 @@ static struct radeon_asic_ring r300_gfx_ring = {
 	.get_rptr = &r100_gfx_get_rptr,
 	.get_wptr = &r100_gfx_get_wptr,
 	.set_wptr = &r100_gfx_set_wptr,
-	.hdp_flush = &r100_ring_hdp_flush,
 };
 
 static struct radeon_asic r300_asic = {
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 275a5dc01780..7756bc1e1cd3 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -148,8 +148,7 @@ u32 r100_gfx_get_wptr(struct radeon_device *rdev,
 		      struct radeon_ring *ring);
 void r100_gfx_set_wptr(struct radeon_device *rdev,
 		       struct radeon_ring *ring);
-void r100_ring_hdp_flush(struct radeon_device *rdev,
-			 struct radeon_ring *ring);
+
 /*
  * r200,rv250,rs300,rv280
  */
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index 92b2d8dd4735..e74c7e387dde 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -447,6 +447,13 @@ static bool radeon_atom_apply_quirks(struct drm_device *dev,
 		}
 	}
 
+	/* Fujitsu D3003-S2 board lists DVI-I as DVI-I and VGA */
+	if ((dev->pdev->device == 0x9805) &&
+	    (dev->pdev->subsystem_vendor == 0x1734) &&
+	    (dev->pdev->subsystem_device == 0x11bd)) {
+		if (*connector_type == DRM_MODE_CONNECTOR_VGA)
+			return false;
+	}
 
 	return true;
 }
@@ -2281,19 +2288,31 @@ static void radeon_atombios_add_pplib_thermal_controller(struct radeon_device *r
 				 (controller->ucFanParameters &
 				  ATOM_PP_FANPARAMETERS_NOFAN) ? "without" : "with");
 			rdev->pm.int_thermal_type = THERMAL_TYPE_KV;
-		} else if ((controller->ucType ==
-			    ATOM_PP_THERMALCONTROLLER_EXTERNAL_GPIO) ||
-			   (controller->ucType ==
-			    ATOM_PP_THERMALCONTROLLER_ADT7473_WITH_INTERNAL) ||
-			   (controller->ucType ==
-			    ATOM_PP_THERMALCONTROLLER_EMC2103_WITH_INTERNAL)) {
-			DRM_INFO("Special thermal controller config\n");
+		} else if (controller->ucType ==
+			   ATOM_PP_THERMALCONTROLLER_EXTERNAL_GPIO) {
+			DRM_INFO("External GPIO thermal controller %s fan control\n",
+				 (controller->ucFanParameters &
+				  ATOM_PP_FANPARAMETERS_NOFAN) ? "without" : "with");
+			rdev->pm.int_thermal_type = THERMAL_TYPE_EXTERNAL_GPIO;
+		} else if (controller->ucType ==
+			   ATOM_PP_THERMALCONTROLLER_ADT7473_WITH_INTERNAL) {
+			DRM_INFO("ADT7473 with internal thermal controller %s fan control\n",
+				 (controller->ucFanParameters &
+				  ATOM_PP_FANPARAMETERS_NOFAN) ? "without" : "with");
+			rdev->pm.int_thermal_type = THERMAL_TYPE_ADT7473_WITH_INTERNAL;
+		} else if (controller->ucType ==
+			   ATOM_PP_THERMALCONTROLLER_EMC2103_WITH_INTERNAL) {
+			DRM_INFO("EMC2103 with internal thermal controller %s fan control\n",
+				 (controller->ucFanParameters &
+				  ATOM_PP_FANPARAMETERS_NOFAN) ? "without" : "with");
+			rdev->pm.int_thermal_type = THERMAL_TYPE_EMC2103_WITH_INTERNAL;
 		} else if (controller->ucType < ARRAY_SIZE(pp_lib_thermal_controller_names)) {
 			DRM_INFO("Possible %s thermal controller at 0x%02x %s fan control\n",
 				 pp_lib_thermal_controller_names[controller->ucType],
 				 controller->ucI2cAddress >> 1,
 				 (controller->ucFanParameters &
 				  ATOM_PP_FANPARAMETERS_NOFAN) ? "without" : "with");
+			rdev->pm.int_thermal_type = THERMAL_TYPE_EXTERNAL;
 			i2c_bus = radeon_lookup_i2c_gpio(rdev, controller->ucI2cLine);
 			rdev->pm.i2c_bus = radeon_i2c_lookup(rdev, &i2c_bus);
 			if (rdev->pm.i2c_bus) {
diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
index a9fb0d016d38..8bc7d0bbd3c8 100644
--- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c
+++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
@@ -33,7 +33,6 @@ static struct radeon_atpx_priv {
 	bool atpx_detected;
 	/* handle for device - and atpx */
 	acpi_handle dhandle;
-	acpi_handle other_handle;
 	struct radeon_atpx atpx;
 } radeon_atpx_priv;
 
@@ -453,10 +452,9 @@ static bool radeon_atpx_pci_probe_handle(struct pci_dev *pdev)
 		return false;
 
 	status = acpi_get_handle(dhandle, "ATPX", &atpx_handle);
-	if (ACPI_FAILURE(status)) {
-		radeon_atpx_priv.other_handle = dhandle;
+	if (ACPI_FAILURE(status))
 		return false;
-	}
+
 	radeon_atpx_priv.dhandle = dhandle;
 	radeon_atpx_priv.atpx.handle = atpx_handle;
 	return true;
@@ -540,16 +538,6 @@ static bool radeon_atpx_detect(void)
 		printk(KERN_INFO "VGA switcheroo: detected switching method %s handle\n",
 		       acpi_method_name);
 		radeon_atpx_priv.atpx_detected = true;
-		/*
-		 * On some systems hotplug events are generated for the device
-		 * being switched off when ATPX is executed.  They cause ACPI
-		 * hotplug to trigger and attempt to remove the device from
-		 * the system, which causes it to break down.  Prevent that from
-		 * happening by setting the no_hotplug flag for the involved
-		 * ACPI device objects.
-		 */
-		acpi_bus_no_hotplug(radeon_atpx_priv.dhandle);
-		acpi_bus_no_hotplug(radeon_atpx_priv.other_handle);
 		return true;
 	}
 	return false;
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index ee712c199b25..83f382e8e40e 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -132,7 +132,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 		 * the buffers used for read only, which doubles the range
 		 * to 0 to 31. 32 is reserved for the kernel driver.
 		 */
-		priority = (r->flags & 0xf) * 2 + !!r->write_domain;
+		priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2
+			   + !!r->write_domain;
 
 		/* the first reloc of an UVD job is the msg and that must be in
 		   VRAM, also but everything into VRAM on AGP cards to avoid
@@ -450,7 +451,7 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev,
 		radeon_vce_note_usage(rdev);
 
 	radeon_cs_sync_rings(parser);
-	r = radeon_ib_schedule(rdev, &parser->ib, NULL);
+	r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
 	if (r) {
 		DRM_ERROR("Failed to schedule IB !\n");
 	}
@@ -541,9 +542,9 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
 
 	if ((rdev->family >= CHIP_TAHITI) &&
 	    (parser->chunk_const_ib_idx != -1)) {
-		r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib);
+		r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
 	} else {
-		r = radeon_ib_schedule(rdev, &parser->ib, NULL);
+		r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
 	}
 
 out:
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index c8ea050c8fa4..75223dd3a8a3 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1393,7 +1393,7 @@ int radeon_device_init(struct radeon_device *rdev,
 
 	r = radeon_init(rdev);
 	if (r)
-		return r;
+		goto failed;
 
 	r = radeon_ib_ring_tests(rdev);
 	if (r)
@@ -1413,7 +1413,7 @@ int radeon_device_init(struct radeon_device *rdev,
 		radeon_agp_disable(rdev);
 		r = radeon_init(rdev);
 		if (r)
-			return r;
+			goto failed;
 	}
 
 	if ((radeon_testing & 1)) {
@@ -1435,6 +1435,11 @@ int radeon_device_init(struct radeon_device *rdev,
 			DRM_INFO("radeon: acceleration disabled, skipping benchmarks\n");
 	}
 	return 0;
+
+failed:
+	if (runtime)
+		vga_switcheroo_fini_domain_pm_ops(rdev->dev);
+	return r;
 }
 
 static void radeon_debugfs_remove_files(struct radeon_device *rdev);
@@ -1455,6 +1460,8 @@ void radeon_device_fini(struct radeon_device *rdev)
 	radeon_bo_evict_vram(rdev);
 	radeon_fini(rdev);
 	vga_switcheroo_unregister_client(rdev->pdev);
+	if (rdev->flags & RADEON_IS_PX)
+		vga_switcheroo_fini_domain_pm_ops(rdev->dev);
 	vga_client_register(rdev->pdev, NULL, NULL, NULL);
 	if (rdev->rio_mem)
 		pci_iounmap(rdev->pdev, rdev->rio_mem);
@@ -1680,8 +1687,8 @@ int radeon_gpu_reset(struct radeon_device *rdev)
 	radeon_save_bios_scratch_regs(rdev);
 	/* block TTM */
 	resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev);
-	radeon_pm_suspend(rdev);
 	radeon_suspend(rdev);
+	radeon_hpd_fini(rdev);
 
 	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 		ring_sizes[i] = radeon_ring_backup(rdev, &rdev->ring[i],
@@ -1726,9 +1733,39 @@ retry:
 		}
 	}
 
-	radeon_pm_resume(rdev);
+	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
+		/* do dpm late init */
+		r = radeon_pm_late_init(rdev);
+		if (r) {
+			rdev->pm.dpm_enabled = false;
+			DRM_ERROR("radeon_pm_late_init failed, disabling dpm\n");
+		}
+	} else {
+		/* resume old pm late */
+		radeon_pm_resume(rdev);
+	}
+
+	/* init dig PHYs, disp eng pll */
+	if (rdev->is_atom_bios) {
+		radeon_atom_encoder_init(rdev);
+		radeon_atom_disp_eng_pll_init(rdev);
+		/* turn on the BL */
+		if (rdev->mode_info.bl_encoder) {
+			u8 bl_level = radeon_get_backlight_level(rdev,
+								 rdev->mode_info.bl_encoder);
+			radeon_set_backlight_level(rdev, rdev->mode_info.bl_encoder,
+						   bl_level);
+		}
+	}
+	/* reset hpd state */
+	radeon_hpd_init(rdev);
+
 	drm_helper_resume_force_mode(rdev->ddev);
 
+	/* set the power state here in case we are a PX system or headless */
+	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled)
+		radeon_pm_compute_clocks(rdev);
+
 	ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
 	if (r) {
 		/* bad news, how to tell it to userspace ? */
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 092d067f93e1..4126fd0937a2 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -83,7 +83,7 @@
  *            CIK: 1D and linear tiling modes contain valid PIPE_CONFIG
  *   2.39.0 - Add INFO query for number of active CUs
  *   2.40.0 - Add RADEON_GEM_GTT_WC/UC, flush HDP cache before submitting
- *            CS to GPU
+ *            CS to GPU on >= r600
  */
 #define KMS_DRIVER_MAJOR	2
 #define KMS_DRIVER_MINOR	40
@@ -180,6 +180,7 @@ int radeon_vm_size = 8;
 int radeon_vm_block_size = -1;
 int radeon_deep_color = 0;
 int radeon_use_pflipirq = 2;
+int radeon_bapm = -1;
 
 MODULE_PARM_DESC(no_wb, "Disable AGP writeback for scratch registers");
 module_param_named(no_wb, radeon_no_wb, int, 0444);
@@ -259,6 +260,9 @@ module_param_named(deep_color, radeon_deep_color, int, 0444);
 MODULE_PARM_DESC(use_pflipirq, "Pflip irqs for pageflip completion (0 = disable, 1 = as fallback, 2 = exclusive (default))");
 module_param_named(use_pflipirq, radeon_use_pflipirq, int, 0444);
 
+MODULE_PARM_DESC(bapm, "BAPM support (1 = enable, 0 = disable, -1 = auto)");
+module_param_named(bapm, radeon_bapm, int, 0444);
+
 static struct pci_device_id pciidlist[] = {
 	radeon_PCI_IDS
 };
@@ -436,6 +440,7 @@ static int radeon_pmops_runtime_suspend(struct device *dev)
 	ret = radeon_suspend_kms(drm_dev, false, false);
 	pci_save_state(pdev);
 	pci_disable_device(pdev);
+	pci_ignore_hotplug(pdev);
 	pci_set_power_state(pdev, PCI_D3cold);
 	drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;
 
diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c
index 65b0c213488d..5bf2c0a05827 100644
--- a/drivers/gpu/drm/radeon/radeon_ib.c
+++ b/drivers/gpu/drm/radeon/radeon_ib.c
@@ -107,6 +107,7 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib)
  * @rdev: radeon_device pointer
  * @ib: IB object to schedule
  * @const_ib: Const IB to schedule (SI only)
+ * @hdp_flush: Whether or not to perform an HDP cache flush
  *
  * Schedule an IB on the associated ring (all asics).
  * Returns 0 on success, error on failure.
@@ -122,7 +123,7 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib)
  * to SI there was just a DE IB.
  */
 int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
-		       struct radeon_ib *const_ib)
+		       struct radeon_ib *const_ib, bool hdp_flush)
 {
 	struct radeon_ring *ring = &rdev->ring[ib->ring];
 	int r = 0;
@@ -176,7 +177,7 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
 	if (ib->vm)
 		radeon_vm_fence(rdev, ib->vm, ib->fence);
 
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, hdp_flush);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index 23314be49480..164898b0010c 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -460,10 +460,6 @@ static ssize_t radeon_get_dpm_state(struct device *dev,
 	struct radeon_device *rdev = ddev->dev_private;
 	enum radeon_pm_state_type pm = rdev->pm.dpm.user_state;
 
-	if  ((rdev->flags & RADEON_IS_PX) &&
-	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return snprintf(buf, PAGE_SIZE, "off\n");
-
 	return snprintf(buf, PAGE_SIZE, "%s\n",
 			(pm == POWER_STATE_TYPE_BATTERY) ? "battery" :
 			(pm == POWER_STATE_TYPE_BALANCED) ? "balanced" : "performance");
@@ -477,11 +473,6 @@ static ssize_t radeon_set_dpm_state(struct device *dev,
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct radeon_device *rdev = ddev->dev_private;
 
-	/* Can't set dpm state when the card is off */
-	if  ((rdev->flags & RADEON_IS_PX) &&
-	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
-		return -EINVAL;
-
 	mutex_lock(&rdev->pm.mutex);
 	if (strncmp("battery", buf, strlen("battery")) == 0)
 		rdev->pm.dpm.user_state = POWER_STATE_TYPE_BATTERY;
@@ -495,7 +486,12 @@ static ssize_t radeon_set_dpm_state(struct device *dev,
 		goto fail;
 	}
 	mutex_unlock(&rdev->pm.mutex);
-	radeon_pm_compute_clocks(rdev);
+
+	/* Can't set dpm state when the card is off */
+	if (!(rdev->flags & RADEON_IS_PX) ||
+	    (ddev->switch_power_state == DRM_SWITCH_POWER_ON))
+		radeon_pm_compute_clocks(rdev);
+
 fail:
 	return count;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 5b4e0cf231a0..d65607902537 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -177,16 +177,18 @@ int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *ring, unsig
  *
  * @rdev: radeon_device pointer
  * @ring: radeon_ring structure holding ring information
+ * @hdp_flush: Whether or not to perform an HDP cache flush
  *
  * Update the wptr (write pointer) to tell the GPU to
  * execute new commands on the ring buffer (all asics).
  */
-void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring)
+void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring,
+			bool hdp_flush)
 {
 	/* If we are emitting the HDP flush via the ring buffer, we need to
 	 * do it before padding.
 	 */
-	if (rdev->asic->ring[ring->idx]->hdp_flush)
+	if (hdp_flush && rdev->asic->ring[ring->idx]->hdp_flush)
 		rdev->asic->ring[ring->idx]->hdp_flush(rdev, ring);
 	/* We pad to match fetch size */
 	while (ring->wptr & ring->align_mask) {
@@ -196,7 +198,7 @@ void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring)
 	/* If we are emitting the HDP flush via MMIO, we need to do it after
 	 * all CPU writes to VRAM finished.
 	 */
-	if (rdev->asic->mmio_hdp_flush)
+	if (hdp_flush && rdev->asic->mmio_hdp_flush)
 		rdev->asic->mmio_hdp_flush(rdev);
 	radeon_ring_set_wptr(rdev, ring);
 }
@@ -207,12 +209,14 @@ void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring)
  *
  * @rdev: radeon_device pointer
  * @ring: radeon_ring structure holding ring information
+ * @hdp_flush: Whether or not to perform an HDP cache flush
  *
  * Call radeon_ring_commit() then unlock the ring (all asics).
  */
-void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *ring)
+void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *ring,
+			       bool hdp_flush)
 {
-	radeon_ring_commit(rdev, ring);
+	radeon_ring_commit(rdev, ring, hdp_flush);
 	mutex_unlock(&rdev->ring_lock);
 }
 
@@ -372,7 +376,7 @@ int radeon_ring_restore(struct radeon_device *rdev, struct radeon_ring *ring,
 		radeon_ring_write(ring, data[i]);
 	}
 
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 	kfree(data);
 	return 0;
 }
@@ -400,9 +404,7 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig
 	/* Allocate ring buffer */
 	if (ring->ring_obj == NULL) {
 		r = radeon_bo_create(rdev, ring->ring_size, PAGE_SIZE, true,
-				     RADEON_GEM_DOMAIN_GTT,
-				     (rdev->flags & RADEON_IS_PCIE) ?
-				     RADEON_GEM_GTT_WC : 0,
+				     RADEON_GEM_DOMAIN_GTT, 0,
 				     NULL, &ring->ring_obj);
 		if (r) {
 			dev_err(rdev->dev, "(%d) ring create failed\n", r);
diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c
index dbd6bcde92de..abd6753a570a 100644
--- a/drivers/gpu/drm/radeon/radeon_semaphore.c
+++ b/drivers/gpu/drm/radeon/radeon_semaphore.c
@@ -34,7 +34,7 @@
 int radeon_semaphore_create(struct radeon_device *rdev,
 			    struct radeon_semaphore **semaphore)
 {
-	uint32_t *cpu_addr;
+	uint64_t *cpu_addr;
 	int i, r;
 
 	*semaphore = kmalloc(sizeof(struct radeon_semaphore), GFP_KERNEL);
@@ -179,7 +179,7 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev,
 			continue;
 		}
 
-		radeon_ring_commit(rdev, &rdev->ring[i]);
+		radeon_ring_commit(rdev, &rdev->ring[i], false);
 		radeon_fence_note_sync(fence, ring);
 
 		semaphore->gpu_addr += 8;
diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c
index 5adf4207453d..17bc3dced9f1 100644
--- a/drivers/gpu/drm/radeon/radeon_test.c
+++ b/drivers/gpu/drm/radeon/radeon_test.c
@@ -288,7 +288,7 @@ static int radeon_test_create_and_emit_fence(struct radeon_device *rdev,
 			return r;
 		}
 		radeon_fence_emit(rdev, fence, ring->idx);
-		radeon_ring_unlock_commit(rdev, ring);
+		radeon_ring_unlock_commit(rdev, ring, false);
 	}
 	return 0;
 }
@@ -313,7 +313,7 @@ void radeon_test_ring_sync(struct radeon_device *rdev,
 		goto out_cleanup;
 	}
 	radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
-	radeon_ring_unlock_commit(rdev, ringA);
+	radeon_ring_unlock_commit(rdev, ringA, false);
 
 	r = radeon_test_create_and_emit_fence(rdev, ringA, &fence1);
 	if (r)
@@ -325,7 +325,7 @@ void radeon_test_ring_sync(struct radeon_device *rdev,
 		goto out_cleanup;
 	}
 	radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
-	radeon_ring_unlock_commit(rdev, ringA);
+	radeon_ring_unlock_commit(rdev, ringA, false);
 
 	r = radeon_test_create_and_emit_fence(rdev, ringA, &fence2);
 	if (r)
@@ -344,7 +344,7 @@ void radeon_test_ring_sync(struct radeon_device *rdev,
 		goto out_cleanup;
 	}
 	radeon_semaphore_emit_signal(rdev, ringB->idx, semaphore);
-	radeon_ring_unlock_commit(rdev, ringB);
+	radeon_ring_unlock_commit(rdev, ringB, false);
 
 	r = radeon_fence_wait(fence1, false);
 	if (r) {
@@ -365,7 +365,7 @@ void radeon_test_ring_sync(struct radeon_device *rdev,
 		goto out_cleanup;
 	}
 	radeon_semaphore_emit_signal(rdev, ringB->idx, semaphore);
-	radeon_ring_unlock_commit(rdev, ringB);
+	radeon_ring_unlock_commit(rdev, ringB, false);
 
 	r = radeon_fence_wait(fence2, false);
 	if (r) {
@@ -408,7 +408,7 @@ static void radeon_test_ring_sync2(struct radeon_device *rdev,
 		goto out_cleanup;
 	}
 	radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
-	radeon_ring_unlock_commit(rdev, ringA);
+	radeon_ring_unlock_commit(rdev, ringA, false);
 
 	r = radeon_test_create_and_emit_fence(rdev, ringA, &fenceA);
 	if (r)
@@ -420,7 +420,7 @@ static void radeon_test_ring_sync2(struct radeon_device *rdev,
 		goto out_cleanup;
 	}
 	radeon_semaphore_emit_wait(rdev, ringB->idx, semaphore);
-	radeon_ring_unlock_commit(rdev, ringB);
+	radeon_ring_unlock_commit(rdev, ringB, false);
 	r = radeon_test_create_and_emit_fence(rdev, ringB, &fenceB);
 	if (r)
 		goto out_cleanup;
@@ -442,7 +442,7 @@ static void radeon_test_ring_sync2(struct radeon_device *rdev,
 		goto out_cleanup;
 	}
 	radeon_semaphore_emit_signal(rdev, ringC->idx, semaphore);
-	radeon_ring_unlock_commit(rdev, ringC);
+	radeon_ring_unlock_commit(rdev, ringC, false);
 
 	for (i = 0; i < 30; ++i) {
 		mdelay(100);
@@ -468,7 +468,7 @@ static void radeon_test_ring_sync2(struct radeon_device *rdev,
 		goto out_cleanup;
 	}
 	radeon_semaphore_emit_signal(rdev, ringC->idx, semaphore);
-	radeon_ring_unlock_commit(rdev, ringC);
+	radeon_ring_unlock_commit(rdev, ringC, false);
 
 	mdelay(1000);
 
diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
index 6bf55ec85b62..341848a14376 100644
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -646,7 +646,7 @@ static int radeon_uvd_send_msg(struct radeon_device *rdev,
 		ib.ptr[i] = PACKET2(0);
 	ib.length_dw = 16;
 
-	r = radeon_ib_schedule(rdev, &ib, NULL);
+	r = radeon_ib_schedule(rdev, &ib, NULL, false);
 	if (r)
 		goto err;
 	ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence);
diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c
index f9b70a43aa52..c7190aadbd89 100644
--- a/drivers/gpu/drm/radeon/radeon_vce.c
+++ b/drivers/gpu/drm/radeon/radeon_vce.c
@@ -368,7 +368,7 @@ int radeon_vce_get_create_msg(struct radeon_device *rdev, int ring,
 	for (i = ib.length_dw; i < ib_size_dw; ++i)
 		ib.ptr[i] = 0x0;
 
-	r = radeon_ib_schedule(rdev, &ib, NULL);
+	r = radeon_ib_schedule(rdev, &ib, NULL, false);
 	if (r) {
 	        DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
 	}
@@ -425,7 +425,7 @@ int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring,
 	for (i = ib.length_dw; i < ib_size_dw; ++i)
 		ib.ptr[i] = 0x0;
 
-	r = radeon_ib_schedule(rdev, &ib, NULL);
+	r = radeon_ib_schedule(rdev, &ib, NULL, false);
 	if (r) {
 	        DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
 	}
@@ -715,7 +715,7 @@ int radeon_vce_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 		return r;
 	}
 	radeon_ring_write(ring, VCE_CMD_END);
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 
 	for (i = 0; i < rdev->usec_timeout; i++) {
 	        if (vce_v1_0_get_rptr(rdev, ring) != rptr)
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index ccae4d9dc3de..088ffdc2f577 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -420,7 +420,7 @@ static int radeon_vm_clear_bo(struct radeon_device *rdev,
 	radeon_asic_vm_pad_ib(rdev, &ib);
 	WARN_ON(ib.length_dw > 64);
 
-	r = radeon_ib_schedule(rdev, &ib, NULL);
+	r = radeon_ib_schedule(rdev, &ib, NULL, false);
 	if (r)
                 goto error;
 
@@ -483,6 +483,10 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
 			/* add a clone of the bo_va to clear the old address */
 			struct radeon_bo_va *tmp;
 			tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
+			if (!tmp) {
+				mutex_unlock(&vm->mutex);
+				return -ENOMEM;
+			}
 			tmp->it.start = bo_va->it.start;
 			tmp->it.last = bo_va->it.last;
 			tmp->vm = vm;
@@ -693,7 +697,7 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev,
 		radeon_semaphore_sync_to(ib.semaphore, pd->tbo.sync_obj);
 		radeon_semaphore_sync_to(ib.semaphore, vm->last_id_use);
 		WARN_ON(ib.length_dw > ndw);
-		r = radeon_ib_schedule(rdev, &ib, NULL);
+		r = radeon_ib_schedule(rdev, &ib, NULL, false);
 		if (r) {
 			radeon_ib_free(rdev, &ib);
 			return r;
@@ -957,7 +961,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 	WARN_ON(ib.length_dw > ndw);
 
 	radeon_semaphore_sync_to(ib.semaphore, vm->fence);
-	r = radeon_ib_schedule(rdev, &ib, NULL);
+	r = radeon_ib_schedule(rdev, &ib, NULL, false);
 	if (r) {
 		radeon_ib_free(rdev, &ib);
 		return r;
diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c
index 6c1fc339d228..c5799f16aa4b 100644
--- a/drivers/gpu/drm/radeon/rs400.c
+++ b/drivers/gpu/drm/radeon/rs400.c
@@ -221,9 +221,9 @@ void rs400_gart_set_page(struct radeon_device *rdev, unsigned i,
 	entry = (lower_32_bits(addr) & PAGE_MASK) |
 		((upper_32_bits(addr) & 0xff) << 4);
 	if (flags & RADEON_GART_PAGE_READ)
-		addr |= RS400_PTE_READABLE;
+		entry |= RS400_PTE_READABLE;
 	if (flags & RADEON_GART_PAGE_WRITE)
-		addr |= RS400_PTE_WRITEABLE;
+		entry |= RS400_PTE_WRITEABLE;
 	if (!(flags & RADEON_GART_PAGE_SNOOP))
 		entry |= RS400_PTE_UNSNOOPED;
 	entry = cpu_to_le32(entry);
diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c
index 3e21e869015f..8a477bf1fdb3 100644
--- a/drivers/gpu/drm/radeon/rv515.c
+++ b/drivers/gpu/drm/radeon/rv515.c
@@ -124,7 +124,7 @@ void rv515_ring_start(struct radeon_device *rdev, struct radeon_ring *ring)
 	radeon_ring_write(ring, GEOMETRY_ROUND_NEAREST | COLOR_ROUND_NEAREST);
 	radeon_ring_write(ring, PACKET0(0x20C8, 0));
 	radeon_ring_write(ring, 0);
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 }
 
 int rv515_mc_wait_for_idle(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 2983f17ea1b3..d9f5ce715c9b 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -1177,7 +1177,6 @@ static void rv770_gpu_init(struct radeon_device *rdev)
 	u32 hdp_host_path_cntl;
 	u32 sq_dyn_gpr_size_simd_ab_0;
 	u32 gb_tiling_config = 0;
-	u32 cc_rb_backend_disable = 0;
 	u32 cc_gc_shader_pipe_config = 0;
 	u32 mc_arb_ramcfg;
 	u32 db_debug4, tmp;
@@ -1311,21 +1310,7 @@ static void rv770_gpu_init(struct radeon_device *rdev)
 		WREG32(SPI_CONFIG_CNTL, 0);
 	}
 
-	cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000;
-	tmp = R7XX_MAX_BACKENDS - r600_count_pipe_bits(cc_rb_backend_disable >> 16);
-	if (tmp < rdev->config.rv770.max_backends) {
-		rdev->config.rv770.max_backends = tmp;
-	}
-
 	cc_gc_shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00;
-	tmp = R7XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config >> 8) & R7XX_MAX_PIPES_MASK);
-	if (tmp < rdev->config.rv770.max_pipes) {
-		rdev->config.rv770.max_pipes = tmp;
-	}
-	tmp = R7XX_MAX_SIMDS - r600_count_pipe_bits((cc_gc_shader_pipe_config >> 16) & R7XX_MAX_SIMDS_MASK);
-	if (tmp < rdev->config.rv770.max_simds) {
-		rdev->config.rv770.max_simds = tmp;
-	}
 	tmp = rdev->config.rv770.max_simds -
 		r600_count_pipe_bits((cc_gc_shader_pipe_config >> 16) & R7XX_MAX_SIMDS_MASK);
 	rdev->config.rv770.active_simds = tmp;
@@ -1348,6 +1333,14 @@ static void rv770_gpu_init(struct radeon_device *rdev)
 	rdev->config.rv770.tiling_npipes = rdev->config.rv770.max_tile_pipes;
 
 	disabled_rb_mask = (RREG32(CC_RB_BACKEND_DISABLE) >> 16) & R7XX_MAX_BACKENDS_MASK;
+	tmp = 0;
+	for (i = 0; i < rdev->config.rv770.max_backends; i++)
+		tmp |= (1 << i);
+	/* if all the backends are disabled, fix it up here */
+	if ((disabled_rb_mask & tmp) == tmp) {
+		for (i = 0; i < rdev->config.rv770.max_backends; i++)
+			disabled_rb_mask &= ~(1 << i);
+	}
 	tmp = (gb_tiling_config & PIPE_TILING__MASK) >> PIPE_TILING__SHIFT;
 	tmp = r6xx_remap_render_backend(rdev, tmp, rdev->config.rv770.max_backends,
 					R7XX_MAX_BACKENDS, disabled_rb_mask);
diff --git a/drivers/gpu/drm/radeon/rv770_dma.c b/drivers/gpu/drm/radeon/rv770_dma.c
index bbf2e076ee45..74426ac2bb5c 100644
--- a/drivers/gpu/drm/radeon/rv770_dma.c
+++ b/drivers/gpu/drm/radeon/rv770_dma.c
@@ -90,7 +90,7 @@ int rv770_copy_dma(struct radeon_device *rdev,
 		return r;
 	}
 
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 	radeon_semaphore_free(rdev, &sem, *fence);
 
 	return r;
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 011779bd2b3d..6bce40847753 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -3057,7 +3057,7 @@ static void si_gpu_init(struct radeon_device *rdev)
 	u32 sx_debug_1;
 	u32 hdp_host_path_cntl;
 	u32 tmp;
-	int i, j, k;
+	int i, j;
 
 	switch (rdev->family) {
 	case CHIP_TAHITI:
@@ -3255,12 +3255,11 @@ static void si_gpu_init(struct radeon_device *rdev)
 		     rdev->config.si.max_sh_per_se,
 		     rdev->config.si.max_cu_per_sh);
 
+	rdev->config.si.active_cus = 0;
 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
-			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
-				rdev->config.si.active_cus +=
-					hweight32(si_get_cu_active_bitmap(rdev, i, j));
-			}
+			rdev->config.si.active_cus +=
+				hweight32(si_get_cu_active_bitmap(rdev, i, j));
 		}
 	}
 
@@ -3541,7 +3540,7 @@ static int si_cp_start(struct radeon_device *rdev)
 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
 	radeon_ring_write(ring, 0xc000);
 	radeon_ring_write(ring, 0xe000);
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 
 	si_cp_enable(rdev, true);
 
@@ -3570,7 +3569,7 @@ static int si_cp_start(struct radeon_device *rdev)
 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
 
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 
 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
 		ring = &rdev->ring[i];
@@ -3580,7 +3579,7 @@ static int si_cp_start(struct radeon_device *rdev)
 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
 		radeon_ring_write(ring, 0);
 
-		radeon_ring_unlock_commit(rdev, ring);
+		radeon_ring_unlock_commit(rdev, ring, false);
 	}
 
 	return 0;
@@ -4291,10 +4290,10 @@ static int si_pcie_gart_enable(struct radeon_device *rdev)
 	for (i = 1; i < 16; i++) {
 		if (i < 8)
 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
-			       rdev->gart.table_addr >> 12);
+			       rdev->vm_manager.saved_table_addr[i]);
 		else
 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
-			       rdev->gart.table_addr >> 12);
+			       rdev->vm_manager.saved_table_addr[i]);
 	}
 
 	/* enable context1-15 */
@@ -4326,6 +4325,17 @@ static int si_pcie_gart_enable(struct radeon_device *rdev)
 
 static void si_pcie_gart_disable(struct radeon_device *rdev)
 {
+	unsigned i;
+
+	for (i = 1; i < 16; ++i) {
+		uint32_t reg;
+		if (i < 8)
+			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
+		else
+			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
+		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
+	}
+
 	/* Disable all tables */
 	WREG32(VM_CONTEXT0_CNTL, 0);
 	WREG32(VM_CONTEXT1_CNTL, 0);
@@ -5028,7 +5038,7 @@ void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
 
 	/* flush hdp cache */
 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
 				 WRITE_DATA_DST_SEL(0)));
 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
 	radeon_ring_write(ring, 0);
@@ -5036,7 +5046,7 @@ void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
 
 	/* bits 0-15 are the VM contexts0-15 */
 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
 				 WRITE_DATA_DST_SEL(0)));
 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
 	radeon_ring_write(ring, 0);
@@ -7178,6 +7188,9 @@ static void si_pcie_gen3_enable(struct radeon_device *rdev)
 	int ret, i;
 	u16 tmp16;
 
+	if (pci_is_root_bus(rdev->pdev->bus))
+		return;
+
 	if (radeon_pcie_gen2 == 0)
 		return;
 
@@ -7455,7 +7468,8 @@ static void si_program_aspm(struct radeon_device *rdev)
 			if (orig != data)
 				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
 
-			if (!disable_clkreq) {
+			if (!disable_clkreq &&
+			    !pci_is_root_bus(rdev->pdev->bus)) {
 				struct pci_dev *root = rdev->pdev->bus->self;
 				u32 lnkcap;
 
diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c
index 716505129450..7c22baaf94db 100644
--- a/drivers/gpu/drm/radeon/si_dma.c
+++ b/drivers/gpu/drm/radeon/si_dma.c
@@ -275,7 +275,7 @@ int si_copy_dma(struct radeon_device *rdev,
 		return r;
 	}
 
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 	radeon_semaphore_free(rdev, &sem, *fence);
 
 	return r;
diff --git a/drivers/gpu/drm/radeon/trinity_dpm.c b/drivers/gpu/drm/radeon/trinity_dpm.c
index 32e50be9c4ac..57f780053b3e 100644
--- a/drivers/gpu/drm/radeon/trinity_dpm.c
+++ b/drivers/gpu/drm/radeon/trinity_dpm.c
@@ -1874,16 +1874,22 @@ int trinity_dpm_init(struct radeon_device *rdev)
 	for (i = 0; i < SUMO_MAX_HARDWARE_POWERLEVELS; i++)
 		pi->at[i] = TRINITY_AT_DFLT;
 
-	/* There are stability issues reported on with
-	 * bapm enabled when switching between AC and battery
-	 * power.  At the same time, some MSI boards hang
-	 * if it's not enabled and dpm is enabled.  Just enable
-	 * it for MSI boards right now.
-	 */
-	if (rdev->pdev->subsystem_vendor == 0x1462)
-		pi->enable_bapm = true;
-	else
+	if (radeon_bapm == -1) {
+		/* There are stability issues reported on with
+		 * bapm enabled when switching between AC and battery
+		 * power.  At the same time, some MSI boards hang
+		 * if it's not enabled and dpm is enabled.  Just enable
+		 * it for MSI boards right now.
+		 */
+		if (rdev->pdev->subsystem_vendor == 0x1462)
+			pi->enable_bapm = true;
+		else
+			pi->enable_bapm = false;
+	} else if (radeon_bapm == 0) {
 		pi->enable_bapm = false;
+	} else {
+		pi->enable_bapm = true;
+	}
 	pi->enable_nbps_policy = true;
 	pi->enable_sclk_ds = true;
 	pi->enable_gfx_power_gating = true;
diff --git a/drivers/gpu/drm/radeon/uvd_v1_0.c b/drivers/gpu/drm/radeon/uvd_v1_0.c
index be42c8125203..cda391347286 100644
--- a/drivers/gpu/drm/radeon/uvd_v1_0.c
+++ b/drivers/gpu/drm/radeon/uvd_v1_0.c
@@ -124,7 +124,7 @@ int uvd_v1_0_init(struct radeon_device *rdev)
 	radeon_ring_write(ring, PACKET0(UVD_SEMA_CNTL, 0));
 	radeon_ring_write(ring, 3);
 
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 
 done:
 	/* lower clocks again */
@@ -331,7 +331,7 @@ int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	}
 	radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0));
 	radeon_ring_write(ring, 0xDEADBEEF);
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_unlock_commit(rdev, ring, false);
 	for (i = 0; i < rdev->usec_timeout; i++) {
 		tmp = RREG32(UVD_CONTEXT_ID);
 		if (tmp == 0xDEADBEEF)
diff --git a/drivers/gpu/drm/sti/Kconfig b/drivers/gpu/drm/sti/Kconfig
index 2d9d4252d598..ae8850f3e63b 100644
--- a/drivers/gpu/drm/sti/Kconfig
+++ b/drivers/gpu/drm/sti/Kconfig
@@ -1,6 +1,7 @@
 config DRM_STI
 	tristate "DRM Support for STMicroelectronics SoC stiH41x Series"
 	depends on DRM && (SOC_STIH415 || SOC_STIH416 || ARCH_MULTIPLATFORM)
+	select RESET_CONTROLLER
 	select DRM_KMS_HELPER
 	select DRM_GEM_CMA_HELPER
 	select DRM_KMS_CMA_HELPER
diff --git a/drivers/gpu/drm/sti/sti_drm_drv.c b/drivers/gpu/drm/sti/sti_drm_drv.c
index a7cc24917a96..223d93c3a05d 100644
--- a/drivers/gpu/drm/sti/sti_drm_drv.c
+++ b/drivers/gpu/drm/sti/sti_drm_drv.c
@@ -201,8 +201,8 @@ static int sti_drm_platform_probe(struct platform_device *pdev)
 	master = platform_device_register_resndata(dev,
 			DRIVER_NAME "__master", -1,
 			NULL, 0, NULL, 0);
-	if (!master)
-		return -EINVAL;
+	if (IS_ERR(master))
+               return PTR_ERR(master);
 
 	platform_set_drvdata(pdev, master);
 	return 0;
diff --git a/drivers/gpu/drm/sti/sti_hda.c b/drivers/gpu/drm/sti/sti_hda.c
index 72d957f81c05..2ae9a9b73666 100644
--- a/drivers/gpu/drm/sti/sti_hda.c
+++ b/drivers/gpu/drm/sti/sti_hda.c
@@ -730,16 +730,16 @@ static int sti_hda_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	}
 	hda->regs = devm_ioremap_nocache(dev, res->start, resource_size(res));
-	if (IS_ERR(hda->regs))
-		return PTR_ERR(hda->regs);
+	if (!hda->regs)
+		return -ENOMEM;
 
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
 			"video-dacs-ctrl");
 	if (res) {
 		hda->video_dacs_ctrl = devm_ioremap_nocache(dev, res->start,
 				resource_size(res));
-		if (IS_ERR(hda->video_dacs_ctrl))
-			return PTR_ERR(hda->video_dacs_ctrl);
+		if (!hda->video_dacs_ctrl)
+			return -ENOMEM;
 	} else {
 		/* If no existing video-dacs-ctrl resource continue the probe */
 		DRM_DEBUG_DRIVER("No video-dacs-ctrl resource\n");
@@ -770,7 +770,7 @@ static int sti_hda_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static struct of_device_id hda_of_match[] = {
+static const struct of_device_id hda_of_match[] = {
 	{ .compatible = "st,stih416-hda", },
 	{ .compatible = "st,stih407-hda", },
 	{ /* end node */ }
diff --git a/drivers/gpu/drm/sti/sti_hdmi.c b/drivers/gpu/drm/sti/sti_hdmi.c
index 284e541d970d..b22968c08d1f 100644
--- a/drivers/gpu/drm/sti/sti_hdmi.c
+++ b/drivers/gpu/drm/sti/sti_hdmi.c
@@ -298,7 +298,6 @@ static int hdmi_avi_infoframe_config(struct sti_hdmi *hdmi)
 	hdmi_write(hdmi, val, HDMI_SW_DI_N_PKT_WORD2(HDMI_IFRAME_SLOT_AVI));
 
 	val = frame[0xC];
-	val |= frame[0xD] << 8;
 	hdmi_write(hdmi, val, HDMI_SW_DI_N_PKT_WORD3(HDMI_IFRAME_SLOT_AVI));
 
 	/* Enable transmission slot for AVI infoframe
@@ -677,7 +676,7 @@ static const struct component_ops sti_hdmi_ops = {
 	.unbind = sti_hdmi_unbind,
 };
 
-static struct of_device_id hdmi_of_match[] = {
+static const struct of_device_id hdmi_of_match[] = {
 	{
 		.compatible = "st,stih416-hdmi",
 		.data = &tx3g0c55phy_ops,
@@ -713,8 +712,8 @@ static int sti_hdmi_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	}
 	hdmi->regs = devm_ioremap_nocache(dev, res->start, resource_size(res));
-	if (IS_ERR(hdmi->regs))
-		return PTR_ERR(hdmi->regs);
+	if (!hdmi->regs)
+		return -ENOMEM;
 
 	if (of_device_is_compatible(np, "st,stih416-hdmi")) {
 		res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
@@ -725,8 +724,8 @@ static int sti_hdmi_probe(struct platform_device *pdev)
 		}
 		hdmi->syscfg = devm_ioremap_nocache(dev, res->start,
 						    resource_size(res));
-		if (IS_ERR(hdmi->syscfg))
-			return PTR_ERR(hdmi->syscfg);
+		if (!hdmi->syscfg)
+			return -ENOMEM;
 
 	}
 
diff --git a/drivers/gpu/drm/sti/sti_tvout.c b/drivers/gpu/drm/sti/sti_tvout.c
index b69e26fee76e..b8afe490356a 100644
--- a/drivers/gpu/drm/sti/sti_tvout.c
+++ b/drivers/gpu/drm/sti/sti_tvout.c
@@ -591,8 +591,8 @@ static int sti_tvout_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	}
 	tvout->regs = devm_ioremap_nocache(dev, res->start, resource_size(res));
-	if (IS_ERR(tvout->regs))
-		return PTR_ERR(tvout->regs);
+	if (!tvout->regs)
+		return -ENOMEM;
 
 	/* get reset resources */
 	tvout->reset = devm_reset_control_get(dev, "tvout");
@@ -624,7 +624,7 @@ static int sti_tvout_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static struct of_device_id tvout_of_match[] = {
+static const struct of_device_id tvout_of_match[] = {
 	{ .compatible = "st,stih416-tvout", },
 	{ .compatible = "st,stih407-tvout", },
 	{ /* end node */ }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 7bfdaa163a33..36b871686d3c 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -450,11 +450,11 @@ static int vmw_cmd_res_reloc_add(struct vmw_private *dev_priv,
 					  res,
 					  id_loc - sw_context->buf_start);
 	if (unlikely(ret != 0))
-		goto out_err;
+		return ret;
 
 	ret = vmw_resource_val_add(sw_context, res, &node);
 	if (unlikely(ret != 0))
-		goto out_err;
+		return ret;
 
 	if (res_type == vmw_res_context && dev_priv->has_mob &&
 	    node->first_usage) {
@@ -468,13 +468,13 @@ static int vmw_cmd_res_reloc_add(struct vmw_private *dev_priv,
 
 		ret = vmw_resource_context_res_add(dev_priv, sw_context, res);
 		if (unlikely(ret != 0))
-			goto out_err;
+			return ret;
 		node->staged_bindings =
 			kzalloc(sizeof(*node->staged_bindings), GFP_KERNEL);
 		if (node->staged_bindings == NULL) {
 			DRM_ERROR("Failed to allocate context binding "
 				  "information.\n");
-			goto out_err;
+			return -ENOMEM;
 		}
 		INIT_LIST_HEAD(&node->staged_bindings->list);
 	}
@@ -482,8 +482,7 @@ static int vmw_cmd_res_reloc_add(struct vmw_private *dev_priv,
 	if (p_val)
 		*p_val = node;
 
-out_err:
-	return ret;
+	return 0;
 }
 
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
index 6ccd993e26bf..6eae14d2a3f7 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
@@ -180,8 +180,9 @@ void vmw_fifo_release(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
 
 	mutex_lock(&dev_priv->hw_mutex);
 
+	vmw_write(dev_priv, SVGA_REG_SYNC, SVGA_SYNC_GENERIC);
 	while (vmw_read(dev_priv, SVGA_REG_BUSY) != 0)
-		vmw_write(dev_priv, SVGA_REG_SYNC, SVGA_SYNC_GENERIC);
+		;
 
 	dev_priv->last_read_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);