From d5bc60f6ad05b3c676b057bec662cfafc3ee24dd Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Sun, 18 Jan 2015 09:33:17 +1300
Subject: drm/vc4: Add create and map BO ioctls.

While there exist dumb APIs for creating and mapping BOs, one of the
rules is that drivers doing 3D acceleration have to provide their own
APIs for buffer allocation (besides, the pitch/height parameters of
the dumb alloc don't really make sense for a lot of 3D allocations).

v2: Use __u32-style types, use "drm.h" instead of <drm/drm.h>.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 include/uapi/drm/Kbuild    |  1 +
 include/uapi/drm/vc4_drm.h | 68 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)
 create mode 100644 include/uapi/drm/vc4_drm.h

(limited to 'include/uapi')

diff --git a/include/uapi/drm/Kbuild b/include/uapi/drm/Kbuild
index 38d437096c35..974fcd54e36f 100644
--- a/include/uapi/drm/Kbuild
+++ b/include/uapi/drm/Kbuild
@@ -17,4 +17,5 @@ header-y += tegra_drm.h
 header-y += via_drm.h
 header-y += vmwgfx_drm.h
 header-y += msm_drm.h
+header-y += vc4_drm.h
 header-y += virtgpu_drm.h
diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h
new file mode 100644
index 000000000000..219d34c42272
--- /dev/null
+++ b/include/uapi/drm/vc4_drm.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright © 2014-2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _UAPI_VC4_DRM_H_
+#define _UAPI_VC4_DRM_H_
+
+#include "drm.h"
+
+#define DRM_VC4_CREATE_BO                         0x03
+#define DRM_VC4_MMAP_BO                           0x04
+
+#define DRM_IOCTL_VC4_CREATE_BO           DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo)
+#define DRM_IOCTL_VC4_MMAP_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo)
+
+/**
+ * struct drm_vc4_create_bo - ioctl argument for creating VC4 BOs.
+ *
+ * There are currently no values for the flags argument, but it may be
+ * used in a future extension.
+ */
+struct drm_vc4_create_bo {
+	__u32 size;
+	__u32 flags;
+	/** Returned GEM handle for the BO. */
+	__u32 handle;
+	__u32 pad;
+};
+
+/**
+ * struct drm_vc4_mmap_bo - ioctl argument for mapping VC4 BOs.
+ *
+ * This doesn't actually perform an mmap.  Instead, it returns the
+ * offset you need to use in an mmap on the DRM device node.  This
+ * means that tools like valgrind end up knowing about the mapped
+ * memory.
+ *
+ * There are currently no values for the flags argument, but it may be
+ * used in a future extension.
+ */
+struct drm_vc4_mmap_bo {
+	/** Handle for the object being mapped. */
+	__u32 handle;
+	__u32 flags;
+	/** offset into the drm node to use for subsequent mmap call. */
+	__u64 offset;
+};
+
+#endif /* _UAPI_VC4_DRM_H_ */
-- 
cgit v1.2.3-58-ga151


From 463873d5701427f2964a0b4b72c45f1f14b6df87 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 30 Nov 2015 11:41:40 -0800
Subject: drm/vc4: Add an API for creating GPU shaders in GEM BOs.

Since we have no MMU, the kernel needs to validate that the submitted
shader code won't make any accesses to memory that the user doesn't
control, which involves banning some operations (general purpose DMA
writes), and tracking where we need to write out pointers for other
operations (texture sampling).  Once it's validated, we return a GEM
BO containing the shader, which doesn't allow mapping for write or
exporting to other subsystems.

v2: Use __u32-style types.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/Makefile               |   3 +-
 drivers/gpu/drm/vc4/vc4_bo.c               | 140 ++++++++
 drivers/gpu/drm/vc4/vc4_drv.c              |   9 +-
 drivers/gpu/drm/vc4/vc4_drv.h              |  50 +++
 drivers/gpu/drm/vc4/vc4_qpu_defines.h      | 264 +++++++++++++++
 drivers/gpu/drm/vc4/vc4_validate_shaders.c | 513 +++++++++++++++++++++++++++++
 include/uapi/drm/vc4_drm.h                 |  25 ++
 7 files changed, 999 insertions(+), 5 deletions(-)
 create mode 100644 drivers/gpu/drm/vc4/vc4_qpu_defines.h
 create mode 100644 drivers/gpu/drm/vc4/vc4_validate_shaders.c

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/vc4/Makefile b/drivers/gpu/drm/vc4/Makefile
index 32b4f9cd8f52..eb776a6a9899 100644
--- a/drivers/gpu/drm/vc4/Makefile
+++ b/drivers/gpu/drm/vc4/Makefile
@@ -10,7 +10,8 @@ vc4-y := \
 	vc4_kms.o \
 	vc4_hdmi.o \
 	vc4_hvs.o \
-	vc4_plane.o
+	vc4_plane.o \
+	vc4_validate_shaders.o
 
 vc4-$(CONFIG_DEBUG_FS) += vc4_debugfs.o
 
diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
index 06cba268e17a..18dfe3ec9a62 100644
--- a/drivers/gpu/drm/vc4/vc4_bo.c
+++ b/drivers/gpu/drm/vc4/vc4_bo.c
@@ -79,6 +79,12 @@ static void vc4_bo_destroy(struct vc4_bo *bo)
 	struct drm_gem_object *obj = &bo->base.base;
 	struct vc4_dev *vc4 = to_vc4_dev(obj->dev);
 
+	if (bo->validated_shader) {
+		kfree(bo->validated_shader->texture_samples);
+		kfree(bo->validated_shader);
+		bo->validated_shader = NULL;
+	}
+
 	vc4->bo_stats.num_allocated--;
 	vc4->bo_stats.size_allocated -= obj->size;
 	drm_gem_cma_free_object(obj);
@@ -315,6 +321,12 @@ void vc4_free_object(struct drm_gem_object *gem_bo)
 		goto out;
 	}
 
+	if (bo->validated_shader) {
+		kfree(bo->validated_shader->texture_samples);
+		kfree(bo->validated_shader);
+		bo->validated_shader = NULL;
+	}
+
 	bo->free_time = jiffies;
 	list_add(&bo->size_head, cache_list);
 	list_add(&bo->unref_head, &vc4->bo_cache.time_list);
@@ -347,6 +359,78 @@ static void vc4_bo_cache_time_timer(unsigned long data)
 	schedule_work(&vc4->bo_cache.time_work);
 }
 
+struct dma_buf *
+vc4_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags)
+{
+	struct vc4_bo *bo = to_vc4_bo(obj);
+
+	if (bo->validated_shader) {
+		DRM_ERROR("Attempting to export shader BO\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	return drm_gem_prime_export(dev, obj, flags);
+}
+
+int vc4_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct drm_gem_object *gem_obj;
+	struct vc4_bo *bo;
+	int ret;
+
+	ret = drm_gem_mmap(filp, vma);
+	if (ret)
+		return ret;
+
+	gem_obj = vma->vm_private_data;
+	bo = to_vc4_bo(gem_obj);
+
+	if (bo->validated_shader && (vma->vm_flags & VM_WRITE)) {
+		DRM_ERROR("mmaping of shader BOs for writing not allowed.\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the
+	 * vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map
+	 * the whole buffer.
+	 */
+	vma->vm_flags &= ~VM_PFNMAP;
+	vma->vm_pgoff = 0;
+
+	ret = dma_mmap_writecombine(bo->base.base.dev->dev, vma,
+				    bo->base.vaddr, bo->base.paddr,
+				    vma->vm_end - vma->vm_start);
+	if (ret)
+		drm_gem_vm_close(vma);
+
+	return ret;
+}
+
+int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+{
+	struct vc4_bo *bo = to_vc4_bo(obj);
+
+	if (bo->validated_shader && (vma->vm_flags & VM_WRITE)) {
+		DRM_ERROR("mmaping of shader BOs for writing not allowed.\n");
+		return -EINVAL;
+	}
+
+	return drm_gem_cma_prime_mmap(obj, vma);
+}
+
+void *vc4_prime_vmap(struct drm_gem_object *obj)
+{
+	struct vc4_bo *bo = to_vc4_bo(obj);
+
+	if (bo->validated_shader) {
+		DRM_ERROR("mmaping of shader BOs not allowed.\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	return drm_gem_cma_prime_vmap(obj);
+}
+
 int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv)
 {
@@ -387,6 +471,62 @@ int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
+int
+vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file_priv)
+{
+	struct drm_vc4_create_shader_bo *args = data;
+	struct vc4_bo *bo = NULL;
+	int ret;
+
+	if (args->size == 0)
+		return -EINVAL;
+
+	if (args->size % sizeof(u64) != 0)
+		return -EINVAL;
+
+	if (args->flags != 0) {
+		DRM_INFO("Unknown flags set: 0x%08x\n", args->flags);
+		return -EINVAL;
+	}
+
+	if (args->pad != 0) {
+		DRM_INFO("Pad set: 0x%08x\n", args->pad);
+		return -EINVAL;
+	}
+
+	bo = vc4_bo_create(dev, args->size, true);
+	if (!bo)
+		return -ENOMEM;
+
+	ret = copy_from_user(bo->base.vaddr,
+			     (void __user *)(uintptr_t)args->data,
+			     args->size);
+	if (ret != 0)
+		goto fail;
+	/* Clear the rest of the memory from allocating from the BO
+	 * cache.
+	 */
+	memset(bo->base.vaddr + args->size, 0,
+	       bo->base.base.size - args->size);
+
+	bo->validated_shader = vc4_validate_shader(&bo->base);
+	if (!bo->validated_shader) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* We have to create the handle after validation, to avoid
+	 * races for users to do doing things like mmap the shader BO.
+	 */
+	ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
+
+ fail:
+	drm_gem_object_unreference_unlocked(&bo->base.base);
+
+	return ret;
+}
+
 void vc4_bo_cache_init(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index 5fa468864ec8..da4be9c8f70d 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -64,7 +64,7 @@ static const struct file_operations vc4_drm_fops = {
 	.open = drm_open,
 	.release = drm_release,
 	.unlocked_ioctl = drm_ioctl,
-	.mmap = drm_gem_cma_mmap,
+	.mmap = vc4_mmap,
 	.poll = drm_poll,
 	.read = drm_read,
 #ifdef CONFIG_COMPAT
@@ -76,6 +76,7 @@ static const struct file_operations vc4_drm_fops = {
 static const struct drm_ioctl_desc vc4_drm_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0),
 };
 
 static struct drm_driver vc4_drm_driver = {
@@ -102,12 +103,12 @@ static struct drm_driver vc4_drm_driver = {
 	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
 	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
 	.gem_prime_import = drm_gem_prime_import,
-	.gem_prime_export = drm_gem_prime_export,
+	.gem_prime_export = vc4_prime_export,
 	.gem_prime_get_sg_table	= drm_gem_cma_prime_get_sg_table,
 	.gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table,
-	.gem_prime_vmap = drm_gem_cma_prime_vmap,
+	.gem_prime_vmap = vc4_prime_vmap,
 	.gem_prime_vunmap = drm_gem_cma_prime_vunmap,
-	.gem_prime_mmap = drm_gem_cma_prime_mmap,
+	.gem_prime_mmap = vc4_prime_mmap,
 
 	.dumb_create = vc4_dumb_create,
 	.dumb_map_offset = drm_gem_cma_dumb_map_offset,
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index fddb0a06ed3a..bd77d5574e30 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -69,6 +69,11 @@ struct vc4_bo {
 
 	/* List entry for the BO's position in vc4_dev->bo_cache.size_list */
 	struct list_head size_head;
+
+	/* Struct for shader validation state, if created by
+	 * DRM_IOCTL_VC4_CREATE_SHADER_BO.
+	 */
+	struct vc4_validated_shader_info *validated_shader;
 };
 
 static inline struct vc4_bo *
@@ -117,6 +122,42 @@ to_vc4_encoder(struct drm_encoder *encoder)
 #define HVS_READ(offset) readl(vc4->hvs->regs + offset)
 #define HVS_WRITE(offset, val) writel(val, vc4->hvs->regs + offset)
 
+/**
+ * struct vc4_texture_sample_info - saves the offsets into the UBO for texture
+ * setup parameters.
+ *
+ * This will be used at draw time to relocate the reference to the texture
+ * contents in p0, and validate that the offset combined with
+ * width/height/stride/etc. from p1 and p2/p3 doesn't sample outside the BO.
+ * Note that the hardware treats unprovided config parameters as 0, so not all
+ * of them need to be set up for every texure sample, and we'll store ~0 as
+ * the offset to mark the unused ones.
+ *
+ * See the VC4 3D architecture guide page 41 ("Texture and Memory Lookup Unit
+ * Setup") for definitions of the texture parameters.
+ */
+struct vc4_texture_sample_info {
+	bool is_direct;
+	uint32_t p_offset[4];
+};
+
+/**
+ * struct vc4_validated_shader_info - information about validated shaders that
+ * needs to be used from command list validation.
+ *
+ * For a given shader, each time a shader state record references it, we need
+ * to verify that the shader doesn't read more uniforms than the shader state
+ * record's uniform BO pointer can provide, and we need to apply relocations
+ * and validate the shader state record's uniforms that define the texture
+ * samples.
+ */
+struct vc4_validated_shader_info {
+	uint32_t uniforms_size;
+	uint32_t uniforms_src_size;
+	uint32_t num_texture_samples;
+	struct vc4_texture_sample_info *texture_samples;
+};
+
 /**
  * _wait_for - magic (register) wait macro
  *
@@ -157,8 +198,13 @@ struct dma_buf *vc4_prime_export(struct drm_device *dev,
 				 struct drm_gem_object *obj, int flags);
 int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
+int vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file_priv);
 int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
 		      struct drm_file *file_priv);
+int vc4_mmap(struct file *filp, struct vm_area_struct *vma);
+int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
+void *vc4_prime_vmap(struct drm_gem_object *obj);
 void vc4_bo_cache_init(struct drm_device *dev);
 void vc4_bo_cache_destroy(struct drm_device *dev);
 int vc4_bo_stats_debugfs(struct seq_file *m, void *arg);
@@ -194,3 +240,7 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev,
 				 enum drm_plane_type type);
 u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist);
 u32 vc4_plane_dlist_size(struct drm_plane_state *state);
+
+/* vc4_validate_shader.c */
+struct vc4_validated_shader_info *
+vc4_validate_shader(struct drm_gem_cma_object *shader_obj);
diff --git a/drivers/gpu/drm/vc4/vc4_qpu_defines.h b/drivers/gpu/drm/vc4/vc4_qpu_defines.h
new file mode 100644
index 000000000000..d5c2f3c85ebb
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_qpu_defines.h
@@ -0,0 +1,264 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC4_QPU_DEFINES_H
+#define VC4_QPU_DEFINES_H
+
+enum qpu_op_add {
+	QPU_A_NOP,
+	QPU_A_FADD,
+	QPU_A_FSUB,
+	QPU_A_FMIN,
+	QPU_A_FMAX,
+	QPU_A_FMINABS,
+	QPU_A_FMAXABS,
+	QPU_A_FTOI,
+	QPU_A_ITOF,
+	QPU_A_ADD = 12,
+	QPU_A_SUB,
+	QPU_A_SHR,
+	QPU_A_ASR,
+	QPU_A_ROR,
+	QPU_A_SHL,
+	QPU_A_MIN,
+	QPU_A_MAX,
+	QPU_A_AND,
+	QPU_A_OR,
+	QPU_A_XOR,
+	QPU_A_NOT,
+	QPU_A_CLZ,
+	QPU_A_V8ADDS = 30,
+	QPU_A_V8SUBS = 31,
+};
+
+enum qpu_op_mul {
+	QPU_M_NOP,
+	QPU_M_FMUL,
+	QPU_M_MUL24,
+	QPU_M_V8MULD,
+	QPU_M_V8MIN,
+	QPU_M_V8MAX,
+	QPU_M_V8ADDS,
+	QPU_M_V8SUBS,
+};
+
+enum qpu_raddr {
+	QPU_R_FRAG_PAYLOAD_ZW = 15, /* W for A file, Z for B file */
+	/* 0-31 are the plain regfile a or b fields */
+	QPU_R_UNIF = 32,
+	QPU_R_VARY = 35,
+	QPU_R_ELEM_QPU = 38,
+	QPU_R_NOP,
+	QPU_R_XY_PIXEL_COORD = 41,
+	QPU_R_MS_REV_FLAGS = 41,
+	QPU_R_VPM = 48,
+	QPU_R_VPM_LD_BUSY,
+	QPU_R_VPM_LD_WAIT,
+	QPU_R_MUTEX_ACQUIRE,
+};
+
+enum qpu_waddr {
+	/* 0-31 are the plain regfile a or b fields */
+	QPU_W_ACC0 = 32, /* aka r0 */
+	QPU_W_ACC1,
+	QPU_W_ACC2,
+	QPU_W_ACC3,
+	QPU_W_TMU_NOSWAP,
+	QPU_W_ACC5,
+	QPU_W_HOST_INT,
+	QPU_W_NOP,
+	QPU_W_UNIFORMS_ADDRESS,
+	QPU_W_QUAD_XY, /* X for regfile a, Y for regfile b */
+	QPU_W_MS_FLAGS = 42,
+	QPU_W_REV_FLAG = 42,
+	QPU_W_TLB_STENCIL_SETUP = 43,
+	QPU_W_TLB_Z,
+	QPU_W_TLB_COLOR_MS,
+	QPU_W_TLB_COLOR_ALL,
+	QPU_W_TLB_ALPHA_MASK,
+	QPU_W_VPM,
+	QPU_W_VPMVCD_SETUP, /* LD for regfile a, ST for regfile b */
+	QPU_W_VPM_ADDR, /* LD for regfile a, ST for regfile b */
+	QPU_W_MUTEX_RELEASE,
+	QPU_W_SFU_RECIP,
+	QPU_W_SFU_RECIPSQRT,
+	QPU_W_SFU_EXP,
+	QPU_W_SFU_LOG,
+	QPU_W_TMU0_S,
+	QPU_W_TMU0_T,
+	QPU_W_TMU0_R,
+	QPU_W_TMU0_B,
+	QPU_W_TMU1_S,
+	QPU_W_TMU1_T,
+	QPU_W_TMU1_R,
+	QPU_W_TMU1_B,
+};
+
+enum qpu_sig_bits {
+	QPU_SIG_SW_BREAKPOINT,
+	QPU_SIG_NONE,
+	QPU_SIG_THREAD_SWITCH,
+	QPU_SIG_PROG_END,
+	QPU_SIG_WAIT_FOR_SCOREBOARD,
+	QPU_SIG_SCOREBOARD_UNLOCK,
+	QPU_SIG_LAST_THREAD_SWITCH,
+	QPU_SIG_COVERAGE_LOAD,
+	QPU_SIG_COLOR_LOAD,
+	QPU_SIG_COLOR_LOAD_END,
+	QPU_SIG_LOAD_TMU0,
+	QPU_SIG_LOAD_TMU1,
+	QPU_SIG_ALPHA_MASK_LOAD,
+	QPU_SIG_SMALL_IMM,
+	QPU_SIG_LOAD_IMM,
+	QPU_SIG_BRANCH
+};
+
+enum qpu_mux {
+	/* hardware mux values */
+	QPU_MUX_R0,
+	QPU_MUX_R1,
+	QPU_MUX_R2,
+	QPU_MUX_R3,
+	QPU_MUX_R4,
+	QPU_MUX_R5,
+	QPU_MUX_A,
+	QPU_MUX_B,
+
+	/* non-hardware mux values */
+	QPU_MUX_IMM,
+};
+
+enum qpu_cond {
+	QPU_COND_NEVER,
+	QPU_COND_ALWAYS,
+	QPU_COND_ZS,
+	QPU_COND_ZC,
+	QPU_COND_NS,
+	QPU_COND_NC,
+	QPU_COND_CS,
+	QPU_COND_CC,
+};
+
+enum qpu_pack_mul {
+	QPU_PACK_MUL_NOP,
+	/* replicated to each 8 bits of the 32-bit dst. */
+	QPU_PACK_MUL_8888 = 3,
+	QPU_PACK_MUL_8A,
+	QPU_PACK_MUL_8B,
+	QPU_PACK_MUL_8C,
+	QPU_PACK_MUL_8D,
+};
+
+enum qpu_pack_a {
+	QPU_PACK_A_NOP,
+	/* convert to 16 bit float if float input, or to int16. */
+	QPU_PACK_A_16A,
+	QPU_PACK_A_16B,
+	/* replicated to each 8 bits of the 32-bit dst. */
+	QPU_PACK_A_8888,
+	/* Convert to 8-bit unsigned int. */
+	QPU_PACK_A_8A,
+	QPU_PACK_A_8B,
+	QPU_PACK_A_8C,
+	QPU_PACK_A_8D,
+
+	/* Saturating variants of the previous instructions. */
+	QPU_PACK_A_32_SAT, /* int-only */
+	QPU_PACK_A_16A_SAT, /* int or float */
+	QPU_PACK_A_16B_SAT,
+	QPU_PACK_A_8888_SAT,
+	QPU_PACK_A_8A_SAT,
+	QPU_PACK_A_8B_SAT,
+	QPU_PACK_A_8C_SAT,
+	QPU_PACK_A_8D_SAT,
+};
+
+enum qpu_unpack_r4 {
+	QPU_UNPACK_R4_NOP,
+	QPU_UNPACK_R4_F16A_TO_F32,
+	QPU_UNPACK_R4_F16B_TO_F32,
+	QPU_UNPACK_R4_8D_REP,
+	QPU_UNPACK_R4_8A,
+	QPU_UNPACK_R4_8B,
+	QPU_UNPACK_R4_8C,
+	QPU_UNPACK_R4_8D,
+};
+
+#define QPU_MASK(high, low) \
+	((((uint64_t)1 << ((high) - (low) + 1)) - 1) << (low))
+
+#define QPU_GET_FIELD(word, field) \
+	((uint32_t)(((word)  & field ## _MASK) >> field ## _SHIFT))
+
+#define QPU_SIG_SHIFT                   60
+#define QPU_SIG_MASK                    QPU_MASK(63, 60)
+
+#define QPU_UNPACK_SHIFT                57
+#define QPU_UNPACK_MASK                 QPU_MASK(59, 57)
+
+/**
+ * If set, the pack field means PACK_MUL or R4 packing, instead of normal
+ * regfile a packing.
+ */
+#define QPU_PM                          ((uint64_t)1 << 56)
+
+#define QPU_PACK_SHIFT                  52
+#define QPU_PACK_MASK                   QPU_MASK(55, 52)
+
+#define QPU_COND_ADD_SHIFT              49
+#define QPU_COND_ADD_MASK               QPU_MASK(51, 49)
+#define QPU_COND_MUL_SHIFT              46
+#define QPU_COND_MUL_MASK               QPU_MASK(48, 46)
+
+#define QPU_SF                          ((uint64_t)1 << 45)
+
+#define QPU_WADDR_ADD_SHIFT             38
+#define QPU_WADDR_ADD_MASK              QPU_MASK(43, 38)
+#define QPU_WADDR_MUL_SHIFT             32
+#define QPU_WADDR_MUL_MASK              QPU_MASK(37, 32)
+
+#define QPU_OP_MUL_SHIFT                29
+#define QPU_OP_MUL_MASK                 QPU_MASK(31, 29)
+
+#define QPU_RADDR_A_SHIFT               18
+#define QPU_RADDR_A_MASK                QPU_MASK(23, 18)
+#define QPU_RADDR_B_SHIFT               12
+#define QPU_RADDR_B_MASK                QPU_MASK(17, 12)
+#define QPU_SMALL_IMM_SHIFT             12
+#define QPU_SMALL_IMM_MASK              QPU_MASK(17, 12)
+
+#define QPU_ADD_A_SHIFT                 9
+#define QPU_ADD_A_MASK                  QPU_MASK(11, 9)
+#define QPU_ADD_B_SHIFT                 6
+#define QPU_ADD_B_MASK                  QPU_MASK(8, 6)
+#define QPU_MUL_A_SHIFT                 3
+#define QPU_MUL_A_MASK                  QPU_MASK(5, 3)
+#define QPU_MUL_B_SHIFT                 0
+#define QPU_MUL_B_MASK                  QPU_MASK(2, 0)
+
+#define QPU_WS                          ((uint64_t)1 << 44)
+
+#define QPU_OP_ADD_SHIFT                24
+#define QPU_OP_ADD_MASK                 QPU_MASK(28, 24)
+
+#endif /* VC4_QPU_DEFINES_H */
diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
new file mode 100644
index 000000000000..f67124b4c534
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
@@ -0,0 +1,513 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * DOC: Shader validator for VC4.
+ *
+ * The VC4 has no IOMMU between it and system memory, so a user with
+ * access to execute shaders could escalate privilege by overwriting
+ * system memory (using the VPM write address register in the
+ * general-purpose DMA mode) or reading system memory it shouldn't
+ * (reading it as a texture, or uniform data, or vertex data).
+ *
+ * This walks over a shader BO, ensuring that its accesses are
+ * appropriately bounded, and recording how many texture accesses are
+ * made and where so that we can do relocations for them in the
+ * uniform stream.
+ */
+
+#include "vc4_drv.h"
+#include "vc4_qpu_defines.h"
+
+struct vc4_shader_validation_state {
+	struct vc4_texture_sample_info tmu_setup[2];
+	int tmu_write_count[2];
+
+	/* For registers that were last written to by a MIN instruction with
+	 * one argument being a uniform, the address of the uniform.
+	 * Otherwise, ~0.
+	 *
+	 * This is used for the validation of direct address memory reads.
+	 */
+	uint32_t live_min_clamp_offsets[32 + 32 + 4];
+	bool live_max_clamp_regs[32 + 32 + 4];
+};
+
+static uint32_t
+waddr_to_live_reg_index(uint32_t waddr, bool is_b)
+{
+	if (waddr < 32) {
+		if (is_b)
+			return 32 + waddr;
+		else
+			return waddr;
+	} else if (waddr <= QPU_W_ACC3) {
+		return 64 + waddr - QPU_W_ACC0;
+	} else {
+		return ~0;
+	}
+}
+
+static uint32_t
+raddr_add_a_to_live_reg_index(uint64_t inst)
+{
+	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
+	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
+	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
+
+	if (add_a == QPU_MUX_A)
+		return raddr_a;
+	else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM)
+		return 32 + raddr_b;
+	else if (add_a <= QPU_MUX_R3)
+		return 64 + add_a;
+	else
+		return ~0;
+}
+
+static bool
+is_tmu_submit(uint32_t waddr)
+{
+	return (waddr == QPU_W_TMU0_S ||
+		waddr == QPU_W_TMU1_S);
+}
+
+static bool
+is_tmu_write(uint32_t waddr)
+{
+	return (waddr >= QPU_W_TMU0_S &&
+		waddr <= QPU_W_TMU1_B);
+}
+
+static bool
+record_texture_sample(struct vc4_validated_shader_info *validated_shader,
+		      struct vc4_shader_validation_state *validation_state,
+		      int tmu)
+{
+	uint32_t s = validated_shader->num_texture_samples;
+	int i;
+	struct vc4_texture_sample_info *temp_samples;
+
+	temp_samples = krealloc(validated_shader->texture_samples,
+				(s + 1) * sizeof(*temp_samples),
+				GFP_KERNEL);
+	if (!temp_samples)
+		return false;
+
+	memcpy(&temp_samples[s],
+	       &validation_state->tmu_setup[tmu],
+	       sizeof(*temp_samples));
+
+	validated_shader->num_texture_samples = s + 1;
+	validated_shader->texture_samples = temp_samples;
+
+	for (i = 0; i < 4; i++)
+		validation_state->tmu_setup[tmu].p_offset[i] = ~0;
+
+	return true;
+}
+
+static bool
+check_tmu_write(uint64_t inst,
+		struct vc4_validated_shader_info *validated_shader,
+		struct vc4_shader_validation_state *validation_state,
+		bool is_mul)
+{
+	uint32_t waddr = (is_mul ?
+			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
+			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
+	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
+	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
+	int tmu = waddr > QPU_W_TMU0_B;
+	bool submit = is_tmu_submit(waddr);
+	bool is_direct = submit && validation_state->tmu_write_count[tmu] == 0;
+	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+
+	if (is_direct) {
+		uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
+		uint32_t clamp_reg, clamp_offset;
+
+		if (sig == QPU_SIG_SMALL_IMM) {
+			DRM_ERROR("direct TMU read used small immediate\n");
+			return false;
+		}
+
+		/* Make sure that this texture load is an add of the base
+		 * address of the UBO to a clamped offset within the UBO.
+		 */
+		if (is_mul ||
+		    QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
+			DRM_ERROR("direct TMU load wasn't an add\n");
+			return false;
+		}
+
+		/* We assert that the the clamped address is the first
+		 * argument, and the UBO base address is the second argument.
+		 * This is arbitrary, but simpler than supporting flipping the
+		 * two either way.
+		 */
+		clamp_reg = raddr_add_a_to_live_reg_index(inst);
+		if (clamp_reg == ~0) {
+			DRM_ERROR("direct TMU load wasn't clamped\n");
+			return false;
+		}
+
+		clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg];
+		if (clamp_offset == ~0) {
+			DRM_ERROR("direct TMU load wasn't clamped\n");
+			return false;
+		}
+
+		/* Store the clamp value's offset in p1 (see reloc_tex() in
+		 * vc4_validate.c).
+		 */
+		validation_state->tmu_setup[tmu].p_offset[1] =
+			clamp_offset;
+
+		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
+		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
+			DRM_ERROR("direct TMU load didn't add to a uniform\n");
+			return false;
+		}
+
+		validation_state->tmu_setup[tmu].is_direct = true;
+	} else {
+		if (raddr_a == QPU_R_UNIF || (sig != QPU_SIG_SMALL_IMM &&
+					      raddr_b == QPU_R_UNIF)) {
+			DRM_ERROR("uniform read in the same instruction as "
+				  "texture setup.\n");
+			return false;
+		}
+	}
+
+	if (validation_state->tmu_write_count[tmu] >= 4) {
+		DRM_ERROR("TMU%d got too many parameters before dispatch\n",
+			  tmu);
+		return false;
+	}
+	validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] =
+		validated_shader->uniforms_size;
+	validation_state->tmu_write_count[tmu]++;
+	/* Since direct uses a RADDR uniform reference, it will get counted in
+	 * check_instruction_reads()
+	 */
+	if (!is_direct)
+		validated_shader->uniforms_size += 4;
+
+	if (submit) {
+		if (!record_texture_sample(validated_shader,
+					   validation_state, tmu)) {
+			return false;
+		}
+
+		validation_state->tmu_write_count[tmu] = 0;
+	}
+
+	return true;
+}
+
+static bool
+check_reg_write(uint64_t inst,
+		struct vc4_validated_shader_info *validated_shader,
+		struct vc4_shader_validation_state *validation_state,
+		bool is_mul)
+{
+	uint32_t waddr = (is_mul ?
+			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
+			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
+
+	switch (waddr) {
+	case QPU_W_UNIFORMS_ADDRESS:
+		/* XXX: We'll probably need to support this for reladdr, but
+		 * it's definitely a security-related one.
+		 */
+		DRM_ERROR("uniforms address load unsupported\n");
+		return false;
+
+	case QPU_W_TLB_COLOR_MS:
+	case QPU_W_TLB_COLOR_ALL:
+	case QPU_W_TLB_Z:
+		/* These only interact with the tile buffer, not main memory,
+		 * so they're safe.
+		 */
+		return true;
+
+	case QPU_W_TMU0_S:
+	case QPU_W_TMU0_T:
+	case QPU_W_TMU0_R:
+	case QPU_W_TMU0_B:
+	case QPU_W_TMU1_S:
+	case QPU_W_TMU1_T:
+	case QPU_W_TMU1_R:
+	case QPU_W_TMU1_B:
+		return check_tmu_write(inst, validated_shader, validation_state,
+				       is_mul);
+
+	case QPU_W_HOST_INT:
+	case QPU_W_TMU_NOSWAP:
+	case QPU_W_TLB_ALPHA_MASK:
+	case QPU_W_MUTEX_RELEASE:
+		/* XXX: I haven't thought about these, so don't support them
+		 * for now.
+		 */
+		DRM_ERROR("Unsupported waddr %d\n", waddr);
+		return false;
+
+	case QPU_W_VPM_ADDR:
+		DRM_ERROR("General VPM DMA unsupported\n");
+		return false;
+
+	case QPU_W_VPM:
+	case QPU_W_VPMVCD_SETUP:
+		/* We allow VPM setup in general, even including VPM DMA
+		 * configuration setup, because the (unsafe) DMA can only be
+		 * triggered by QPU_W_VPM_ADDR writes.
+		 */
+		return true;
+
+	case QPU_W_TLB_STENCIL_SETUP:
+		return true;
+	}
+
+	return true;
+}
+
+static void
+track_live_clamps(uint64_t inst,
+		  struct vc4_validated_shader_info *validated_shader,
+		  struct vc4_shader_validation_state *validation_state)
+{
+	uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
+	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
+	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
+	uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
+	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
+	uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
+	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
+	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
+	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+	bool ws = inst & QPU_WS;
+	uint32_t lri_add_a, lri_add, lri_mul;
+	bool add_a_is_min_0;
+
+	/* Check whether OP_ADD's A argumennt comes from a live MAX(x, 0),
+	 * before we clear previous live state.
+	 */
+	lri_add_a = raddr_add_a_to_live_reg_index(inst);
+	add_a_is_min_0 = (lri_add_a != ~0 &&
+			  validation_state->live_max_clamp_regs[lri_add_a]);
+
+	/* Clear live state for registers written by our instruction. */
+	lri_add = waddr_to_live_reg_index(waddr_add, ws);
+	lri_mul = waddr_to_live_reg_index(waddr_mul, !ws);
+	if (lri_mul != ~0) {
+		validation_state->live_max_clamp_regs[lri_mul] = false;
+		validation_state->live_min_clamp_offsets[lri_mul] = ~0;
+	}
+	if (lri_add != ~0) {
+		validation_state->live_max_clamp_regs[lri_add] = false;
+		validation_state->live_min_clamp_offsets[lri_add] = ~0;
+	} else {
+		/* Nothing further to do for live tracking, since only ADDs
+		 * generate new live clamp registers.
+		 */
+		return;
+	}
+
+	/* Now, handle remaining live clamp tracking for the ADD operation. */
+
+	if (cond_add != QPU_COND_ALWAYS)
+		return;
+
+	if (op_add == QPU_A_MAX) {
+		/* Track live clamps of a value to a minimum of 0 (in either
+		 * arg).
+		 */
+		if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 ||
+		    (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) {
+			return;
+		}
+
+		validation_state->live_max_clamp_regs[lri_add] = true;
+	} else if (op_add == QPU_A_MIN) {
+		/* Track live clamps of a value clamped to a minimum of 0 and
+		 * a maximum of some uniform's offset.
+		 */
+		if (!add_a_is_min_0)
+			return;
+
+		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
+		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF &&
+		      sig != QPU_SIG_SMALL_IMM)) {
+			return;
+		}
+
+		validation_state->live_min_clamp_offsets[lri_add] =
+			validated_shader->uniforms_size;
+	}
+}
+
+static bool
+check_instruction_writes(uint64_t inst,
+			 struct vc4_validated_shader_info *validated_shader,
+			 struct vc4_shader_validation_state *validation_state)
+{
+	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
+	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
+	bool ok;
+
+	if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) {
+		DRM_ERROR("ADD and MUL both set up textures\n");
+		return false;
+	}
+
+	ok = (check_reg_write(inst, validated_shader, validation_state,
+			      false) &&
+	      check_reg_write(inst, validated_shader, validation_state,
+			      true));
+
+	track_live_clamps(inst, validated_shader, validation_state);
+
+	return ok;
+}
+
+static bool
+check_instruction_reads(uint64_t inst,
+			struct vc4_validated_shader_info *validated_shader)
+{
+	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
+	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
+	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+
+	if (raddr_a == QPU_R_UNIF ||
+	    (raddr_b == QPU_R_UNIF && sig != QPU_SIG_SMALL_IMM)) {
+		/* This can't overflow the uint32_t, because we're reading 8
+		 * bytes of instruction to increment by 4 here, so we'd
+		 * already be OOM.
+		 */
+		validated_shader->uniforms_size += 4;
+	}
+
+	return true;
+}
+
+struct vc4_validated_shader_info *
+vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
+{
+	bool found_shader_end = false;
+	int shader_end_ip = 0;
+	uint32_t ip, max_ip;
+	uint64_t *shader;
+	struct vc4_validated_shader_info *validated_shader;
+	struct vc4_shader_validation_state validation_state;
+	int i;
+
+	memset(&validation_state, 0, sizeof(validation_state));
+
+	for (i = 0; i < 8; i++)
+		validation_state.tmu_setup[i / 4].p_offset[i % 4] = ~0;
+	for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++)
+		validation_state.live_min_clamp_offsets[i] = ~0;
+
+	shader = shader_obj->vaddr;
+	max_ip = shader_obj->base.size / sizeof(uint64_t);
+
+	validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
+	if (!validated_shader)
+		return NULL;
+
+	for (ip = 0; ip < max_ip; ip++) {
+		uint64_t inst = shader[ip];
+		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+
+		switch (sig) {
+		case QPU_SIG_NONE:
+		case QPU_SIG_WAIT_FOR_SCOREBOARD:
+		case QPU_SIG_SCOREBOARD_UNLOCK:
+		case QPU_SIG_COLOR_LOAD:
+		case QPU_SIG_LOAD_TMU0:
+		case QPU_SIG_LOAD_TMU1:
+		case QPU_SIG_PROG_END:
+		case QPU_SIG_SMALL_IMM:
+			if (!check_instruction_writes(inst, validated_shader,
+						      &validation_state)) {
+				DRM_ERROR("Bad write at ip %d\n", ip);
+				goto fail;
+			}
+
+			if (!check_instruction_reads(inst, validated_shader))
+				goto fail;
+
+			if (sig == QPU_SIG_PROG_END) {
+				found_shader_end = true;
+				shader_end_ip = ip;
+			}
+
+			break;
+
+		case QPU_SIG_LOAD_IMM:
+			if (!check_instruction_writes(inst, validated_shader,
+						      &validation_state)) {
+				DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip);
+				goto fail;
+			}
+			break;
+
+		default:
+			DRM_ERROR("Unsupported QPU signal %d at "
+				  "instruction %d\n", sig, ip);
+			goto fail;
+		}
+
+		/* There are two delay slots after program end is signaled
+		 * that are still executed, then we're finished.
+		 */
+		if (found_shader_end && ip == shader_end_ip + 2)
+			break;
+	}
+
+	if (ip == max_ip) {
+		DRM_ERROR("shader failed to terminate before "
+			  "shader BO end at %zd\n",
+			  shader_obj->base.size);
+		goto fail;
+	}
+
+	/* Again, no chance of integer overflow here because the worst case
+	 * scenario is 8 bytes of uniforms plus handles per 8-byte
+	 * instruction.
+	 */
+	validated_shader->uniforms_src_size =
+		(validated_shader->uniforms_size +
+		 4 * validated_shader->num_texture_samples);
+
+	return validated_shader;
+
+fail:
+	if (validated_shader) {
+		kfree(validated_shader->texture_samples);
+		kfree(validated_shader);
+	}
+	return NULL;
+}
diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h
index 219d34c42272..74de18416be9 100644
--- a/include/uapi/drm/vc4_drm.h
+++ b/include/uapi/drm/vc4_drm.h
@@ -28,9 +28,11 @@
 
 #define DRM_VC4_CREATE_BO                         0x03
 #define DRM_VC4_MMAP_BO                           0x04
+#define DRM_VC4_CREATE_SHADER_BO                  0x05
 
 #define DRM_IOCTL_VC4_CREATE_BO           DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo)
 #define DRM_IOCTL_VC4_MMAP_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo)
+#define DRM_IOCTL_VC4_CREATE_SHADER_BO    DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo)
 
 /**
  * struct drm_vc4_create_bo - ioctl argument for creating VC4 BOs.
@@ -65,4 +67,27 @@ struct drm_vc4_mmap_bo {
 	__u64 offset;
 };
 
+/**
+ * struct drm_vc4_create_shader_bo - ioctl argument for creating VC4
+ * shader BOs.
+ *
+ * Since allowing a shader to be overwritten while it's also being
+ * executed from would allow privlege escalation, shaders must be
+ * created using this ioctl, and they can't be mmapped later.
+ */
+struct drm_vc4_create_shader_bo {
+	/* Size of the data argument. */
+	__u32 size;
+	/* Flags, currently must be 0. */
+	__u32 flags;
+
+	/* Pointer to the data. */
+	__u64 data;
+
+	/** Returned GEM handle for the BO. */
+	__u32 handle;
+	/* Pad, must be 0. */
+	__u32 pad;
+};
+
 #endif /* _UAPI_VC4_DRM_H_ */
-- 
cgit v1.2.3-58-ga151


From d5b1a78a772f1e31a94f8babfa964152ec5e9aa5 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 30 Nov 2015 12:13:37 -0800
Subject: drm/vc4: Add support for drawing 3D frames.

The user submission is basically a pointer to a command list and a
pointer to uniforms.  We copy those in to the kernel, validate and
relocate them, and store the result in a GPU BO which we queue for
execution.

v2: Drop support for NV shader recs (not necessary for GL), simplify
    vc4_use_bo(), improve bin flush/semaphore checks, use __u32 style
    types.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/Makefile           |   7 +
 drivers/gpu/drm/vc4/vc4_drv.c          |  15 +-
 drivers/gpu/drm/vc4/vc4_drv.h          | 182 +++++++
 drivers/gpu/drm/vc4/vc4_gem.c          | 642 +++++++++++++++++++++++
 drivers/gpu/drm/vc4/vc4_irq.c          | 210 ++++++++
 drivers/gpu/drm/vc4/vc4_packet.h       | 399 +++++++++++++++
 drivers/gpu/drm/vc4/vc4_render_cl.c    | 634 +++++++++++++++++++++++
 drivers/gpu/drm/vc4/vc4_trace.h        |  63 +++
 drivers/gpu/drm/vc4/vc4_trace_points.c |  14 +
 drivers/gpu/drm/vc4/vc4_v3d.c          |  37 ++
 drivers/gpu/drm/vc4/vc4_validate.c     | 900 +++++++++++++++++++++++++++++++++
 include/uapi/drm/vc4_drm.h             | 141 ++++++
 12 files changed, 3243 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/vc4/vc4_gem.c
 create mode 100644 drivers/gpu/drm/vc4/vc4_irq.c
 create mode 100644 drivers/gpu/drm/vc4/vc4_packet.h
 create mode 100644 drivers/gpu/drm/vc4/vc4_render_cl.c
 create mode 100644 drivers/gpu/drm/vc4/vc4_trace.h
 create mode 100644 drivers/gpu/drm/vc4/vc4_trace_points.c
 create mode 100644 drivers/gpu/drm/vc4/vc4_validate.c

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/vc4/Makefile b/drivers/gpu/drm/vc4/Makefile
index e87a6f2f5916..4c6a99f0398c 100644
--- a/drivers/gpu/drm/vc4/Makefile
+++ b/drivers/gpu/drm/vc4/Makefile
@@ -8,12 +8,19 @@ vc4-y := \
 	vc4_crtc.o \
 	vc4_drv.o \
 	vc4_kms.o \
+	vc4_gem.o \
 	vc4_hdmi.o \
 	vc4_hvs.o \
+	vc4_irq.o \
 	vc4_plane.o \
+	vc4_render_cl.o \
+	vc4_trace_points.o \
 	vc4_v3d.o \
+	vc4_validate.o \
 	vc4_validate_shaders.o
 
 vc4-$(CONFIG_DEBUG_FS) += vc4_debugfs.o
 
 obj-$(CONFIG_DRM_VC4)  += vc4.o
+
+CFLAGS_vc4_trace_points.o := -I$(src)
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index db58d74efe95..2cfee5959455 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -74,6 +74,9 @@ static const struct file_operations vc4_drm_fops = {
 };
 
 static const struct drm_ioctl_desc vc4_drm_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(VC4_SUBMIT_CL, vc4_submit_cl_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(VC4_WAIT_SEQNO, vc4_wait_seqno_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(VC4_WAIT_BO, vc4_wait_bo_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0),
@@ -83,10 +86,16 @@ static struct drm_driver vc4_drm_driver = {
 	.driver_features = (DRIVER_MODESET |
 			    DRIVER_ATOMIC |
 			    DRIVER_GEM |
+			    DRIVER_HAVE_IRQ |
 			    DRIVER_PRIME),
 	.lastclose = vc4_lastclose,
 	.preclose = vc4_drm_preclose,
 
+	.irq_handler = vc4_irq,
+	.irq_preinstall = vc4_irq_preinstall,
+	.irq_postinstall = vc4_irq_postinstall,
+	.irq_uninstall = vc4_irq_uninstall,
+
 	.enable_vblank = vc4_enable_vblank,
 	.disable_vblank = vc4_disable_vblank,
 	.get_vblank_counter = drm_vblank_count,
@@ -181,9 +190,11 @@ static int vc4_drm_bind(struct device *dev)
 	if (ret)
 		goto unref;
 
+	vc4_gem_init(drm);
+
 	ret = component_bind_all(dev, drm);
 	if (ret)
-		goto unref;
+		goto gem_destroy;
 
 	ret = drm_dev_register(drm, 0);
 	if (ret < 0)
@@ -207,6 +218,8 @@ unregister:
 	drm_dev_unregister(drm);
 unbind_all:
 	component_unbind_all(dev, drm);
+gem_destroy:
+	vc4_gem_destroy(drm);
 unref:
 	drm_dev_unref(drm);
 	vc4_bo_cache_destroy(drm);
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 8945463e70b6..0bc8c57196ac 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -49,6 +49,48 @@ struct vc4_dev {
 
 	/* Protects bo_cache and the BO stats. */
 	struct mutex bo_lock;
+
+	/* Sequence number for the last job queued in job_list.
+	 * Starts at 0 (no jobs emitted).
+	 */
+	uint64_t emit_seqno;
+
+	/* Sequence number for the last completed job on the GPU.
+	 * Starts at 0 (no jobs completed).
+	 */
+	uint64_t finished_seqno;
+
+	/* List of all struct vc4_exec_info for jobs to be executed.
+	 * The first job in the list is the one currently programmed
+	 * into ct0ca/ct1ca for execution.
+	 */
+	struct list_head job_list;
+	/* List of the finished vc4_exec_infos waiting to be freed by
+	 * job_done_work.
+	 */
+	struct list_head job_done_list;
+	/* Spinlock used to synchronize the job_list and seqno
+	 * accesses between the IRQ handler and GEM ioctls.
+	 */
+	spinlock_t job_lock;
+	wait_queue_head_t job_wait_queue;
+	struct work_struct job_done_work;
+
+	/* The binner overflow memory that's currently set up in
+	 * BPOA/BPOS registers.  When overflow occurs and a new one is
+	 * allocated, the previous one will be moved to
+	 * vc4->current_exec's free list.
+	 */
+	struct vc4_bo *overflow_mem;
+	struct work_struct overflow_mem_work;
+
+	struct {
+		uint32_t last_ct0ca, last_ct1ca;
+		struct timer_list timer;
+		struct work_struct reset_work;
+	} hangcheck;
+
+	struct semaphore async_modeset;
 };
 
 static inline struct vc4_dev *
@@ -60,6 +102,9 @@ to_vc4_dev(struct drm_device *dev)
 struct vc4_bo {
 	struct drm_gem_cma_object base;
 
+	/* seqno of the last job to render to this BO. */
+	uint64_t seqno;
+
 	/* List entry for the BO's position in either
 	 * vc4_exec_info->unref_list or vc4_dev->bo_cache.time_list
 	 */
@@ -130,6 +175,101 @@ to_vc4_encoder(struct drm_encoder *encoder)
 #define HVS_READ(offset) readl(vc4->hvs->regs + offset)
 #define HVS_WRITE(offset, val) writel(val, vc4->hvs->regs + offset)
 
+struct vc4_exec_info {
+	/* Sequence number for this bin/render job. */
+	uint64_t seqno;
+
+	/* Kernel-space copy of the ioctl arguments */
+	struct drm_vc4_submit_cl *args;
+
+	/* This is the array of BOs that were looked up at the start of exec.
+	 * Command validation will use indices into this array.
+	 */
+	struct drm_gem_cma_object **bo;
+	uint32_t bo_count;
+
+	/* Pointers for our position in vc4->job_list */
+	struct list_head head;
+
+	/* List of other BOs used in the job that need to be released
+	 * once the job is complete.
+	 */
+	struct list_head unref_list;
+
+	/* Current unvalidated indices into @bo loaded by the non-hardware
+	 * VC4_PACKET_GEM_HANDLES.
+	 */
+	uint32_t bo_index[2];
+
+	/* This is the BO where we store the validated command lists, shader
+	 * records, and uniforms.
+	 */
+	struct drm_gem_cma_object *exec_bo;
+
+	/**
+	 * This tracks the per-shader-record state (packet 64) that
+	 * determines the length of the shader record and the offset
+	 * it's expected to be found at.  It gets read in from the
+	 * command lists.
+	 */
+	struct vc4_shader_state {
+		uint32_t addr;
+		/* Maximum vertex index referenced by any primitive using this
+		 * shader state.
+		 */
+		uint32_t max_index;
+	} *shader_state;
+
+	/** How many shader states the user declared they were using. */
+	uint32_t shader_state_size;
+	/** How many shader state records the validator has seen. */
+	uint32_t shader_state_count;
+
+	bool found_tile_binning_mode_config_packet;
+	bool found_start_tile_binning_packet;
+	bool found_increment_semaphore_packet;
+	bool found_flush;
+	uint8_t bin_tiles_x, bin_tiles_y;
+	struct drm_gem_cma_object *tile_bo;
+	uint32_t tile_alloc_offset;
+
+	/**
+	 * Computed addresses pointing into exec_bo where we start the
+	 * bin thread (ct0) and render thread (ct1).
+	 */
+	uint32_t ct0ca, ct0ea;
+	uint32_t ct1ca, ct1ea;
+
+	/* Pointer to the unvalidated bin CL (if present). */
+	void *bin_u;
+
+	/* Pointers to the shader recs.  These paddr gets incremented as CL
+	 * packets are relocated in validate_gl_shader_state, and the vaddrs
+	 * (u and v) get incremented and size decremented as the shader recs
+	 * themselves are validated.
+	 */
+	void *shader_rec_u;
+	void *shader_rec_v;
+	uint32_t shader_rec_p;
+	uint32_t shader_rec_size;
+
+	/* Pointers to the uniform data.  These pointers are incremented, and
+	 * size decremented, as each batch of uniforms is uploaded.
+	 */
+	void *uniforms_u;
+	void *uniforms_v;
+	uint32_t uniforms_p;
+	uint32_t uniforms_size;
+};
+
+static inline struct vc4_exec_info *
+vc4_first_job(struct vc4_dev *vc4)
+{
+	if (list_empty(&vc4->job_list))
+		return NULL;
+	return list_first_entry(&vc4->job_list, struct vc4_exec_info, head);
+}
+
 /**
  * struct vc4_texture_sample_info - saves the offsets into the UBO for texture
  * setup parameters.
@@ -231,10 +371,31 @@ void vc4_debugfs_cleanup(struct drm_minor *minor);
 /* vc4_drv.c */
 void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index);
 
+/* vc4_gem.c */
+void vc4_gem_init(struct drm_device *dev);
+void vc4_gem_destroy(struct drm_device *dev);
+int vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file_priv);
+int vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
+			 struct drm_file *file_priv);
+int vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
+		      struct drm_file *file_priv);
+void vc4_submit_next_job(struct drm_device *dev);
+int vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno,
+		       uint64_t timeout_ns, bool interruptible);
+void vc4_job_handle_completed(struct vc4_dev *vc4);
+
 /* vc4_hdmi.c */
 extern struct platform_driver vc4_hdmi_driver;
 int vc4_hdmi_debugfs_regs(struct seq_file *m, void *unused);
 
+/* vc4_irq.c */
+irqreturn_t vc4_irq(int irq, void *arg);
+void vc4_irq_preinstall(struct drm_device *dev);
+int vc4_irq_postinstall(struct drm_device *dev);
+void vc4_irq_uninstall(struct drm_device *dev);
+void vc4_irq_reset(struct drm_device *dev);
+
 /* vc4_hvs.c */
 extern struct platform_driver vc4_hvs_driver;
 void vc4_hvs_dump_state(struct drm_device *dev);
@@ -253,6 +414,27 @@ u32 vc4_plane_dlist_size(struct drm_plane_state *state);
 extern struct platform_driver vc4_v3d_driver;
 int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused);
 int vc4_v3d_debugfs_regs(struct seq_file *m, void *unused);
+int vc4_v3d_set_power(struct vc4_dev *vc4, bool on);
+
+/* vc4_validate.c */
+int
+vc4_validate_bin_cl(struct drm_device *dev,
+		    void *validated,
+		    void *unvalidated,
+		    struct vc4_exec_info *exec);
+
+int
+vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec);
+
+struct drm_gem_cma_object *vc4_use_bo(struct vc4_exec_info *exec,
+				      uint32_t hindex);
+
+int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec);
+
+bool vc4_check_tex_size(struct vc4_exec_info *exec,
+			struct drm_gem_cma_object *fbo,
+			uint32_t offset, uint8_t tiling_format,
+			uint32_t width, uint32_t height, uint8_t cpp);
 
 /* vc4_validate_shader.c */
 struct vc4_validated_shader_info *
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
new file mode 100644
index 000000000000..936dddfa890f
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -0,0 +1,642 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/io.h>
+
+#include "uapi/drm/vc4_drm.h"
+#include "vc4_drv.h"
+#include "vc4_regs.h"
+#include "vc4_trace.h"
+
+static void
+vc4_queue_hangcheck(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	mod_timer(&vc4->hangcheck.timer,
+		  round_jiffies_up(jiffies + msecs_to_jiffies(100)));
+}
+
+static void
+vc4_reset(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	DRM_INFO("Resetting GPU.\n");
+	vc4_v3d_set_power(vc4, false);
+	vc4_v3d_set_power(vc4, true);
+
+	vc4_irq_reset(dev);
+
+	/* Rearm the hangcheck -- another job might have been waiting
+	 * for our hung one to get kicked off, and vc4_irq_reset()
+	 * would have started it.
+	 */
+	vc4_queue_hangcheck(dev);
+}
+
+static void
+vc4_reset_work(struct work_struct *work)
+{
+	struct vc4_dev *vc4 =
+		container_of(work, struct vc4_dev, hangcheck.reset_work);
+
+	vc4_reset(vc4->dev);
+}
+
+static void
+vc4_hangcheck_elapsed(unsigned long data)
+{
+	struct drm_device *dev = (struct drm_device *)data;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	uint32_t ct0ca, ct1ca;
+
+	/* If idle, we can stop watching for hangs. */
+	if (list_empty(&vc4->job_list))
+		return;
+
+	ct0ca = V3D_READ(V3D_CTNCA(0));
+	ct1ca = V3D_READ(V3D_CTNCA(1));
+
+	/* If we've made any progress in execution, rearm the timer
+	 * and wait.
+	 */
+	if (ct0ca != vc4->hangcheck.last_ct0ca ||
+	    ct1ca != vc4->hangcheck.last_ct1ca) {
+		vc4->hangcheck.last_ct0ca = ct0ca;
+		vc4->hangcheck.last_ct1ca = ct1ca;
+		vc4_queue_hangcheck(dev);
+		return;
+	}
+
+	/* We've gone too long with no progress, reset.  This has to
+	 * be done from a work struct, since resetting can sleep and
+	 * this timer hook isn't allowed to.
+	 */
+	schedule_work(&vc4->hangcheck.reset_work);
+}
+
+static void
+submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	/* Set the current and end address of the control list.
+	 * Writing the end register is what starts the job.
+	 */
+	V3D_WRITE(V3D_CTNCA(thread), start);
+	V3D_WRITE(V3D_CTNEA(thread), end);
+}
+
+int
+vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns,
+		   bool interruptible)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	int ret = 0;
+	unsigned long timeout_expire;
+	DEFINE_WAIT(wait);
+
+	if (vc4->finished_seqno >= seqno)
+		return 0;
+
+	if (timeout_ns == 0)
+		return -ETIME;
+
+	timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns);
+
+	trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
+	for (;;) {
+		prepare_to_wait(&vc4->job_wait_queue, &wait,
+				interruptible ? TASK_INTERRUPTIBLE :
+				TASK_UNINTERRUPTIBLE);
+
+		if (interruptible && signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+
+		if (vc4->finished_seqno >= seqno)
+			break;
+
+		if (timeout_ns != ~0ull) {
+			if (time_after_eq(jiffies, timeout_expire)) {
+				ret = -ETIME;
+				break;
+			}
+			schedule_timeout(timeout_expire - jiffies);
+		} else {
+			schedule();
+		}
+	}
+
+	finish_wait(&vc4->job_wait_queue, &wait);
+	trace_vc4_wait_for_seqno_end(dev, seqno);
+
+	if (ret && ret != -ERESTARTSYS) {
+		DRM_ERROR("timeout waiting for render thread idle\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static void
+vc4_flush_caches(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	/* Flush the GPU L2 caches.  These caches sit on top of system
+	 * L3 (the 128kb or so shared with the CPU), and are
+	 * non-allocating in the L3.
+	 */
+	V3D_WRITE(V3D_L2CACTL,
+		  V3D_L2CACTL_L2CCLR);
+
+	V3D_WRITE(V3D_SLCACTL,
+		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) |
+		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) |
+		  VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
+		  VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC));
+}
+
+/* Sets the registers for the next job to be actually be executed in
+ * the hardware.
+ *
+ * The job_lock should be held during this.
+ */
+void
+vc4_submit_next_job(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_exec_info *exec = vc4_first_job(vc4);
+
+	if (!exec)
+		return;
+
+	vc4_flush_caches(dev);
+
+	/* Disable the binner's pre-loaded overflow memory address */
+	V3D_WRITE(V3D_BPOA, 0);
+	V3D_WRITE(V3D_BPOS, 0);
+
+	if (exec->ct0ca != exec->ct0ea)
+		submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
+	submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
+}
+
+static void
+vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
+{
+	struct vc4_bo *bo;
+	unsigned i;
+
+	for (i = 0; i < exec->bo_count; i++) {
+		bo = to_vc4_bo(&exec->bo[i]->base);
+		bo->seqno = seqno;
+	}
+
+	list_for_each_entry(bo, &exec->unref_list, unref_head) {
+		bo->seqno = seqno;
+	}
+}
+
+/* Queues a struct vc4_exec_info for execution.  If no job is
+ * currently executing, then submits it.
+ *
+ * Unlike most GPUs, our hardware only handles one command list at a
+ * time.  To queue multiple jobs at once, we'd need to edit the
+ * previous command list to have a jump to the new one at the end, and
+ * then bump the end address.  That's a change for a later date,
+ * though.
+ */
+static void
+vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	uint64_t seqno;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&vc4->job_lock, irqflags);
+
+	seqno = ++vc4->emit_seqno;
+	exec->seqno = seqno;
+	vc4_update_bo_seqnos(exec, seqno);
+
+	list_add_tail(&exec->head, &vc4->job_list);
+
+	/* If no job was executing, kick ours off.  Otherwise, it'll
+	 * get started when the previous job's frame done interrupt
+	 * occurs.
+	 */
+	if (vc4_first_job(vc4) == exec) {
+		vc4_submit_next_job(dev);
+		vc4_queue_hangcheck(dev);
+	}
+
+	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+}
+
+/**
+ * Looks up a bunch of GEM handles for BOs and stores the array for
+ * use in the command validator that actually writes relocated
+ * addresses pointing to them.
+ */
+static int
+vc4_cl_lookup_bos(struct drm_device *dev,
+		  struct drm_file *file_priv,
+		  struct vc4_exec_info *exec)
+{
+	struct drm_vc4_submit_cl *args = exec->args;
+	uint32_t *handles;
+	int ret = 0;
+	int i;
+
+	exec->bo_count = args->bo_handle_count;
+
+	if (!exec->bo_count) {
+		/* See comment on bo_index for why we have to check
+		 * this.
+		 */
+		DRM_ERROR("Rendering requires BOs to validate\n");
+		return -EINVAL;
+	}
+
+	exec->bo = kcalloc(exec->bo_count, sizeof(struct drm_gem_cma_object *),
+			   GFP_KERNEL);
+	if (!exec->bo) {
+		DRM_ERROR("Failed to allocate validated BO pointers\n");
+		return -ENOMEM;
+	}
+
+	handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t));
+	if (!handles) {
+		DRM_ERROR("Failed to allocate incoming GEM handles\n");
+		goto fail;
+	}
+
+	ret = copy_from_user(handles,
+			     (void __user *)(uintptr_t)args->bo_handles,
+			     exec->bo_count * sizeof(uint32_t));
+	if (ret) {
+		DRM_ERROR("Failed to copy in GEM handles\n");
+		goto fail;
+	}
+
+	spin_lock(&file_priv->table_lock);
+	for (i = 0; i < exec->bo_count; i++) {
+		struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
+						     handles[i]);
+		if (!bo) {
+			DRM_ERROR("Failed to look up GEM BO %d: %d\n",
+				  i, handles[i]);
+			ret = -EINVAL;
+			spin_unlock(&file_priv->table_lock);
+			goto fail;
+		}
+		drm_gem_object_reference(bo);
+		exec->bo[i] = (struct drm_gem_cma_object *)bo;
+	}
+	spin_unlock(&file_priv->table_lock);
+
+fail:
+	kfree(handles);
+	return 0;
+}
+
+static int
+vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
+{
+	struct drm_vc4_submit_cl *args = exec->args;
+	void *temp = NULL;
+	void *bin;
+	int ret = 0;
+	uint32_t bin_offset = 0;
+	uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size,
+					     16);
+	uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size;
+	uint32_t exec_size = uniforms_offset + args->uniforms_size;
+	uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
+					  args->shader_rec_count);
+	struct vc4_bo *bo;
+
+	if (uniforms_offset < shader_rec_offset ||
+	    exec_size < uniforms_offset ||
+	    args->shader_rec_count >= (UINT_MAX /
+					  sizeof(struct vc4_shader_state)) ||
+	    temp_size < exec_size) {
+		DRM_ERROR("overflow in exec arguments\n");
+		goto fail;
+	}
+
+	/* Allocate space where we'll store the copied in user command lists
+	 * and shader records.
+	 *
+	 * We don't just copy directly into the BOs because we need to
+	 * read the contents back for validation, and I think the
+	 * bo->vaddr is uncached access.
+	 */
+	temp = kmalloc(temp_size, GFP_KERNEL);
+	if (!temp) {
+		DRM_ERROR("Failed to allocate storage for copying "
+			  "in bin/render CLs.\n");
+		ret = -ENOMEM;
+		goto fail;
+	}
+	bin = temp + bin_offset;
+	exec->shader_rec_u = temp + shader_rec_offset;
+	exec->uniforms_u = temp + uniforms_offset;
+	exec->shader_state = temp + exec_size;
+	exec->shader_state_size = args->shader_rec_count;
+
+	ret = copy_from_user(bin,
+			     (void __user *)(uintptr_t)args->bin_cl,
+			     args->bin_cl_size);
+	if (ret) {
+		DRM_ERROR("Failed to copy in bin cl\n");
+		goto fail;
+	}
+
+	ret = copy_from_user(exec->shader_rec_u,
+			     (void __user *)(uintptr_t)args->shader_rec,
+			     args->shader_rec_size);
+	if (ret) {
+		DRM_ERROR("Failed to copy in shader recs\n");
+		goto fail;
+	}
+
+	ret = copy_from_user(exec->uniforms_u,
+			     (void __user *)(uintptr_t)args->uniforms,
+			     args->uniforms_size);
+	if (ret) {
+		DRM_ERROR("Failed to copy in uniforms cl\n");
+		goto fail;
+	}
+
+	bo = vc4_bo_create(dev, exec_size, true);
+	if (!bo) {
+		DRM_ERROR("Couldn't allocate BO for binning\n");
+		ret = PTR_ERR(exec->exec_bo);
+		goto fail;
+	}
+	exec->exec_bo = &bo->base;
+
+	list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
+		      &exec->unref_list);
+
+	exec->ct0ca = exec->exec_bo->paddr + bin_offset;
+
+	exec->bin_u = bin;
+
+	exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
+	exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
+	exec->shader_rec_size = args->shader_rec_size;
+
+	exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
+	exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
+	exec->uniforms_size = args->uniforms_size;
+
+	ret = vc4_validate_bin_cl(dev,
+				  exec->exec_bo->vaddr + bin_offset,
+				  bin,
+				  exec);
+	if (ret)
+		goto fail;
+
+	ret = vc4_validate_shader_recs(dev, exec);
+
+fail:
+	kfree(temp);
+	return ret;
+}
+
+static void
+vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
+{
+	unsigned i;
+
+	/* Need the struct lock for drm_gem_object_unreference(). */
+	mutex_lock(&dev->struct_mutex);
+	if (exec->bo) {
+		for (i = 0; i < exec->bo_count; i++)
+			drm_gem_object_unreference(&exec->bo[i]->base);
+		kfree(exec->bo);
+	}
+
+	while (!list_empty(&exec->unref_list)) {
+		struct vc4_bo *bo = list_first_entry(&exec->unref_list,
+						     struct vc4_bo, unref_head);
+		list_del(&bo->unref_head);
+		drm_gem_object_unreference(&bo->base.base);
+	}
+	mutex_unlock(&dev->struct_mutex);
+
+	kfree(exec);
+}
+
+void
+vc4_job_handle_completed(struct vc4_dev *vc4)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&vc4->job_lock, irqflags);
+	while (!list_empty(&vc4->job_done_list)) {
+		struct vc4_exec_info *exec =
+			list_first_entry(&vc4->job_done_list,
+					 struct vc4_exec_info, head);
+		list_del(&exec->head);
+
+		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+		vc4_complete_exec(vc4->dev, exec);
+		spin_lock_irqsave(&vc4->job_lock, irqflags);
+	}
+	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+}
+
+/* Scheduled when any job has been completed, this walks the list of
+ * jobs that had completed and unrefs their BOs and frees their exec
+ * structs.
+ */
+static void
+vc4_job_done_work(struct work_struct *work)
+{
+	struct vc4_dev *vc4 =
+		container_of(work, struct vc4_dev, job_done_work);
+
+	vc4_job_handle_completed(vc4);
+}
+
+static int
+vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev,
+				uint64_t seqno,
+				uint64_t *timeout_ns)
+{
+	unsigned long start = jiffies;
+	int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true);
+
+	if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) {
+		uint64_t delta = jiffies_to_nsecs(jiffies - start);
+
+		if (*timeout_ns >= delta)
+			*timeout_ns -= delta;
+	}
+
+	return ret;
+}
+
+int
+vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
+		     struct drm_file *file_priv)
+{
+	struct drm_vc4_wait_seqno *args = data;
+
+	return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno,
+					       &args->timeout_ns);
+}
+
+int
+vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
+		  struct drm_file *file_priv)
+{
+	int ret;
+	struct drm_vc4_wait_bo *args = data;
+	struct drm_gem_object *gem_obj;
+	struct vc4_bo *bo;
+
+	gem_obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+	if (!gem_obj) {
+		DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
+		return -EINVAL;
+	}
+	bo = to_vc4_bo(gem_obj);
+
+	ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
+					      &args->timeout_ns);
+
+	drm_gem_object_unreference_unlocked(gem_obj);
+	return ret;
+}
+
+/**
+ * Submits a command list to the VC4.
+ *
+ * This is what is called batchbuffer emitting on other hardware.
+ */
+int
+vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
+		    struct drm_file *file_priv)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct drm_vc4_submit_cl *args = data;
+	struct vc4_exec_info *exec;
+	int ret;
+
+	if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
+		DRM_ERROR("Unknown flags: 0x%02x\n", args->flags);
+		return -EINVAL;
+	}
+
+	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
+	if (!exec) {
+		DRM_ERROR("malloc failure on exec struct\n");
+		return -ENOMEM;
+	}
+
+	exec->args = args;
+	INIT_LIST_HEAD(&exec->unref_list);
+
+	ret = vc4_cl_lookup_bos(dev, file_priv, exec);
+	if (ret)
+		goto fail;
+
+	if (exec->args->bin_cl_size != 0) {
+		ret = vc4_get_bcl(dev, exec);
+		if (ret)
+			goto fail;
+	} else {
+		exec->ct0ca = 0;
+		exec->ct0ea = 0;
+	}
+
+	ret = vc4_get_rcl(dev, exec);
+	if (ret)
+		goto fail;
+
+	/* Clear this out of the struct we'll be putting in the queue,
+	 * since it's part of our stack.
+	 */
+	exec->args = NULL;
+
+	vc4_queue_submit(dev, exec);
+
+	/* Return the seqno for our job. */
+	args->seqno = vc4->emit_seqno;
+
+	return 0;
+
+fail:
+	vc4_complete_exec(vc4->dev, exec);
+
+	return ret;
+}
+
+void
+vc4_gem_init(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	INIT_LIST_HEAD(&vc4->job_list);
+	INIT_LIST_HEAD(&vc4->job_done_list);
+	spin_lock_init(&vc4->job_lock);
+
+	INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
+	setup_timer(&vc4->hangcheck.timer,
+		    vc4_hangcheck_elapsed,
+		    (unsigned long)dev);
+
+	INIT_WORK(&vc4->job_done_work, vc4_job_done_work);
+}
+
+void
+vc4_gem_destroy(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	/* Waiting for exec to finish would need to be done before
+	 * unregistering V3D.
+	 */
+	WARN_ON(vc4->emit_seqno != vc4->finished_seqno);
+
+	/* V3D should already have disabled its interrupt and cleared
+	 * the overflow allocation registers.  Now free the object.
+	 */
+	if (vc4->overflow_mem) {
+		drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
+		vc4->overflow_mem = NULL;
+	}
+
+	vc4_bo_cache_destroy(dev);
+}
diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c
new file mode 100644
index 000000000000..b68060e758db
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_irq.c
@@ -0,0 +1,210 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** DOC: Interrupt management for the V3D engine.
+ *
+ * We have an interrupt status register (V3D_INTCTL) which reports
+ * interrupts, and where writing 1 bits clears those interrupts.
+ * There are also a pair of interrupt registers
+ * (V3D_INTENA/V3D_INTDIS) where writing a 1 to their bits enables or
+ * disables that specific interrupt, and 0s written are ignored
+ * (reading either one returns the set of enabled interrupts).
+ *
+ * When we take a render frame interrupt, we need to wake the
+ * processes waiting for some frame to be done, and get the next frame
+ * submitted ASAP (so the hardware doesn't sit idle when there's work
+ * to do).
+ *
+ * When we take the binner out of memory interrupt, we need to
+ * allocate some new memory and pass it to the binner so that the
+ * current job can make progress.
+ */
+
+#include "vc4_drv.h"
+#include "vc4_regs.h"
+
+#define V3D_DRIVER_IRQS (V3D_INT_OUTOMEM | \
+			 V3D_INT_FRDONE)
+
+DECLARE_WAIT_QUEUE_HEAD(render_wait);
+
+static void
+vc4_overflow_mem_work(struct work_struct *work)
+{
+	struct vc4_dev *vc4 =
+		container_of(work, struct vc4_dev, overflow_mem_work);
+	struct drm_device *dev = vc4->dev;
+	struct vc4_bo *bo;
+
+	bo = vc4_bo_create(dev, 256 * 1024, true);
+	if (!bo) {
+		DRM_ERROR("Couldn't allocate binner overflow mem\n");
+		return;
+	}
+
+	/* If there's a job executing currently, then our previous
+	 * overflow allocation is getting used in that job and we need
+	 * to queue it to be released when the job is done.  But if no
+	 * job is executing at all, then we can free the old overflow
+	 * object direcctly.
+	 *
+	 * No lock necessary for this pointer since we're the only
+	 * ones that update the pointer, and our workqueue won't
+	 * reenter.
+	 */
+	if (vc4->overflow_mem) {
+		struct vc4_exec_info *current_exec;
+		unsigned long irqflags;
+
+		spin_lock_irqsave(&vc4->job_lock, irqflags);
+		current_exec = vc4_first_job(vc4);
+		if (current_exec) {
+			vc4->overflow_mem->seqno = vc4->finished_seqno + 1;
+			list_add_tail(&vc4->overflow_mem->unref_head,
+				      &current_exec->unref_list);
+			vc4->overflow_mem = NULL;
+		}
+		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+	}
+
+	if (vc4->overflow_mem)
+		drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
+	vc4->overflow_mem = bo;
+
+	V3D_WRITE(V3D_BPOA, bo->base.paddr);
+	V3D_WRITE(V3D_BPOS, bo->base.base.size);
+	V3D_WRITE(V3D_INTCTL, V3D_INT_OUTOMEM);
+	V3D_WRITE(V3D_INTENA, V3D_INT_OUTOMEM);
+}
+
+static void
+vc4_irq_finish_job(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_exec_info *exec = vc4_first_job(vc4);
+
+	if (!exec)
+		return;
+
+	vc4->finished_seqno++;
+	list_move_tail(&exec->head, &vc4->job_done_list);
+	vc4_submit_next_job(dev);
+
+	wake_up_all(&vc4->job_wait_queue);
+	schedule_work(&vc4->job_done_work);
+}
+
+irqreturn_t
+vc4_irq(int irq, void *arg)
+{
+	struct drm_device *dev = arg;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	uint32_t intctl;
+	irqreturn_t status = IRQ_NONE;
+
+	barrier();
+	intctl = V3D_READ(V3D_INTCTL);
+
+	/* Acknowledge the interrupts we're handling here. The render
+	 * frame done interrupt will be cleared, while OUTOMEM will
+	 * stay high until the underlying cause is cleared.
+	 */
+	V3D_WRITE(V3D_INTCTL, intctl);
+
+	if (intctl & V3D_INT_OUTOMEM) {
+		/* Disable OUTOMEM until the work is done. */
+		V3D_WRITE(V3D_INTDIS, V3D_INT_OUTOMEM);
+		schedule_work(&vc4->overflow_mem_work);
+		status = IRQ_HANDLED;
+	}
+
+	if (intctl & V3D_INT_FRDONE) {
+		spin_lock(&vc4->job_lock);
+		vc4_irq_finish_job(dev);
+		spin_unlock(&vc4->job_lock);
+		status = IRQ_HANDLED;
+	}
+
+	return status;
+}
+
+void
+vc4_irq_preinstall(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	init_waitqueue_head(&vc4->job_wait_queue);
+	INIT_WORK(&vc4->overflow_mem_work, vc4_overflow_mem_work);
+
+	/* Clear any pending interrupts someone might have left around
+	 * for us.
+	 */
+	V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
+}
+
+int
+vc4_irq_postinstall(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	/* Enable both the render done and out of memory interrupts. */
+	V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
+
+	return 0;
+}
+
+void
+vc4_irq_uninstall(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	/* Disable sending interrupts for our driver's IRQs. */
+	V3D_WRITE(V3D_INTDIS, V3D_DRIVER_IRQS);
+
+	/* Clear any pending interrupts we might have left. */
+	V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
+
+	cancel_work_sync(&vc4->overflow_mem_work);
+}
+
+/** Reinitializes interrupt registers when a GPU reset is performed. */
+void vc4_irq_reset(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	unsigned long irqflags;
+
+	/* Acknowledge any stale IRQs. */
+	V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
+
+	/*
+	 * Turn all our interrupts on.  Binner out of memory is the
+	 * only one we expect to trigger at this point, since we've
+	 * just come from poweron and haven't supplied any overflow
+	 * memory yet.
+	 */
+	V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
+
+	spin_lock_irqsave(&vc4->job_lock, irqflags);
+	vc4_irq_finish_job(dev);
+	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+}
diff --git a/drivers/gpu/drm/vc4/vc4_packet.h b/drivers/gpu/drm/vc4/vc4_packet.h
new file mode 100644
index 000000000000..0f31cc06500f
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_packet.h
@@ -0,0 +1,399 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC4_PACKET_H
+#define VC4_PACKET_H
+
+#include "vc4_regs.h" /* for VC4_MASK, VC4_GET_FIELD, VC4_SET_FIELD */
+
+enum vc4_packet {
+	VC4_PACKET_HALT = 0,
+	VC4_PACKET_NOP = 1,
+
+	VC4_PACKET_FLUSH = 4,
+	VC4_PACKET_FLUSH_ALL = 5,
+	VC4_PACKET_START_TILE_BINNING = 6,
+	VC4_PACKET_INCREMENT_SEMAPHORE = 7,
+	VC4_PACKET_WAIT_ON_SEMAPHORE = 8,
+
+	VC4_PACKET_BRANCH = 16,
+	VC4_PACKET_BRANCH_TO_SUB_LIST = 17,
+
+	VC4_PACKET_STORE_MS_TILE_BUFFER = 24,
+	VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF = 25,
+	VC4_PACKET_STORE_FULL_RES_TILE_BUFFER = 26,
+	VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER = 27,
+	VC4_PACKET_STORE_TILE_BUFFER_GENERAL = 28,
+	VC4_PACKET_LOAD_TILE_BUFFER_GENERAL = 29,
+
+	VC4_PACKET_GL_INDEXED_PRIMITIVE = 32,
+	VC4_PACKET_GL_ARRAY_PRIMITIVE = 33,
+
+	VC4_PACKET_COMPRESSED_PRIMITIVE = 48,
+	VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE = 49,
+
+	VC4_PACKET_PRIMITIVE_LIST_FORMAT = 56,
+
+	VC4_PACKET_GL_SHADER_STATE = 64,
+	VC4_PACKET_NV_SHADER_STATE = 65,
+	VC4_PACKET_VG_SHADER_STATE = 66,
+
+	VC4_PACKET_CONFIGURATION_BITS = 96,
+	VC4_PACKET_FLAT_SHADE_FLAGS = 97,
+	VC4_PACKET_POINT_SIZE = 98,
+	VC4_PACKET_LINE_WIDTH = 99,
+	VC4_PACKET_RHT_X_BOUNDARY = 100,
+	VC4_PACKET_DEPTH_OFFSET = 101,
+	VC4_PACKET_CLIP_WINDOW = 102,
+	VC4_PACKET_VIEWPORT_OFFSET = 103,
+	VC4_PACKET_Z_CLIPPING = 104,
+	VC4_PACKET_CLIPPER_XY_SCALING = 105,
+	VC4_PACKET_CLIPPER_Z_SCALING = 106,
+
+	VC4_PACKET_TILE_BINNING_MODE_CONFIG = 112,
+	VC4_PACKET_TILE_RENDERING_MODE_CONFIG = 113,
+	VC4_PACKET_CLEAR_COLORS = 114,
+	VC4_PACKET_TILE_COORDINATES = 115,
+
+	/* Not an actual hardware packet -- this is what we use to put
+	 * references to GEM bos in the command stream, since we need the u32
+	 * int the actual address packet in order to store the offset from the
+	 * start of the BO.
+	 */
+	VC4_PACKET_GEM_HANDLES = 254,
+} __attribute__ ((__packed__));
+
+#define VC4_PACKET_HALT_SIZE						1
+#define VC4_PACKET_NOP_SIZE						1
+#define VC4_PACKET_FLUSH_SIZE						1
+#define VC4_PACKET_FLUSH_ALL_SIZE					1
+#define VC4_PACKET_START_TILE_BINNING_SIZE				1
+#define VC4_PACKET_INCREMENT_SEMAPHORE_SIZE				1
+#define VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE				1
+#define VC4_PACKET_BRANCH_SIZE						5
+#define VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE				5
+#define VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE				1
+#define VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF_SIZE			1
+#define VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE			5
+#define VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE			5
+#define VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE			7
+#define VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE			7
+#define VC4_PACKET_GL_INDEXED_PRIMITIVE_SIZE				14
+#define VC4_PACKET_GL_ARRAY_PRIMITIVE_SIZE				10
+#define VC4_PACKET_COMPRESSED_PRIMITIVE_SIZE				1
+#define VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE_SIZE			1
+#define VC4_PACKET_PRIMITIVE_LIST_FORMAT_SIZE				2
+#define VC4_PACKET_GL_SHADER_STATE_SIZE					5
+#define VC4_PACKET_NV_SHADER_STATE_SIZE					5
+#define VC4_PACKET_VG_SHADER_STATE_SIZE					5
+#define VC4_PACKET_CONFIGURATION_BITS_SIZE				4
+#define VC4_PACKET_FLAT_SHADE_FLAGS_SIZE				5
+#define VC4_PACKET_POINT_SIZE_SIZE					5
+#define VC4_PACKET_LINE_WIDTH_SIZE					5
+#define VC4_PACKET_RHT_X_BOUNDARY_SIZE					3
+#define VC4_PACKET_DEPTH_OFFSET_SIZE					5
+#define VC4_PACKET_CLIP_WINDOW_SIZE					9
+#define VC4_PACKET_VIEWPORT_OFFSET_SIZE					5
+#define VC4_PACKET_Z_CLIPPING_SIZE					9
+#define VC4_PACKET_CLIPPER_XY_SCALING_SIZE				9
+#define VC4_PACKET_CLIPPER_Z_SCALING_SIZE				9
+#define VC4_PACKET_TILE_BINNING_MODE_CONFIG_SIZE			16
+#define VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE			11
+#define VC4_PACKET_CLEAR_COLORS_SIZE					14
+#define VC4_PACKET_TILE_COORDINATES_SIZE				3
+#define VC4_PACKET_GEM_HANDLES_SIZE					9
+
+/* Number of multisamples supported. */
+#define VC4_MAX_SAMPLES							4
+/* Size of a full resolution color or Z tile buffer load/store. */
+#define VC4_TILE_BUFFER_SIZE			(64 * 64 * 4)
+
+/** @{
+ * Bits used by packets like VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
+ * VC4_PACKET_TILE_RENDERING_MODE_CONFIG.
+*/
+#define VC4_TILING_FORMAT_LINEAR    0
+#define VC4_TILING_FORMAT_T         1
+#define VC4_TILING_FORMAT_LT        2
+/** @} */
+
+/** @{
+ *
+ * low bits of VC4_PACKET_STORE_FULL_RES_TILE_BUFFER and
+ * VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER.
+ */
+#define VC4_LOADSTORE_FULL_RES_EOF                     BIT(3)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL       BIT(2)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_ZS              BIT(1)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_COLOR           BIT(0)
+
+/** @{
+ *
+ * low bits of VC4_PACKET_STORE_FULL_RES_TILE_BUFFER and
+ * VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER.
+ */
+#define VC4_LOADSTORE_FULL_RES_EOF                     BIT(3)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL       BIT(2)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_ZS              BIT(1)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_COLOR           BIT(0)
+
+/** @{
+ *
+ * byte 2 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
+ * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL (low bits of the address)
+ */
+
+#define VC4_LOADSTORE_TILE_BUFFER_EOF                  BIT(3)
+#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_VG_MASK BIT(2)
+#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_ZS      BIT(1)
+#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_COLOR   BIT(0)
+
+/** @} */
+
+/** @{
+ *
+ * byte 0-1 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
+ * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
+ */
+#define VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR BIT(15)
+#define VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR     BIT(14)
+#define VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR  BIT(13)
+#define VC4_STORE_TILE_BUFFER_DISABLE_SWAP         BIT(12)
+
+#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK      VC4_MASK(9, 8)
+#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT     8
+#define VC4_LOADSTORE_TILE_BUFFER_RGBA8888         0
+#define VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER    1
+#define VC4_LOADSTORE_TILE_BUFFER_BGR565           2
+/** @} */
+
+/** @{
+ *
+ * byte 0 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
+ * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
+ */
+#define VC4_STORE_TILE_BUFFER_MODE_MASK            VC4_MASK(7, 6)
+#define VC4_STORE_TILE_BUFFER_MODE_SHIFT           6
+#define VC4_STORE_TILE_BUFFER_MODE_SAMPLE0         (0 << 6)
+#define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X4     (1 << 6)
+#define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X16    (2 << 6)
+
+/** The values of the field are VC4_TILING_FORMAT_* */
+#define VC4_LOADSTORE_TILE_BUFFER_TILING_MASK      VC4_MASK(5, 4)
+#define VC4_LOADSTORE_TILE_BUFFER_TILING_SHIFT     4
+
+#define VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK      VC4_MASK(2, 0)
+#define VC4_LOADSTORE_TILE_BUFFER_BUFFER_SHIFT     0
+#define VC4_LOADSTORE_TILE_BUFFER_NONE             0
+#define VC4_LOADSTORE_TILE_BUFFER_COLOR            1
+#define VC4_LOADSTORE_TILE_BUFFER_ZS               2
+#define VC4_LOADSTORE_TILE_BUFFER_Z                3
+#define VC4_LOADSTORE_TILE_BUFFER_VG_MASK          4
+#define VC4_LOADSTORE_TILE_BUFFER_FULL             5
+/** @} */
+
+#define VC4_INDEX_BUFFER_U8                        (0 << 4)
+#define VC4_INDEX_BUFFER_U16                       (1 << 4)
+
+/* This flag is only present in NV shader state. */
+#define VC4_SHADER_FLAG_SHADED_CLIP_COORDS         BIT(3)
+#define VC4_SHADER_FLAG_ENABLE_CLIPPING            BIT(2)
+#define VC4_SHADER_FLAG_VS_POINT_SIZE              BIT(1)
+#define VC4_SHADER_FLAG_FS_SINGLE_THREAD           BIT(0)
+
+/** @{ byte 2 of config bits. */
+#define VC4_CONFIG_BITS_EARLY_Z_UPDATE             BIT(1)
+#define VC4_CONFIG_BITS_EARLY_Z                    BIT(0)
+/** @} */
+
+/** @{ byte 1 of config bits. */
+#define VC4_CONFIG_BITS_Z_UPDATE                   BIT(7)
+/** same values in this 3-bit field as PIPE_FUNC_* */
+#define VC4_CONFIG_BITS_DEPTH_FUNC_SHIFT           4
+#define VC4_CONFIG_BITS_COVERAGE_READ_LEAVE        BIT(3)
+
+#define VC4_CONFIG_BITS_COVERAGE_UPDATE_NONZERO    (0 << 1)
+#define VC4_CONFIG_BITS_COVERAGE_UPDATE_ODD        (1 << 1)
+#define VC4_CONFIG_BITS_COVERAGE_UPDATE_OR         (2 << 1)
+#define VC4_CONFIG_BITS_COVERAGE_UPDATE_ZERO       (3 << 1)
+
+#define VC4_CONFIG_BITS_COVERAGE_PIPE_SELECT       BIT(0)
+/** @} */
+
+/** @{ byte 0 of config bits. */
+#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_NONE (0 << 6)
+#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_4X   (1 << 6)
+#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_16X  (2 << 6)
+
+#define VC4_CONFIG_BITS_AA_POINTS_AND_LINES        BIT(4)
+#define VC4_CONFIG_BITS_ENABLE_DEPTH_OFFSET        BIT(3)
+#define VC4_CONFIG_BITS_CW_PRIMITIVES              BIT(2)
+#define VC4_CONFIG_BITS_ENABLE_PRIM_BACK           BIT(1)
+#define VC4_CONFIG_BITS_ENABLE_PRIM_FRONT          BIT(0)
+/** @} */
+
+/** @{ bits in the last u8 of VC4_PACKET_TILE_BINNING_MODE_CONFIG */
+#define VC4_BIN_CONFIG_DB_NON_MS                   BIT(7)
+
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK       VC4_MASK(6, 5)
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_SHIFT      5
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32         0
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_64         1
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128        2
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_256        3
+
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK  VC4_MASK(4, 3)
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_SHIFT 3
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32    0
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_64    1
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_128   2
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_256   3
+
+#define VC4_BIN_CONFIG_AUTO_INIT_TSDA              BIT(2)
+#define VC4_BIN_CONFIG_TILE_BUFFER_64BIT           BIT(1)
+#define VC4_BIN_CONFIG_MS_MODE_4X                  BIT(0)
+/** @} */
+
+/** @{ bits in the last u16 of VC4_PACKET_TILE_RENDERING_MODE_CONFIG */
+#define VC4_RENDER_CONFIG_DB_NON_MS                BIT(12)
+#define VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE BIT(11)
+#define VC4_RENDER_CONFIG_EARLY_Z_DIRECTION_G      BIT(10)
+#define VC4_RENDER_CONFIG_COVERAGE_MODE            BIT(9)
+#define VC4_RENDER_CONFIG_ENABLE_VG_MASK           BIT(8)
+
+/** The values of the field are VC4_TILING_FORMAT_* */
+#define VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK       VC4_MASK(7, 6)
+#define VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT      6
+
+#define VC4_RENDER_CONFIG_DECIMATE_MODE_1X         (0 << 4)
+#define VC4_RENDER_CONFIG_DECIMATE_MODE_4X         (1 << 4)
+#define VC4_RENDER_CONFIG_DECIMATE_MODE_16X        (2 << 4)
+
+#define VC4_RENDER_CONFIG_FORMAT_MASK              VC4_MASK(3, 2)
+#define VC4_RENDER_CONFIG_FORMAT_SHIFT             2
+#define VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED   0
+#define VC4_RENDER_CONFIG_FORMAT_RGBA8888          1
+#define VC4_RENDER_CONFIG_FORMAT_BGR565            2
+
+#define VC4_RENDER_CONFIG_TILE_BUFFER_64BIT        BIT(1)
+#define VC4_RENDER_CONFIG_MS_MODE_4X               BIT(0)
+
+#define VC4_PRIMITIVE_LIST_FORMAT_16_INDEX         (1 << 4)
+#define VC4_PRIMITIVE_LIST_FORMAT_32_XY            (3 << 4)
+#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_POINTS      (0 << 0)
+#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_LINES       (1 << 0)
+#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES   (2 << 0)
+#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_RHT         (3 << 0)
+
+enum vc4_texture_data_type {
+	VC4_TEXTURE_TYPE_RGBA8888 = 0,
+	VC4_TEXTURE_TYPE_RGBX8888 = 1,
+	VC4_TEXTURE_TYPE_RGBA4444 = 2,
+	VC4_TEXTURE_TYPE_RGBA5551 = 3,
+	VC4_TEXTURE_TYPE_RGB565 = 4,
+	VC4_TEXTURE_TYPE_LUMINANCE = 5,
+	VC4_TEXTURE_TYPE_ALPHA = 6,
+	VC4_TEXTURE_TYPE_LUMALPHA = 7,
+	VC4_TEXTURE_TYPE_ETC1 = 8,
+	VC4_TEXTURE_TYPE_S16F = 9,
+	VC4_TEXTURE_TYPE_S8 = 10,
+	VC4_TEXTURE_TYPE_S16 = 11,
+	VC4_TEXTURE_TYPE_BW1 = 12,
+	VC4_TEXTURE_TYPE_A4 = 13,
+	VC4_TEXTURE_TYPE_A1 = 14,
+	VC4_TEXTURE_TYPE_RGBA64 = 15,
+	VC4_TEXTURE_TYPE_RGBA32R = 16,
+	VC4_TEXTURE_TYPE_YUV422R = 17,
+};
+
+#define VC4_TEX_P0_OFFSET_MASK                     VC4_MASK(31, 12)
+#define VC4_TEX_P0_OFFSET_SHIFT                    12
+#define VC4_TEX_P0_CSWIZ_MASK                      VC4_MASK(11, 10)
+#define VC4_TEX_P0_CSWIZ_SHIFT                     10
+#define VC4_TEX_P0_CMMODE_MASK                     VC4_MASK(9, 9)
+#define VC4_TEX_P0_CMMODE_SHIFT                    9
+#define VC4_TEX_P0_FLIPY_MASK                      VC4_MASK(8, 8)
+#define VC4_TEX_P0_FLIPY_SHIFT                     8
+#define VC4_TEX_P0_TYPE_MASK                       VC4_MASK(7, 4)
+#define VC4_TEX_P0_TYPE_SHIFT                      4
+#define VC4_TEX_P0_MIPLVLS_MASK                    VC4_MASK(3, 0)
+#define VC4_TEX_P0_MIPLVLS_SHIFT                   0
+
+#define VC4_TEX_P1_TYPE4_MASK                      VC4_MASK(31, 31)
+#define VC4_TEX_P1_TYPE4_SHIFT                     31
+#define VC4_TEX_P1_HEIGHT_MASK                     VC4_MASK(30, 20)
+#define VC4_TEX_P1_HEIGHT_SHIFT                    20
+#define VC4_TEX_P1_ETCFLIP_MASK                    VC4_MASK(19, 19)
+#define VC4_TEX_P1_ETCFLIP_SHIFT                   19
+#define VC4_TEX_P1_WIDTH_MASK                      VC4_MASK(18, 8)
+#define VC4_TEX_P1_WIDTH_SHIFT                     8
+
+#define VC4_TEX_P1_MAGFILT_MASK                    VC4_MASK(7, 7)
+#define VC4_TEX_P1_MAGFILT_SHIFT                   7
+# define VC4_TEX_P1_MAGFILT_LINEAR                 0
+# define VC4_TEX_P1_MAGFILT_NEAREST                1
+
+#define VC4_TEX_P1_MINFILT_MASK                    VC4_MASK(6, 4)
+#define VC4_TEX_P1_MINFILT_SHIFT                   4
+# define VC4_TEX_P1_MINFILT_LINEAR                 0
+# define VC4_TEX_P1_MINFILT_NEAREST                1
+# define VC4_TEX_P1_MINFILT_NEAR_MIP_NEAR          2
+# define VC4_TEX_P1_MINFILT_NEAR_MIP_LIN           3
+# define VC4_TEX_P1_MINFILT_LIN_MIP_NEAR           4
+# define VC4_TEX_P1_MINFILT_LIN_MIP_LIN            5
+
+#define VC4_TEX_P1_WRAP_T_MASK                     VC4_MASK(3, 2)
+#define VC4_TEX_P1_WRAP_T_SHIFT                    2
+#define VC4_TEX_P1_WRAP_S_MASK                     VC4_MASK(1, 0)
+#define VC4_TEX_P1_WRAP_S_SHIFT                    0
+# define VC4_TEX_P1_WRAP_REPEAT                    0
+# define VC4_TEX_P1_WRAP_CLAMP                     1
+# define VC4_TEX_P1_WRAP_MIRROR                    2
+# define VC4_TEX_P1_WRAP_BORDER                    3
+
+#define VC4_TEX_P2_PTYPE_MASK                      VC4_MASK(31, 30)
+#define VC4_TEX_P2_PTYPE_SHIFT                     30
+# define VC4_TEX_P2_PTYPE_IGNORED                  0
+# define VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE          1
+# define VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS   2
+# define VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS      3
+
+/* VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE bits */
+#define VC4_TEX_P2_CMST_MASK                       VC4_MASK(29, 12)
+#define VC4_TEX_P2_CMST_SHIFT                      12
+#define VC4_TEX_P2_BSLOD_MASK                      VC4_MASK(0, 0)
+#define VC4_TEX_P2_BSLOD_SHIFT                     0
+
+/* VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS */
+#define VC4_TEX_P2_CHEIGHT_MASK                    VC4_MASK(22, 12)
+#define VC4_TEX_P2_CHEIGHT_SHIFT                   12
+#define VC4_TEX_P2_CWIDTH_MASK                     VC4_MASK(10, 0)
+#define VC4_TEX_P2_CWIDTH_SHIFT                    0
+
+/* VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS */
+#define VC4_TEX_P2_CYOFF_MASK                      VC4_MASK(22, 12)
+#define VC4_TEX_P2_CYOFF_SHIFT                     12
+#define VC4_TEX_P2_CXOFF_MASK                      VC4_MASK(10, 0)
+#define VC4_TEX_P2_CXOFF_SHIFT                     0
+
+#endif /* VC4_PACKET_H */
diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c
new file mode 100644
index 000000000000..8a2a312e2c1b
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_render_cl.c
@@ -0,0 +1,634 @@
+/*
+ * Copyright © 2014-2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * DOC: Render command list generation
+ *
+ * In the VC4 driver, render command list generation is performed by the
+ * kernel instead of userspace.  We do this because validating a
+ * user-submitted command list is hard to get right and has high CPU overhead,
+ * while the number of valid configurations for render command lists is
+ * actually fairly low.
+ */
+
+#include "uapi/drm/vc4_drm.h"
+#include "vc4_drv.h"
+#include "vc4_packet.h"
+
+struct vc4_rcl_setup {
+	struct drm_gem_cma_object *color_read;
+	struct drm_gem_cma_object *color_write;
+	struct drm_gem_cma_object *zs_read;
+	struct drm_gem_cma_object *zs_write;
+	struct drm_gem_cma_object *msaa_color_write;
+	struct drm_gem_cma_object *msaa_zs_write;
+
+	struct drm_gem_cma_object *rcl;
+	u32 next_offset;
+};
+
+static inline void rcl_u8(struct vc4_rcl_setup *setup, u8 val)
+{
+	*(u8 *)(setup->rcl->vaddr + setup->next_offset) = val;
+	setup->next_offset += 1;
+}
+
+static inline void rcl_u16(struct vc4_rcl_setup *setup, u16 val)
+{
+	*(u16 *)(setup->rcl->vaddr + setup->next_offset) = val;
+	setup->next_offset += 2;
+}
+
+static inline void rcl_u32(struct vc4_rcl_setup *setup, u32 val)
+{
+	*(u32 *)(setup->rcl->vaddr + setup->next_offset) = val;
+	setup->next_offset += 4;
+}
+
+/*
+ * Emits a no-op STORE_TILE_BUFFER_GENERAL.
+ *
+ * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of
+ * some sort before another load is triggered.
+ */
+static void vc4_store_before_load(struct vc4_rcl_setup *setup)
+{
+	rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
+	rcl_u16(setup,
+		VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_NONE,
+			      VC4_LOADSTORE_TILE_BUFFER_BUFFER) |
+		VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR |
+		VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR |
+		VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR);
+	rcl_u32(setup, 0); /* no address, since we're in None mode */
+}
+
+/*
+ * Calculates the physical address of the start of a tile in a RCL surface.
+ *
+ * Unlike the other load/store packets,
+ * VC4_PACKET_LOAD/STORE_FULL_RES_TILE_BUFFER don't look at the tile
+ * coordinates packet, and instead just store to the address given.
+ */
+static uint32_t vc4_full_res_offset(struct vc4_exec_info *exec,
+				    struct drm_gem_cma_object *bo,
+				    struct drm_vc4_submit_rcl_surface *surf,
+				    uint8_t x, uint8_t y)
+{
+	return bo->paddr + surf->offset + VC4_TILE_BUFFER_SIZE *
+		(DIV_ROUND_UP(exec->args->width, 32) * y + x);
+}
+
+/*
+ * Emits a PACKET_TILE_COORDINATES if one isn't already pending.
+ *
+ * The tile coordinates packet triggers a pending load if there is one, are
+ * used for clipping during rendering, and determine where loads/stores happen
+ * relative to their base address.
+ */
+static void vc4_tile_coordinates(struct vc4_rcl_setup *setup,
+				 uint32_t x, uint32_t y)
+{
+	rcl_u8(setup, VC4_PACKET_TILE_COORDINATES);
+	rcl_u8(setup, x);
+	rcl_u8(setup, y);
+}
+
+static void emit_tile(struct vc4_exec_info *exec,
+		      struct vc4_rcl_setup *setup,
+		      uint8_t x, uint8_t y, bool first, bool last)
+{
+	struct drm_vc4_submit_cl *args = exec->args;
+	bool has_bin = args->bin_cl_size != 0;
+
+	/* Note that the load doesn't actually occur until the
+	 * tile coords packet is processed, and only one load
+	 * may be outstanding at a time.
+	 */
+	if (setup->color_read) {
+		if (args->color_read.flags &
+		    VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
+			rcl_u8(setup, VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER);
+			rcl_u32(setup,
+				vc4_full_res_offset(exec, setup->color_read,
+						    &args->color_read, x, y) |
+				VC4_LOADSTORE_FULL_RES_DISABLE_ZS);
+		} else {
+			rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
+			rcl_u16(setup, args->color_read.bits);
+			rcl_u32(setup, setup->color_read->paddr +
+				args->color_read.offset);
+		}
+	}
+
+	if (setup->zs_read) {
+		if (args->zs_read.flags &
+		    VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
+			rcl_u8(setup, VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER);
+			rcl_u32(setup,
+				vc4_full_res_offset(exec, setup->zs_read,
+						    &args->zs_read, x, y) |
+				VC4_LOADSTORE_FULL_RES_DISABLE_COLOR);
+		} else {
+			if (setup->color_read) {
+				/* Exec previous load. */
+				vc4_tile_coordinates(setup, x, y);
+				vc4_store_before_load(setup);
+			}
+
+			rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
+			rcl_u16(setup, args->zs_read.bits);
+			rcl_u32(setup, setup->zs_read->paddr +
+				args->zs_read.offset);
+		}
+	}
+
+	/* Clipping depends on tile coordinates having been
+	 * emitted, so we always need one here.
+	 */
+	vc4_tile_coordinates(setup, x, y);
+
+	/* Wait for the binner before jumping to the first
+	 * tile's lists.
+	 */
+	if (first && has_bin)
+		rcl_u8(setup, VC4_PACKET_WAIT_ON_SEMAPHORE);
+
+	if (has_bin) {
+		rcl_u8(setup, VC4_PACKET_BRANCH_TO_SUB_LIST);
+		rcl_u32(setup, (exec->tile_bo->paddr +
+				exec->tile_alloc_offset +
+				(y * exec->bin_tiles_x + x) * 32));
+	}
+
+	if (setup->msaa_color_write) {
+		bool last_tile_write = (!setup->msaa_zs_write &&
+					!setup->zs_write &&
+					!setup->color_write);
+		uint32_t bits = VC4_LOADSTORE_FULL_RES_DISABLE_ZS;
+
+		if (!last_tile_write)
+			bits |= VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL;
+		else if (last)
+			bits |= VC4_LOADSTORE_FULL_RES_EOF;
+		rcl_u8(setup, VC4_PACKET_STORE_FULL_RES_TILE_BUFFER);
+		rcl_u32(setup,
+			vc4_full_res_offset(exec, setup->msaa_color_write,
+					    &args->msaa_color_write, x, y) |
+			bits);
+	}
+
+	if (setup->msaa_zs_write) {
+		bool last_tile_write = (!setup->zs_write &&
+					!setup->color_write);
+		uint32_t bits = VC4_LOADSTORE_FULL_RES_DISABLE_COLOR;
+
+		if (setup->msaa_color_write)
+			vc4_tile_coordinates(setup, x, y);
+		if (!last_tile_write)
+			bits |= VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL;
+		else if (last)
+			bits |= VC4_LOADSTORE_FULL_RES_EOF;
+		rcl_u8(setup, VC4_PACKET_STORE_FULL_RES_TILE_BUFFER);
+		rcl_u32(setup,
+			vc4_full_res_offset(exec, setup->msaa_zs_write,
+					    &args->msaa_zs_write, x, y) |
+			bits);
+	}
+
+	if (setup->zs_write) {
+		bool last_tile_write = !setup->color_write;
+
+		if (setup->msaa_color_write || setup->msaa_zs_write)
+			vc4_tile_coordinates(setup, x, y);
+
+		rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
+		rcl_u16(setup, args->zs_write.bits |
+			(last_tile_write ?
+			 0 : VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR));
+		rcl_u32(setup,
+			(setup->zs_write->paddr + args->zs_write.offset) |
+			((last && last_tile_write) ?
+			 VC4_LOADSTORE_TILE_BUFFER_EOF : 0));
+	}
+
+	if (setup->color_write) {
+		if (setup->msaa_color_write || setup->msaa_zs_write ||
+		    setup->zs_write) {
+			vc4_tile_coordinates(setup, x, y);
+		}
+
+		if (last)
+			rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF);
+		else
+			rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER);
+	}
+}
+
+static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
+			     struct vc4_rcl_setup *setup)
+{
+	struct drm_vc4_submit_cl *args = exec->args;
+	bool has_bin = args->bin_cl_size != 0;
+	uint8_t min_x_tile = args->min_x_tile;
+	uint8_t min_y_tile = args->min_y_tile;
+	uint8_t max_x_tile = args->max_x_tile;
+	uint8_t max_y_tile = args->max_y_tile;
+	uint8_t xtiles = max_x_tile - min_x_tile + 1;
+	uint8_t ytiles = max_y_tile - min_y_tile + 1;
+	uint8_t x, y;
+	uint32_t size, loop_body_size;
+
+	size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE;
+	loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE;
+
+	if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
+		size += VC4_PACKET_CLEAR_COLORS_SIZE +
+			VC4_PACKET_TILE_COORDINATES_SIZE +
+			VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
+	}
+
+	if (setup->color_read) {
+		if (args->color_read.flags &
+		    VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
+			loop_body_size += VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE;
+		} else {
+			loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;
+		}
+	}
+	if (setup->zs_read) {
+		if (args->zs_read.flags &
+		    VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
+			loop_body_size += VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE;
+		} else {
+			if (setup->color_read &&
+			    !(args->color_read.flags &
+			      VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES)) {
+				loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE;
+				loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
+			}
+			loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;
+		}
+	}
+
+	if (has_bin) {
+		size += VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE;
+		loop_body_size += VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE;
+	}
+
+	if (setup->msaa_color_write)
+		loop_body_size += VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE;
+	if (setup->msaa_zs_write)
+		loop_body_size += VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE;
+
+	if (setup->zs_write)
+		loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
+	if (setup->color_write)
+		loop_body_size += VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE;
+
+	/* We need a VC4_PACKET_TILE_COORDINATES in between each store. */
+	loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE *
+		((setup->msaa_color_write != NULL) +
+		 (setup->msaa_zs_write != NULL) +
+		 (setup->color_write != NULL) +
+		 (setup->zs_write != NULL) - 1);
+
+	size += xtiles * ytiles * loop_body_size;
+
+	setup->rcl = &vc4_bo_create(dev, size, true)->base;
+	if (!setup->rcl)
+		return -ENOMEM;
+	list_add_tail(&to_vc4_bo(&setup->rcl->base)->unref_head,
+		      &exec->unref_list);
+
+	rcl_u8(setup, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
+	rcl_u32(setup,
+		(setup->color_write ? (setup->color_write->paddr +
+				       args->color_write.offset) :
+		 0));
+	rcl_u16(setup, args->width);
+	rcl_u16(setup, args->height);
+	rcl_u16(setup, args->color_write.bits);
+
+	/* The tile buffer gets cleared when the previous tile is stored.  If
+	 * the clear values changed between frames, then the tile buffer has
+	 * stale clear values in it, so we have to do a store in None mode (no
+	 * writes) so that we trigger the tile buffer clear.
+	 */
+	if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
+		rcl_u8(setup, VC4_PACKET_CLEAR_COLORS);
+		rcl_u32(setup, args->clear_color[0]);
+		rcl_u32(setup, args->clear_color[1]);
+		rcl_u32(setup, args->clear_z);
+		rcl_u8(setup, args->clear_s);
+
+		vc4_tile_coordinates(setup, 0, 0);
+
+		rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
+		rcl_u16(setup, VC4_LOADSTORE_TILE_BUFFER_NONE);
+		rcl_u32(setup, 0); /* no address, since we're in None mode */
+	}
+
+	for (y = min_y_tile; y <= max_y_tile; y++) {
+		for (x = min_x_tile; x <= max_x_tile; x++) {
+			bool first = (x == min_x_tile && y == min_y_tile);
+			bool last = (x == max_x_tile && y == max_y_tile);
+
+			emit_tile(exec, setup, x, y, first, last);
+		}
+	}
+
+	BUG_ON(setup->next_offset != size);
+	exec->ct1ca = setup->rcl->paddr;
+	exec->ct1ea = setup->rcl->paddr + setup->next_offset;
+
+	return 0;
+}
+
+static int vc4_full_res_bounds_check(struct vc4_exec_info *exec,
+				     struct drm_gem_cma_object *obj,
+				     struct drm_vc4_submit_rcl_surface *surf)
+{
+	struct drm_vc4_submit_cl *args = exec->args;
+	u32 render_tiles_stride = DIV_ROUND_UP(exec->args->width, 32);
+
+	if (surf->offset > obj->base.size) {
+		DRM_ERROR("surface offset %d > BO size %zd\n",
+			  surf->offset, obj->base.size);
+		return -EINVAL;
+	}
+
+	if ((obj->base.size - surf->offset) / VC4_TILE_BUFFER_SIZE <
+	    render_tiles_stride * args->max_y_tile + args->max_x_tile) {
+		DRM_ERROR("MSAA tile %d, %d out of bounds "
+			  "(bo size %zd, offset %d).\n",
+			  args->max_x_tile, args->max_y_tile,
+			  obj->base.size,
+			  surf->offset);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int vc4_rcl_msaa_surface_setup(struct vc4_exec_info *exec,
+				      struct drm_gem_cma_object **obj,
+				      struct drm_vc4_submit_rcl_surface *surf)
+{
+	if (surf->flags != 0 || surf->bits != 0) {
+		DRM_ERROR("MSAA surface had nonzero flags/bits\n");
+		return -EINVAL;
+	}
+
+	if (surf->hindex == ~0)
+		return 0;
+
+	*obj = vc4_use_bo(exec, surf->hindex);
+	if (!*obj)
+		return -EINVAL;
+
+	if (surf->offset & 0xf) {
+		DRM_ERROR("MSAA write must be 16b aligned.\n");
+		return -EINVAL;
+	}
+
+	return vc4_full_res_bounds_check(exec, *obj, surf);
+}
+
+static int vc4_rcl_surface_setup(struct vc4_exec_info *exec,
+				 struct drm_gem_cma_object **obj,
+				 struct drm_vc4_submit_rcl_surface *surf)
+{
+	uint8_t tiling = VC4_GET_FIELD(surf->bits,
+				       VC4_LOADSTORE_TILE_BUFFER_TILING);
+	uint8_t buffer = VC4_GET_FIELD(surf->bits,
+				       VC4_LOADSTORE_TILE_BUFFER_BUFFER);
+	uint8_t format = VC4_GET_FIELD(surf->bits,
+				       VC4_LOADSTORE_TILE_BUFFER_FORMAT);
+	int cpp;
+	int ret;
+
+	if (surf->flags & ~VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
+		DRM_ERROR("Extra flags set\n");
+		return -EINVAL;
+	}
+
+	if (surf->hindex == ~0)
+		return 0;
+
+	*obj = vc4_use_bo(exec, surf->hindex);
+	if (!*obj)
+		return -EINVAL;
+
+	if (surf->flags & VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
+		if (surf == &exec->args->zs_write) {
+			DRM_ERROR("general zs write may not be a full-res.\n");
+			return -EINVAL;
+		}
+
+		if (surf->bits != 0) {
+			DRM_ERROR("load/store general bits set with "
+				  "full res load/store.\n");
+			return -EINVAL;
+		}
+
+		ret = vc4_full_res_bounds_check(exec, *obj, surf);
+		if (!ret)
+			return ret;
+
+		return 0;
+	}
+
+	if (surf->bits & ~(VC4_LOADSTORE_TILE_BUFFER_TILING_MASK |
+			   VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK |
+			   VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK)) {
+		DRM_ERROR("Unknown bits in load/store: 0x%04x\n",
+			  surf->bits);
+		return -EINVAL;
+	}
+
+	if (tiling > VC4_TILING_FORMAT_LT) {
+		DRM_ERROR("Bad tiling format\n");
+		return -EINVAL;
+	}
+
+	if (buffer == VC4_LOADSTORE_TILE_BUFFER_ZS) {
+		if (format != 0) {
+			DRM_ERROR("No color format should be set for ZS\n");
+			return -EINVAL;
+		}
+		cpp = 4;
+	} else if (buffer == VC4_LOADSTORE_TILE_BUFFER_COLOR) {
+		switch (format) {
+		case VC4_LOADSTORE_TILE_BUFFER_BGR565:
+		case VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER:
+			cpp = 2;
+			break;
+		case VC4_LOADSTORE_TILE_BUFFER_RGBA8888:
+			cpp = 4;
+			break;
+		default:
+			DRM_ERROR("Bad tile buffer format\n");
+			return -EINVAL;
+		}
+	} else {
+		DRM_ERROR("Bad load/store buffer %d.\n", buffer);
+		return -EINVAL;
+	}
+
+	if (surf->offset & 0xf) {
+		DRM_ERROR("load/store buffer must be 16b aligned.\n");
+		return -EINVAL;
+	}
+
+	if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling,
+				exec->args->width, exec->args->height, cpp)) {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+vc4_rcl_render_config_surface_setup(struct vc4_exec_info *exec,
+				    struct vc4_rcl_setup *setup,
+				    struct drm_gem_cma_object **obj,
+				    struct drm_vc4_submit_rcl_surface *surf)
+{
+	uint8_t tiling = VC4_GET_FIELD(surf->bits,
+				       VC4_RENDER_CONFIG_MEMORY_FORMAT);
+	uint8_t format = VC4_GET_FIELD(surf->bits,
+				       VC4_RENDER_CONFIG_FORMAT);
+	int cpp;
+
+	if (surf->flags != 0) {
+		DRM_ERROR("No flags supported on render config.\n");
+		return -EINVAL;
+	}
+
+	if (surf->bits & ~(VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK |
+			   VC4_RENDER_CONFIG_FORMAT_MASK |
+			   VC4_RENDER_CONFIG_MS_MODE_4X |
+			   VC4_RENDER_CONFIG_DECIMATE_MODE_4X)) {
+		DRM_ERROR("Unknown bits in render config: 0x%04x\n",
+			  surf->bits);
+		return -EINVAL;
+	}
+
+	if (surf->hindex == ~0)
+		return 0;
+
+	*obj = vc4_use_bo(exec, surf->hindex);
+	if (!*obj)
+		return -EINVAL;
+
+	if (tiling > VC4_TILING_FORMAT_LT) {
+		DRM_ERROR("Bad tiling format\n");
+		return -EINVAL;
+	}
+
+	switch (format) {
+	case VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED:
+	case VC4_RENDER_CONFIG_FORMAT_BGR565:
+		cpp = 2;
+		break;
+	case VC4_RENDER_CONFIG_FORMAT_RGBA8888:
+		cpp = 4;
+		break;
+	default:
+		DRM_ERROR("Bad tile buffer format\n");
+		return -EINVAL;
+	}
+
+	if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling,
+				exec->args->width, exec->args->height, cpp)) {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec)
+{
+	struct vc4_rcl_setup setup = {0};
+	struct drm_vc4_submit_cl *args = exec->args;
+	bool has_bin = args->bin_cl_size != 0;
+	int ret;
+
+	if (args->min_x_tile > args->max_x_tile ||
+	    args->min_y_tile > args->max_y_tile) {
+		DRM_ERROR("Bad render tile set (%d,%d)-(%d,%d)\n",
+			  args->min_x_tile, args->min_y_tile,
+			  args->max_x_tile, args->max_y_tile);
+		return -EINVAL;
+	}
+
+	if (has_bin &&
+	    (args->max_x_tile > exec->bin_tiles_x ||
+	     args->max_y_tile > exec->bin_tiles_y)) {
+		DRM_ERROR("Render tiles (%d,%d) outside of bin config "
+			  "(%d,%d)\n",
+			  args->max_x_tile, args->max_y_tile,
+			  exec->bin_tiles_x, exec->bin_tiles_y);
+		return -EINVAL;
+	}
+
+	ret = vc4_rcl_render_config_surface_setup(exec, &setup,
+						  &setup.color_write,
+						  &args->color_write);
+	if (ret)
+		return ret;
+
+	ret = vc4_rcl_surface_setup(exec, &setup.color_read, &args->color_read);
+	if (ret)
+		return ret;
+
+	ret = vc4_rcl_surface_setup(exec, &setup.zs_read, &args->zs_read);
+	if (ret)
+		return ret;
+
+	ret = vc4_rcl_surface_setup(exec, &setup.zs_write, &args->zs_write);
+	if (ret)
+		return ret;
+
+	ret = vc4_rcl_msaa_surface_setup(exec, &setup.msaa_color_write,
+					 &args->msaa_color_write);
+	if (ret)
+		return ret;
+
+	ret = vc4_rcl_msaa_surface_setup(exec, &setup.msaa_zs_write,
+					 &args->msaa_zs_write);
+	if (ret)
+		return ret;
+
+	/* We shouldn't even have the job submitted to us if there's no
+	 * surface to write out.
+	 */
+	if (!setup.color_write && !setup.zs_write &&
+	    !setup.msaa_color_write && !setup.msaa_zs_write) {
+		DRM_ERROR("RCL requires color or Z/S write\n");
+		return -EINVAL;
+	}
+
+	return vc4_create_rcl_bo(dev, exec, &setup);
+}
diff --git a/drivers/gpu/drm/vc4/vc4_trace.h b/drivers/gpu/drm/vc4/vc4_trace.h
new file mode 100644
index 000000000000..ad7b1ea720c2
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_trace.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2015 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#if !defined(_VC4_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _VC4_TRACE_H_
+
+#include <linux/stringify.h>
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM vc4
+#define TRACE_INCLUDE_FILE vc4_trace
+
+TRACE_EVENT(vc4_wait_for_seqno_begin,
+	    TP_PROTO(struct drm_device *dev, uint64_t seqno, uint64_t timeout),
+	    TP_ARGS(dev, seqno, timeout),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(u64, seqno)
+			     __field(u64, timeout)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   __entry->seqno = seqno;
+			   __entry->timeout = timeout;
+			   ),
+
+	    TP_printk("dev=%u, seqno=%llu, timeout=%llu",
+		      __entry->dev, __entry->seqno, __entry->timeout)
+);
+
+TRACE_EVENT(vc4_wait_for_seqno_end,
+	    TP_PROTO(struct drm_device *dev, uint64_t seqno),
+	    TP_ARGS(dev, seqno),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(u64, seqno)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   __entry->seqno = seqno;
+			   ),
+
+	    TP_printk("dev=%u, seqno=%llu",
+		      __entry->dev, __entry->seqno)
+);
+
+#endif /* _VC4_TRACE_H_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/drm/vc4/vc4_trace_points.c b/drivers/gpu/drm/vc4/vc4_trace_points.c
new file mode 100644
index 000000000000..e6278f25716b
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_trace_points.c
@@ -0,0 +1,14 @@
+/*
+ * Copyright (C) 2015 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "vc4_drv.h"
+
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "vc4_trace.h"
+#endif
diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c
index 040ad0d8b8a1..424d515ffcda 100644
--- a/drivers/gpu/drm/vc4/vc4_v3d.c
+++ b/drivers/gpu/drm/vc4/vc4_v3d.c
@@ -144,6 +144,21 @@ int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused)
 }
 #endif /* CONFIG_DEBUG_FS */
 
+/*
+ * Asks the firmware to turn on power to the V3D engine.
+ *
+ * This may be doable with just the clocks interface, though this
+ * packet does some other register setup from the firmware, too.
+ */
+int
+vc4_v3d_set_power(struct vc4_dev *vc4, bool on)
+{
+	if (on)
+		return pm_generic_poweroff(&vc4->v3d->pdev->dev);
+	else
+		return pm_generic_resume(&vc4->v3d->pdev->dev);
+}
+
 static void vc4_v3d_init_hw(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
@@ -161,6 +176,7 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data)
 	struct drm_device *drm = dev_get_drvdata(master);
 	struct vc4_dev *vc4 = to_vc4_dev(drm);
 	struct vc4_v3d *v3d = NULL;
+	int ret;
 
 	v3d = devm_kzalloc(&pdev->dev, sizeof(*v3d), GFP_KERNEL);
 	if (!v3d)
@@ -180,8 +196,20 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data)
 		return -EINVAL;
 	}
 
+	/* Reset the binner overflow address/size at setup, to be sure
+	 * we don't reuse an old one.
+	 */
+	V3D_WRITE(V3D_BPOA, 0);
+	V3D_WRITE(V3D_BPOS, 0);
+
 	vc4_v3d_init_hw(drm);
 
+	ret = drm_irq_install(drm, platform_get_irq(pdev, 0));
+	if (ret) {
+		DRM_ERROR("Failed to install IRQ handler\n");
+		return ret;
+	}
+
 	return 0;
 }
 
@@ -191,6 +219,15 @@ static void vc4_v3d_unbind(struct device *dev, struct device *master,
 	struct drm_device *drm = dev_get_drvdata(master);
 	struct vc4_dev *vc4 = to_vc4_dev(drm);
 
+	drm_irq_uninstall(drm);
+
+	/* Disable the binner's overflow memory address, so the next
+	 * driver probe (if any) doesn't try to reuse our old
+	 * allocation.
+	 */
+	V3D_WRITE(V3D_BPOA, 0);
+	V3D_WRITE(V3D_BPOS, 0);
+
 	vc4->v3d = NULL;
 }
 
diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c
new file mode 100644
index 000000000000..0fb5b994b9dd
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_validate.c
@@ -0,0 +1,900 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * Command list validator for VC4.
+ *
+ * The VC4 has no IOMMU between it and system memory.  So, a user with
+ * access to execute command lists could escalate privilege by
+ * overwriting system memory (drawing to it as a framebuffer) or
+ * reading system memory it shouldn't (reading it as a texture, or
+ * uniform data, or vertex data).
+ *
+ * This validates command lists to ensure that all accesses are within
+ * the bounds of the GEM objects referenced.  It explicitly whitelists
+ * packets, and looks at the offsets in any address fields to make
+ * sure they're constrained within the BOs they reference.
+ *
+ * Note that because of the validation that's happening anyway, this
+ * is where GEM relocation processing happens.
+ */
+
+#include "uapi/drm/vc4_drm.h"
+#include "vc4_drv.h"
+#include "vc4_packet.h"
+
+#define VALIDATE_ARGS \
+	struct vc4_exec_info *exec,			\
+	void *validated,				\
+	void *untrusted
+
+/** Return the width in pixels of a 64-byte microtile. */
+static uint32_t
+utile_width(int cpp)
+{
+	switch (cpp) {
+	case 1:
+	case 2:
+		return 8;
+	case 4:
+		return 4;
+	case 8:
+		return 2;
+	default:
+		DRM_ERROR("unknown cpp: %d\n", cpp);
+		return 1;
+	}
+}
+
+/** Return the height in pixels of a 64-byte microtile. */
+static uint32_t
+utile_height(int cpp)
+{
+	switch (cpp) {
+	case 1:
+		return 8;
+	case 2:
+	case 4:
+	case 8:
+		return 4;
+	default:
+		DRM_ERROR("unknown cpp: %d\n", cpp);
+		return 1;
+	}
+}
+
+/**
+ * The texture unit decides what tiling format a particular miplevel is using
+ * this function, so we lay out our miptrees accordingly.
+ */
+static bool
+size_is_lt(uint32_t width, uint32_t height, int cpp)
+{
+	return (width <= 4 * utile_width(cpp) ||
+		height <= 4 * utile_height(cpp));
+}
+
+struct drm_gem_cma_object *
+vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex)
+{
+	struct drm_gem_cma_object *obj;
+	struct vc4_bo *bo;
+
+	if (hindex >= exec->bo_count) {
+		DRM_ERROR("BO index %d greater than BO count %d\n",
+			  hindex, exec->bo_count);
+		return NULL;
+	}
+	obj = exec->bo[hindex];
+	bo = to_vc4_bo(&obj->base);
+
+	if (bo->validated_shader) {
+		DRM_ERROR("Trying to use shader BO as something other than "
+			  "a shader\n");
+		return NULL;
+	}
+
+	return obj;
+}
+
+static struct drm_gem_cma_object *
+vc4_use_handle(struct vc4_exec_info *exec, uint32_t gem_handles_packet_index)
+{
+	return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index]);
+}
+
+static bool
+validate_bin_pos(struct vc4_exec_info *exec, void *untrusted, uint32_t pos)
+{
+	/* Note that the untrusted pointer passed to these functions is
+	 * incremented past the packet byte.
+	 */
+	return (untrusted - 1 == exec->bin_u + pos);
+}
+
+static uint32_t
+gl_shader_rec_size(uint32_t pointer_bits)
+{
+	uint32_t attribute_count = pointer_bits & 7;
+	bool extended = pointer_bits & 8;
+
+	if (attribute_count == 0)
+		attribute_count = 8;
+
+	if (extended)
+		return 100 + attribute_count * 4;
+	else
+		return 36 + attribute_count * 8;
+}
+
+bool
+vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
+		   uint32_t offset, uint8_t tiling_format,
+		   uint32_t width, uint32_t height, uint8_t cpp)
+{
+	uint32_t aligned_width, aligned_height, stride, size;
+	uint32_t utile_w = utile_width(cpp);
+	uint32_t utile_h = utile_height(cpp);
+
+	/* The shaded vertex format stores signed 12.4 fixed point
+	 * (-2048,2047) offsets from the viewport center, so we should
+	 * never have a render target larger than 4096.  The texture
+	 * unit can only sample from 2048x2048, so it's even more
+	 * restricted.  This lets us avoid worrying about overflow in
+	 * our math.
+	 */
+	if (width > 4096 || height > 4096) {
+		DRM_ERROR("Surface dimesions (%d,%d) too large", width, height);
+		return false;
+	}
+
+	switch (tiling_format) {
+	case VC4_TILING_FORMAT_LINEAR:
+		aligned_width = round_up(width, utile_w);
+		aligned_height = height;
+		break;
+	case VC4_TILING_FORMAT_T:
+		aligned_width = round_up(width, utile_w * 8);
+		aligned_height = round_up(height, utile_h * 8);
+		break;
+	case VC4_TILING_FORMAT_LT:
+		aligned_width = round_up(width, utile_w);
+		aligned_height = round_up(height, utile_h);
+		break;
+	default:
+		DRM_ERROR("buffer tiling %d unsupported\n", tiling_format);
+		return false;
+	}
+
+	stride = aligned_width * cpp;
+	size = stride * aligned_height;
+
+	if (size + offset < size ||
+	    size + offset > fbo->base.size) {
+		DRM_ERROR("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %zd)\n",
+			  width, height,
+			  aligned_width, aligned_height,
+			  size, offset, fbo->base.size);
+		return false;
+	}
+
+	return true;
+}
+
+static int
+validate_flush(VALIDATE_ARGS)
+{
+	if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 1)) {
+		DRM_ERROR("Bin CL must end with VC4_PACKET_FLUSH\n");
+		return -EINVAL;
+	}
+	exec->found_flush = true;
+
+	return 0;
+}
+
+static int
+validate_start_tile_binning(VALIDATE_ARGS)
+{
+	if (exec->found_start_tile_binning_packet) {
+		DRM_ERROR("Duplicate VC4_PACKET_START_TILE_BINNING\n");
+		return -EINVAL;
+	}
+	exec->found_start_tile_binning_packet = true;
+
+	if (!exec->found_tile_binning_mode_config_packet) {
+		DRM_ERROR("missing VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+validate_increment_semaphore(VALIDATE_ARGS)
+{
+	if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 2)) {
+		DRM_ERROR("Bin CL must end with "
+			  "VC4_PACKET_INCREMENT_SEMAPHORE\n");
+		return -EINVAL;
+	}
+	exec->found_increment_semaphore_packet = true;
+
+	return 0;
+}
+
+static int
+validate_indexed_prim_list(VALIDATE_ARGS)
+{
+	struct drm_gem_cma_object *ib;
+	uint32_t length = *(uint32_t *)(untrusted + 1);
+	uint32_t offset = *(uint32_t *)(untrusted + 5);
+	uint32_t max_index = *(uint32_t *)(untrusted + 9);
+	uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 2 : 1;
+	struct vc4_shader_state *shader_state;
+
+	/* Check overflow condition */
+	if (exec->shader_state_count == 0) {
+		DRM_ERROR("shader state must precede primitives\n");
+		return -EINVAL;
+	}
+	shader_state = &exec->shader_state[exec->shader_state_count - 1];
+
+	if (max_index > shader_state->max_index)
+		shader_state->max_index = max_index;
+
+	ib = vc4_use_handle(exec, 0);
+	if (!ib)
+		return -EINVAL;
+
+	if (offset > ib->base.size ||
+	    (ib->base.size - offset) / index_size < length) {
+		DRM_ERROR("IB access overflow (%d + %d*%d > %zd)\n",
+			  offset, length, index_size, ib->base.size);
+		return -EINVAL;
+	}
+
+	*(uint32_t *)(validated + 5) = ib->paddr + offset;
+
+	return 0;
+}
+
+static int
+validate_gl_array_primitive(VALIDATE_ARGS)
+{
+	uint32_t length = *(uint32_t *)(untrusted + 1);
+	uint32_t base_index = *(uint32_t *)(untrusted + 5);
+	uint32_t max_index;
+	struct vc4_shader_state *shader_state;
+
+	/* Check overflow condition */
+	if (exec->shader_state_count == 0) {
+		DRM_ERROR("shader state must precede primitives\n");
+		return -EINVAL;
+	}
+	shader_state = &exec->shader_state[exec->shader_state_count - 1];
+
+	if (length + base_index < length) {
+		DRM_ERROR("primitive vertex count overflow\n");
+		return -EINVAL;
+	}
+	max_index = length + base_index - 1;
+
+	if (max_index > shader_state->max_index)
+		shader_state->max_index = max_index;
+
+	return 0;
+}
+
+static int
+validate_gl_shader_state(VALIDATE_ARGS)
+{
+	uint32_t i = exec->shader_state_count++;
+
+	if (i >= exec->shader_state_size) {
+		DRM_ERROR("More requests for shader states than declared\n");
+		return -EINVAL;
+	}
+
+	exec->shader_state[i].addr = *(uint32_t *)untrusted;
+	exec->shader_state[i].max_index = 0;
+
+	if (exec->shader_state[i].addr & ~0xf) {
+		DRM_ERROR("high bits set in GL shader rec reference\n");
+		return -EINVAL;
+	}
+
+	*(uint32_t *)validated = (exec->shader_rec_p +
+				  exec->shader_state[i].addr);
+
+	exec->shader_rec_p +=
+		roundup(gl_shader_rec_size(exec->shader_state[i].addr), 16);
+
+	return 0;
+}
+
+static int
+validate_tile_binning_config(VALIDATE_ARGS)
+{
+	struct drm_device *dev = exec->exec_bo->base.dev;
+	struct vc4_bo *tile_bo;
+	uint8_t flags;
+	uint32_t tile_state_size, tile_alloc_size;
+	uint32_t tile_count;
+
+	if (exec->found_tile_binning_mode_config_packet) {
+		DRM_ERROR("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
+		return -EINVAL;
+	}
+	exec->found_tile_binning_mode_config_packet = true;
+
+	exec->bin_tiles_x = *(uint8_t *)(untrusted + 12);
+	exec->bin_tiles_y = *(uint8_t *)(untrusted + 13);
+	tile_count = exec->bin_tiles_x * exec->bin_tiles_y;
+	flags = *(uint8_t *)(untrusted + 14);
+
+	if (exec->bin_tiles_x == 0 ||
+	    exec->bin_tiles_y == 0) {
+		DRM_ERROR("Tile binning config of %dx%d too small\n",
+			  exec->bin_tiles_x, exec->bin_tiles_y);
+		return -EINVAL;
+	}
+
+	if (flags & (VC4_BIN_CONFIG_DB_NON_MS |
+		     VC4_BIN_CONFIG_TILE_BUFFER_64BIT)) {
+		DRM_ERROR("unsupported binning config flags 0x%02x\n", flags);
+		return -EINVAL;
+	}
+
+	/* The tile state data array is 48 bytes per tile, and we put it at
+	 * the start of a BO containing both it and the tile alloc.
+	 */
+	tile_state_size = 48 * tile_count;
+
+	/* Since the tile alloc array will follow us, align. */
+	exec->tile_alloc_offset = roundup(tile_state_size, 4096);
+
+	*(uint8_t *)(validated + 14) =
+		((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK |
+			    VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK)) |
+		 VC4_BIN_CONFIG_AUTO_INIT_TSDA |
+		 VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32,
+			       VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE) |
+		 VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128,
+			       VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE));
+
+	/* Initial block size. */
+	tile_alloc_size = 32 * tile_count;
+
+	/*
+	 * The initial allocation gets rounded to the next 256 bytes before
+	 * the hardware starts fulfilling further allocations.
+	 */
+	tile_alloc_size = roundup(tile_alloc_size, 256);
+
+	/* Add space for the extra allocations.  This is what gets used first,
+	 * before overflow memory.  It must have at least 4096 bytes, but we
+	 * want to avoid overflow memory usage if possible.
+	 */
+	tile_alloc_size += 1024 * 1024;
+
+	tile_bo = vc4_bo_create(dev, exec->tile_alloc_offset + tile_alloc_size,
+				true);
+	exec->tile_bo = &tile_bo->base;
+	if (!exec->tile_bo)
+		return -ENOMEM;
+	list_add_tail(&tile_bo->unref_head, &exec->unref_list);
+
+	/* tile alloc address. */
+	*(uint32_t *)(validated + 0) = (exec->tile_bo->paddr +
+					exec->tile_alloc_offset);
+	/* tile alloc size. */
+	*(uint32_t *)(validated + 4) = tile_alloc_size;
+	/* tile state address. */
+	*(uint32_t *)(validated + 8) = exec->tile_bo->paddr;
+
+	return 0;
+}
+
+static int
+validate_gem_handles(VALIDATE_ARGS)
+{
+	memcpy(exec->bo_index, untrusted, sizeof(exec->bo_index));
+	return 0;
+}
+
+#define VC4_DEFINE_PACKET(packet, func) \
+	[packet] = { packet ## _SIZE, #packet, func }
+
+static const struct cmd_info {
+	uint16_t len;
+	const char *name;
+	int (*func)(struct vc4_exec_info *exec, void *validated,
+		    void *untrusted);
+} cmd_info[] = {
+	VC4_DEFINE_PACKET(VC4_PACKET_HALT, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_NOP, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, validate_flush),
+	VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING,
+			  validate_start_tile_binning),
+	VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE,
+			  validate_increment_semaphore),
+
+	VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE,
+			  validate_indexed_prim_list),
+	VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE,
+			  validate_gl_array_primitive),
+
+	VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, NULL),
+
+	VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, validate_gl_shader_state),
+
+	VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, NULL),
+	/* Note: The docs say this was also 105, but it was 106 in the
+	 * initial userland code drop.
+	 */
+	VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, NULL),
+
+	VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG,
+			  validate_tile_binning_config),
+
+	VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, validate_gem_handles),
+};
+
+int
+vc4_validate_bin_cl(struct drm_device *dev,
+		    void *validated,
+		    void *unvalidated,
+		    struct vc4_exec_info *exec)
+{
+	uint32_t len = exec->args->bin_cl_size;
+	uint32_t dst_offset = 0;
+	uint32_t src_offset = 0;
+
+	while (src_offset < len) {
+		void *dst_pkt = validated + dst_offset;
+		void *src_pkt = unvalidated + src_offset;
+		u8 cmd = *(uint8_t *)src_pkt;
+		const struct cmd_info *info;
+
+		if (cmd >= ARRAY_SIZE(cmd_info)) {
+			DRM_ERROR("0x%08x: packet %d out of bounds\n",
+				  src_offset, cmd);
+			return -EINVAL;
+		}
+
+		info = &cmd_info[cmd];
+		if (!info->name) {
+			DRM_ERROR("0x%08x: packet %d invalid\n",
+				  src_offset, cmd);
+			return -EINVAL;
+		}
+
+		if (src_offset + info->len > len) {
+			DRM_ERROR("0x%08x: packet %d (%s) length 0x%08x "
+				  "exceeds bounds (0x%08x)\n",
+				  src_offset, cmd, info->name, info->len,
+				  src_offset + len);
+			return -EINVAL;
+		}
+
+		if (cmd != VC4_PACKET_GEM_HANDLES)
+			memcpy(dst_pkt, src_pkt, info->len);
+
+		if (info->func && info->func(exec,
+					     dst_pkt + 1,
+					     src_pkt + 1)) {
+			DRM_ERROR("0x%08x: packet %d (%s) failed to validate\n",
+				  src_offset, cmd, info->name);
+			return -EINVAL;
+		}
+
+		src_offset += info->len;
+		/* GEM handle loading doesn't produce HW packets. */
+		if (cmd != VC4_PACKET_GEM_HANDLES)
+			dst_offset += info->len;
+
+		/* When the CL hits halt, it'll stop reading anything else. */
+		if (cmd == VC4_PACKET_HALT)
+			break;
+	}
+
+	exec->ct0ea = exec->ct0ca + dst_offset;
+
+	if (!exec->found_start_tile_binning_packet) {
+		DRM_ERROR("Bin CL missing VC4_PACKET_START_TILE_BINNING\n");
+		return -EINVAL;
+	}
+
+	/* The bin CL must be ended with INCREMENT_SEMAPHORE and FLUSH.  The
+	 * semaphore is used to trigger the render CL to start up, and the
+	 * FLUSH is what caps the bin lists with
+	 * VC4_PACKET_RETURN_FROM_SUB_LIST (so they jump back to the main
+	 * render CL when they get called to) and actually triggers the queued
+	 * semaphore increment.
+	 */
+	if (!exec->found_increment_semaphore_packet || !exec->found_flush) {
+		DRM_ERROR("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE + "
+			  "VC4_PACKET_FLUSH\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static bool
+reloc_tex(struct vc4_exec_info *exec,
+	  void *uniform_data_u,
+	  struct vc4_texture_sample_info *sample,
+	  uint32_t texture_handle_index)
+
+{
+	struct drm_gem_cma_object *tex;
+	uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]);
+	uint32_t p1 = *(uint32_t *)(uniform_data_u + sample->p_offset[1]);
+	uint32_t p2 = (sample->p_offset[2] != ~0 ?
+		       *(uint32_t *)(uniform_data_u + sample->p_offset[2]) : 0);
+	uint32_t p3 = (sample->p_offset[3] != ~0 ?
+		       *(uint32_t *)(uniform_data_u + sample->p_offset[3]) : 0);
+	uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0];
+	uint32_t offset = p0 & VC4_TEX_P0_OFFSET_MASK;
+	uint32_t miplevels = VC4_GET_FIELD(p0, VC4_TEX_P0_MIPLVLS);
+	uint32_t width = VC4_GET_FIELD(p1, VC4_TEX_P1_WIDTH);
+	uint32_t height = VC4_GET_FIELD(p1, VC4_TEX_P1_HEIGHT);
+	uint32_t cpp, tiling_format, utile_w, utile_h;
+	uint32_t i;
+	uint32_t cube_map_stride = 0;
+	enum vc4_texture_data_type type;
+
+	tex = vc4_use_bo(exec, texture_handle_index);
+	if (!tex)
+		return false;
+
+	if (sample->is_direct) {
+		uint32_t remaining_size = tex->base.size - p0;
+
+		if (p0 > tex->base.size - 4) {
+			DRM_ERROR("UBO offset greater than UBO size\n");
+			goto fail;
+		}
+		if (p1 > remaining_size - 4) {
+			DRM_ERROR("UBO clamp would allow reads "
+				  "outside of UBO\n");
+			goto fail;
+		}
+		*validated_p0 = tex->paddr + p0;
+		return true;
+	}
+
+	if (width == 0)
+		width = 2048;
+	if (height == 0)
+		height = 2048;
+
+	if (p0 & VC4_TEX_P0_CMMODE_MASK) {
+		if (VC4_GET_FIELD(p2, VC4_TEX_P2_PTYPE) ==
+		    VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE)
+			cube_map_stride = p2 & VC4_TEX_P2_CMST_MASK;
+		if (VC4_GET_FIELD(p3, VC4_TEX_P2_PTYPE) ==
+		    VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) {
+			if (cube_map_stride) {
+				DRM_ERROR("Cube map stride set twice\n");
+				goto fail;
+			}
+
+			cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK;
+		}
+		if (!cube_map_stride) {
+			DRM_ERROR("Cube map stride not set\n");
+			goto fail;
+		}
+	}
+
+	type = (VC4_GET_FIELD(p0, VC4_TEX_P0_TYPE) |
+		(VC4_GET_FIELD(p1, VC4_TEX_P1_TYPE4) << 4));
+
+	switch (type) {
+	case VC4_TEXTURE_TYPE_RGBA8888:
+	case VC4_TEXTURE_TYPE_RGBX8888:
+	case VC4_TEXTURE_TYPE_RGBA32R:
+		cpp = 4;
+		break;
+	case VC4_TEXTURE_TYPE_RGBA4444:
+	case VC4_TEXTURE_TYPE_RGBA5551:
+	case VC4_TEXTURE_TYPE_RGB565:
+	case VC4_TEXTURE_TYPE_LUMALPHA:
+	case VC4_TEXTURE_TYPE_S16F:
+	case VC4_TEXTURE_TYPE_S16:
+		cpp = 2;
+		break;
+	case VC4_TEXTURE_TYPE_LUMINANCE:
+	case VC4_TEXTURE_TYPE_ALPHA:
+	case VC4_TEXTURE_TYPE_S8:
+		cpp = 1;
+		break;
+	case VC4_TEXTURE_TYPE_ETC1:
+	case VC4_TEXTURE_TYPE_BW1:
+	case VC4_TEXTURE_TYPE_A4:
+	case VC4_TEXTURE_TYPE_A1:
+	case VC4_TEXTURE_TYPE_RGBA64:
+	case VC4_TEXTURE_TYPE_YUV422R:
+	default:
+		DRM_ERROR("Texture format %d unsupported\n", type);
+		goto fail;
+	}
+	utile_w = utile_width(cpp);
+	utile_h = utile_height(cpp);
+
+	if (type == VC4_TEXTURE_TYPE_RGBA32R) {
+		tiling_format = VC4_TILING_FORMAT_LINEAR;
+	} else {
+		if (size_is_lt(width, height, cpp))
+			tiling_format = VC4_TILING_FORMAT_LT;
+		else
+			tiling_format = VC4_TILING_FORMAT_T;
+	}
+
+	if (!vc4_check_tex_size(exec, tex, offset + cube_map_stride * 5,
+				tiling_format, width, height, cpp)) {
+		goto fail;
+	}
+
+	/* The mipmap levels are stored before the base of the texture.  Make
+	 * sure there is actually space in the BO.
+	 */
+	for (i = 1; i <= miplevels; i++) {
+		uint32_t level_width = max(width >> i, 1u);
+		uint32_t level_height = max(height >> i, 1u);
+		uint32_t aligned_width, aligned_height;
+		uint32_t level_size;
+
+		/* Once the levels get small enough, they drop from T to LT. */
+		if (tiling_format == VC4_TILING_FORMAT_T &&
+		    size_is_lt(level_width, level_height, cpp)) {
+			tiling_format = VC4_TILING_FORMAT_LT;
+		}
+
+		switch (tiling_format) {
+		case VC4_TILING_FORMAT_T:
+			aligned_width = round_up(level_width, utile_w * 8);
+			aligned_height = round_up(level_height, utile_h * 8);
+			break;
+		case VC4_TILING_FORMAT_LT:
+			aligned_width = round_up(level_width, utile_w);
+			aligned_height = round_up(level_height, utile_h);
+			break;
+		default:
+			aligned_width = round_up(level_width, utile_w);
+			aligned_height = level_height;
+			break;
+		}
+
+		level_size = aligned_width * cpp * aligned_height;
+
+		if (offset < level_size) {
+			DRM_ERROR("Level %d (%dx%d -> %dx%d) size %db "
+				  "overflowed buffer bounds (offset %d)\n",
+				  i, level_width, level_height,
+				  aligned_width, aligned_height,
+				  level_size, offset);
+			goto fail;
+		}
+
+		offset -= level_size;
+	}
+
+	*validated_p0 = tex->paddr + p0;
+
+	return true;
+ fail:
+	DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0);
+	DRM_INFO("Texture p1 at %d: 0x%08x\n", sample->p_offset[1], p1);
+	DRM_INFO("Texture p2 at %d: 0x%08x\n", sample->p_offset[2], p2);
+	DRM_INFO("Texture p3 at %d: 0x%08x\n", sample->p_offset[3], p3);
+	return false;
+}
+
+static int
+validate_gl_shader_rec(struct drm_device *dev,
+		       struct vc4_exec_info *exec,
+		       struct vc4_shader_state *state)
+{
+	uint32_t *src_handles;
+	void *pkt_u, *pkt_v;
+	static const uint32_t shader_reloc_offsets[] = {
+		4, /* fs */
+		16, /* vs */
+		28, /* cs */
+	};
+	uint32_t shader_reloc_count = ARRAY_SIZE(shader_reloc_offsets);
+	struct drm_gem_cma_object *bo[shader_reloc_count + 8];
+	uint32_t nr_attributes, nr_relocs, packet_size;
+	int i;
+
+	nr_attributes = state->addr & 0x7;
+	if (nr_attributes == 0)
+		nr_attributes = 8;
+	packet_size = gl_shader_rec_size(state->addr);
+
+	nr_relocs = ARRAY_SIZE(shader_reloc_offsets) + nr_attributes;
+	if (nr_relocs * 4 > exec->shader_rec_size) {
+		DRM_ERROR("overflowed shader recs reading %d handles "
+			  "from %d bytes left\n",
+			  nr_relocs, exec->shader_rec_size);
+		return -EINVAL;
+	}
+	src_handles = exec->shader_rec_u;
+	exec->shader_rec_u += nr_relocs * 4;
+	exec->shader_rec_size -= nr_relocs * 4;
+
+	if (packet_size > exec->shader_rec_size) {
+		DRM_ERROR("overflowed shader recs copying %db packet "
+			  "from %d bytes left\n",
+			  packet_size, exec->shader_rec_size);
+		return -EINVAL;
+	}
+	pkt_u = exec->shader_rec_u;
+	pkt_v = exec->shader_rec_v;
+	memcpy(pkt_v, pkt_u, packet_size);
+	exec->shader_rec_u += packet_size;
+	/* Shader recs have to be aligned to 16 bytes (due to the attribute
+	 * flags being in the low bytes), so round the next validated shader
+	 * rec address up.  This should be safe, since we've got so many
+	 * relocations in a shader rec packet.
+	 */
+	BUG_ON(roundup(packet_size, 16) - packet_size > nr_relocs * 4);
+	exec->shader_rec_v += roundup(packet_size, 16);
+	exec->shader_rec_size -= packet_size;
+
+	if (!(*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD)) {
+		DRM_ERROR("Multi-threaded fragment shaders not supported.\n");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < shader_reloc_count; i++) {
+		if (src_handles[i] > exec->bo_count) {
+			DRM_ERROR("Shader handle %d too big\n", src_handles[i]);
+			return -EINVAL;
+		}
+
+		bo[i] = exec->bo[src_handles[i]];
+		if (!bo[i])
+			return -EINVAL;
+	}
+	for (i = shader_reloc_count; i < nr_relocs; i++) {
+		bo[i] = vc4_use_bo(exec, src_handles[i]);
+		if (!bo[i])
+			return -EINVAL;
+	}
+
+	for (i = 0; i < shader_reloc_count; i++) {
+		struct vc4_validated_shader_info *validated_shader;
+		uint32_t o = shader_reloc_offsets[i];
+		uint32_t src_offset = *(uint32_t *)(pkt_u + o);
+		uint32_t *texture_handles_u;
+		void *uniform_data_u;
+		uint32_t tex;
+
+		*(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset;
+
+		if (src_offset != 0) {
+			DRM_ERROR("Shaders must be at offset 0 of "
+				  "the BO.\n");
+			return -EINVAL;
+		}
+
+		validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader;
+		if (!validated_shader)
+			return -EINVAL;
+
+		if (validated_shader->uniforms_src_size >
+		    exec->uniforms_size) {
+			DRM_ERROR("Uniforms src buffer overflow\n");
+			return -EINVAL;
+		}
+
+		texture_handles_u = exec->uniforms_u;
+		uniform_data_u = (texture_handles_u +
+				  validated_shader->num_texture_samples);
+
+		memcpy(exec->uniforms_v, uniform_data_u,
+		       validated_shader->uniforms_size);
+
+		for (tex = 0;
+		     tex < validated_shader->num_texture_samples;
+		     tex++) {
+			if (!reloc_tex(exec,
+				       uniform_data_u,
+				       &validated_shader->texture_samples[tex],
+				       texture_handles_u[tex])) {
+				return -EINVAL;
+			}
+		}
+
+		*(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
+
+		exec->uniforms_u += validated_shader->uniforms_src_size;
+		exec->uniforms_v += validated_shader->uniforms_size;
+		exec->uniforms_p += validated_shader->uniforms_size;
+	}
+
+	for (i = 0; i < nr_attributes; i++) {
+		struct drm_gem_cma_object *vbo =
+			bo[ARRAY_SIZE(shader_reloc_offsets) + i];
+		uint32_t o = 36 + i * 8;
+		uint32_t offset = *(uint32_t *)(pkt_u + o + 0);
+		uint32_t attr_size = *(uint8_t *)(pkt_u + o + 4) + 1;
+		uint32_t stride = *(uint8_t *)(pkt_u + o + 5);
+		uint32_t max_index;
+
+		if (state->addr & 0x8)
+			stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff;
+
+		if (vbo->base.size < offset ||
+		    vbo->base.size - offset < attr_size) {
+			DRM_ERROR("BO offset overflow (%d + %d > %d)\n",
+				  offset, attr_size, vbo->base.size);
+			return -EINVAL;
+		}
+
+		if (stride != 0) {
+			max_index = ((vbo->base.size - offset - attr_size) /
+				     stride);
+			if (state->max_index > max_index) {
+				DRM_ERROR("primitives use index %d out of "
+					  "supplied %d\n",
+					  state->max_index, max_index);
+				return -EINVAL;
+			}
+		}
+
+		*(uint32_t *)(pkt_v + o) = vbo->paddr + offset;
+	}
+
+	return 0;
+}
+
+int
+vc4_validate_shader_recs(struct drm_device *dev,
+			 struct vc4_exec_info *exec)
+{
+	uint32_t i;
+	int ret = 0;
+
+	for (i = 0; i < exec->shader_state_count; i++) {
+		ret = validate_gl_shader_rec(dev, exec, &exec->shader_state[i]);
+		if (ret)
+			return ret;
+	}
+
+	return ret;
+}
diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h
index 74de18416be9..fe4161bc93ae 100644
--- a/include/uapi/drm/vc4_drm.h
+++ b/include/uapi/drm/vc4_drm.h
@@ -26,14 +26,155 @@
 
 #include "drm.h"
 
+#define DRM_VC4_SUBMIT_CL                         0x00
+#define DRM_VC4_WAIT_SEQNO                        0x01
+#define DRM_VC4_WAIT_BO                           0x02
 #define DRM_VC4_CREATE_BO                         0x03
 #define DRM_VC4_MMAP_BO                           0x04
 #define DRM_VC4_CREATE_SHADER_BO                  0x05
 
+#define DRM_IOCTL_VC4_SUBMIT_CL           DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
+#define DRM_IOCTL_VC4_WAIT_SEQNO          DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
+#define DRM_IOCTL_VC4_WAIT_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_BO, struct drm_vc4_wait_bo)
 #define DRM_IOCTL_VC4_CREATE_BO           DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo)
 #define DRM_IOCTL_VC4_MMAP_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo)
 #define DRM_IOCTL_VC4_CREATE_SHADER_BO    DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo)
 
+struct drm_vc4_submit_rcl_surface {
+	__u32 hindex; /* Handle index, or ~0 if not present. */
+	__u32 offset; /* Offset to start of buffer. */
+	/*
+	 * Bits for either render config (color_write) or load/store packet.
+	 * Bits should all be 0 for MSAA load/stores.
+	 */
+	__u16 bits;
+
+#define VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES		(1 << 0)
+	__u16 flags;
+};
+
+/**
+ * struct drm_vc4_submit_cl - ioctl argument for submitting commands to the 3D
+ * engine.
+ *
+ * Drivers typically use GPU BOs to store batchbuffers / command lists and
+ * their associated state.  However, because the VC4 lacks an MMU, we have to
+ * do validation of memory accesses by the GPU commands.  If we were to store
+ * our commands in BOs, we'd need to do uncached readback from them to do the
+ * validation process, which is too expensive.  Instead, userspace accumulates
+ * commands and associated state in plain memory, then the kernel copies the
+ * data to its own address space, and then validates and stores it in a GPU
+ * BO.
+ */
+struct drm_vc4_submit_cl {
+	/* Pointer to the binner command list.
+	 *
+	 * This is the first set of commands executed, which runs the
+	 * coordinate shader to determine where primitives land on the screen,
+	 * then writes out the state updates and draw calls necessary per tile
+	 * to the tile allocation BO.
+	 */
+	__u64 bin_cl;
+
+	/* Pointer to the shader records.
+	 *
+	 * Shader records are the structures read by the hardware that contain
+	 * pointers to uniforms, shaders, and vertex attributes.  The
+	 * reference to the shader record has enough information to determine
+	 * how many pointers are necessary (fixed number for shaders/uniforms,
+	 * and an attribute count), so those BO indices into bo_handles are
+	 * just stored as __u32s before each shader record passed in.
+	 */
+	__u64 shader_rec;
+
+	/* Pointer to uniform data and texture handles for the textures
+	 * referenced by the shader.
+	 *
+	 * For each shader state record, there is a set of uniform data in the
+	 * order referenced by the record (FS, VS, then CS).  Each set of
+	 * uniform data has a __u32 index into bo_handles per texture
+	 * sample operation, in the order the QPU_W_TMUn_S writes appear in
+	 * the program.  Following the texture BO handle indices is the actual
+	 * uniform data.
+	 *
+	 * The individual uniform state blocks don't have sizes passed in,
+	 * because the kernel has to determine the sizes anyway during shader
+	 * code validation.
+	 */
+	__u64 uniforms;
+	__u64 bo_handles;
+
+	/* Size in bytes of the binner command list. */
+	__u32 bin_cl_size;
+	/* Size in bytes of the set of shader records. */
+	__u32 shader_rec_size;
+	/* Number of shader records.
+	 *
+	 * This could just be computed from the contents of shader_records and
+	 * the address bits of references to them from the bin CL, but it
+	 * keeps the kernel from having to resize some allocations it makes.
+	 */
+	__u32 shader_rec_count;
+	/* Size in bytes of the uniform state. */
+	__u32 uniforms_size;
+
+	/* Number of BO handles passed in (size is that times 4). */
+	__u32 bo_handle_count;
+
+	/* RCL setup: */
+	__u16 width;
+	__u16 height;
+	__u8 min_x_tile;
+	__u8 min_y_tile;
+	__u8 max_x_tile;
+	__u8 max_y_tile;
+	struct drm_vc4_submit_rcl_surface color_read;
+	struct drm_vc4_submit_rcl_surface color_write;
+	struct drm_vc4_submit_rcl_surface zs_read;
+	struct drm_vc4_submit_rcl_surface zs_write;
+	struct drm_vc4_submit_rcl_surface msaa_color_write;
+	struct drm_vc4_submit_rcl_surface msaa_zs_write;
+	__u32 clear_color[2];
+	__u32 clear_z;
+	__u8 clear_s;
+
+	__u32 pad:24;
+
+#define VC4_SUBMIT_CL_USE_CLEAR_COLOR			(1 << 0)
+	__u32 flags;
+
+	/* Returned value of the seqno of this render job (for the
+	 * wait ioctl).
+	 */
+	__u64 seqno;
+};
+
+/**
+ * struct drm_vc4_wait_seqno - ioctl argument for waiting for
+ * DRM_VC4_SUBMIT_CL completion using its returned seqno.
+ *
+ * timeout_ns is the timeout in nanoseconds, where "0" means "don't
+ * block, just return the status."
+ */
+struct drm_vc4_wait_seqno {
+	__u64 seqno;
+	__u64 timeout_ns;
+};
+
+/**
+ * struct drm_vc4_wait_bo - ioctl argument for waiting for
+ * completion of the last DRM_VC4_SUBMIT_CL on a BO.
+ *
+ * This is useful for cases where multiple processes might be
+ * rendering to a BO and you want to wait for all rendering to be
+ * completed.
+ */
+struct drm_vc4_wait_bo {
+	__u32 handle;
+	__u32 pad;
+	__u64 timeout_ns;
+};
+
 /**
  * struct drm_vc4_create_bo - ioctl argument for creating VC4 BOs.
  *
-- 
cgit v1.2.3-58-ga151


From 214613656b5179f0daab6e0a080814b5100d45f0 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 30 Oct 2015 10:09:02 -0700
Subject: drm/vc4: Add an interface for capturing the GPU state after a hang.

This can be parsed with vc4-gpu-tools tools for trying to figure out
what was going on.

v2: Use __u32-style types.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/vc4_drv.c |   2 +
 drivers/gpu/drm/vc4/vc4_drv.h |   4 +
 drivers/gpu/drm/vc4/vc4_gem.c | 185 ++++++++++++++++++++++++++++++++++++++++++
 include/uapi/drm/vc4_drm.h    |  45 ++++++++++
 4 files changed, 236 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index 2cfee5959455..97226b677bf4 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -80,6 +80,8 @@ static const struct drm_ioctl_desc vc4_drm_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl,
+			  DRM_ROOT_ONLY),
 };
 
 static struct drm_driver vc4_drm_driver = {
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index f9927d87369f..080865ec2bae 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -19,6 +19,8 @@ struct vc4_dev {
 
 	struct drm_fbdev_cma *fbdev;
 
+	struct vc4_hang_state *hang_state;
+
 	/* The kernel-space BO cache.  Tracks buffers that have been
 	 * unreferenced by all other users (refcounts of 0!) but not
 	 * yet freed, so we can do cheap allocations.
@@ -361,6 +363,8 @@ int vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
 			       struct drm_file *file_priv);
 int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
 		      struct drm_file *file_priv);
+int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file_priv);
 int vc4_mmap(struct file *filp, struct vm_area_struct *vma);
 int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
 void *vc4_prime_vmap(struct drm_gem_object *obj);
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index 5fb0556e001e..39f29e759334 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -40,6 +40,186 @@ vc4_queue_hangcheck(struct drm_device *dev)
 		  round_jiffies_up(jiffies + msecs_to_jiffies(100)));
 }
 
+struct vc4_hang_state {
+	struct drm_vc4_get_hang_state user_state;
+
+	u32 bo_count;
+	struct drm_gem_object **bo;
+};
+
+static void
+vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
+{
+	unsigned int i;
+
+	mutex_lock(&dev->struct_mutex);
+	for (i = 0; i < state->user_state.bo_count; i++)
+		drm_gem_object_unreference(state->bo[i]);
+	mutex_unlock(&dev->struct_mutex);
+
+	kfree(state);
+}
+
+int
+vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
+			 struct drm_file *file_priv)
+{
+	struct drm_vc4_get_hang_state *get_state = data;
+	struct drm_vc4_get_hang_state_bo *bo_state;
+	struct vc4_hang_state *kernel_state;
+	struct drm_vc4_get_hang_state *state;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	unsigned long irqflags;
+	u32 i;
+	int ret;
+
+	spin_lock_irqsave(&vc4->job_lock, irqflags);
+	kernel_state = vc4->hang_state;
+	if (!kernel_state) {
+		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+		return -ENOENT;
+	}
+	state = &kernel_state->user_state;
+
+	/* If the user's array isn't big enough, just return the
+	 * required array size.
+	 */
+	if (get_state->bo_count < state->bo_count) {
+		get_state->bo_count = state->bo_count;
+		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+		return 0;
+	}
+
+	vc4->hang_state = NULL;
+	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+
+	/* Save the user's BO pointer, so we don't stomp it with the memcpy. */
+	state->bo = get_state->bo;
+	memcpy(get_state, state, sizeof(*state));
+
+	bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL);
+	if (!bo_state) {
+		ret = -ENOMEM;
+		goto err_free;
+	}
+
+	for (i = 0; i < state->bo_count; i++) {
+		struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
+		u32 handle;
+
+		ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
+					    &handle);
+
+		if (ret) {
+			state->bo_count = i - 1;
+			goto err;
+		}
+		bo_state[i].handle = handle;
+		bo_state[i].paddr = vc4_bo->base.paddr;
+		bo_state[i].size = vc4_bo->base.base.size;
+	}
+
+	ret = copy_to_user((void __user *)(uintptr_t)get_state->bo,
+			   bo_state,
+			   state->bo_count * sizeof(*bo_state));
+	kfree(bo_state);
+
+err_free:
+
+	vc4_free_hang_state(dev, kernel_state);
+
+err:
+	return ret;
+}
+
+static void
+vc4_save_hang_state(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct drm_vc4_get_hang_state *state;
+	struct vc4_hang_state *kernel_state;
+	struct vc4_exec_info *exec;
+	struct vc4_bo *bo;
+	unsigned long irqflags;
+	unsigned int i, unref_list_count;
+
+	kernel_state = kcalloc(1, sizeof(*state), GFP_KERNEL);
+	if (!kernel_state)
+		return;
+
+	state = &kernel_state->user_state;
+
+	spin_lock_irqsave(&vc4->job_lock, irqflags);
+	exec = vc4_first_job(vc4);
+	if (!exec) {
+		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+		return;
+	}
+
+	unref_list_count = 0;
+	list_for_each_entry(bo, &exec->unref_list, unref_head)
+		unref_list_count++;
+
+	state->bo_count = exec->bo_count + unref_list_count;
+	kernel_state->bo = kcalloc(state->bo_count, sizeof(*kernel_state->bo),
+				   GFP_ATOMIC);
+	if (!kernel_state->bo) {
+		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+		return;
+	}
+
+	for (i = 0; i < exec->bo_count; i++) {
+		drm_gem_object_reference(&exec->bo[i]->base);
+		kernel_state->bo[i] = &exec->bo[i]->base;
+	}
+
+	list_for_each_entry(bo, &exec->unref_list, unref_head) {
+		drm_gem_object_reference(&bo->base.base);
+		kernel_state->bo[i] = &bo->base.base;
+		i++;
+	}
+
+	state->start_bin = exec->ct0ca;
+	state->start_render = exec->ct1ca;
+
+	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+
+	state->ct0ca = V3D_READ(V3D_CTNCA(0));
+	state->ct0ea = V3D_READ(V3D_CTNEA(0));
+
+	state->ct1ca = V3D_READ(V3D_CTNCA(1));
+	state->ct1ea = V3D_READ(V3D_CTNEA(1));
+
+	state->ct0cs = V3D_READ(V3D_CTNCS(0));
+	state->ct1cs = V3D_READ(V3D_CTNCS(1));
+
+	state->ct0ra0 = V3D_READ(V3D_CT00RA0);
+	state->ct1ra0 = V3D_READ(V3D_CT01RA0);
+
+	state->bpca = V3D_READ(V3D_BPCA);
+	state->bpcs = V3D_READ(V3D_BPCS);
+	state->bpoa = V3D_READ(V3D_BPOA);
+	state->bpos = V3D_READ(V3D_BPOS);
+
+	state->vpmbase = V3D_READ(V3D_VPMBASE);
+
+	state->dbge = V3D_READ(V3D_DBGE);
+	state->fdbgo = V3D_READ(V3D_FDBGO);
+	state->fdbgb = V3D_READ(V3D_FDBGB);
+	state->fdbgr = V3D_READ(V3D_FDBGR);
+	state->fdbgs = V3D_READ(V3D_FDBGS);
+	state->errstat = V3D_READ(V3D_ERRSTAT);
+
+	spin_lock_irqsave(&vc4->job_lock, irqflags);
+	if (vc4->hang_state) {
+		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+		vc4_free_hang_state(dev, kernel_state);
+	} else {
+		vc4->hang_state = kernel_state;
+		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+	}
+}
+
 static void
 vc4_reset(struct drm_device *dev)
 {
@@ -64,6 +244,8 @@ vc4_reset_work(struct work_struct *work)
 	struct vc4_dev *vc4 =
 		container_of(work, struct vc4_dev, hangcheck.reset_work);
 
+	vc4_save_hang_state(vc4->dev);
+
 	vc4_reset(vc4->dev);
 }
 
@@ -679,4 +861,7 @@ vc4_gem_destroy(struct drm_device *dev)
 	}
 
 	vc4_bo_cache_destroy(dev);
+
+	if (vc4->hang_state)
+		vc4_free_hang_state(dev, vc4->hang_state);
 }
diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h
index fe4161bc93ae..eeb37e394f13 100644
--- a/include/uapi/drm/vc4_drm.h
+++ b/include/uapi/drm/vc4_drm.h
@@ -32,6 +32,7 @@
 #define DRM_VC4_CREATE_BO                         0x03
 #define DRM_VC4_MMAP_BO                           0x04
 #define DRM_VC4_CREATE_SHADER_BO                  0x05
+#define DRM_VC4_GET_HANG_STATE                    0x06
 
 #define DRM_IOCTL_VC4_SUBMIT_CL           DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
 #define DRM_IOCTL_VC4_WAIT_SEQNO          DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
@@ -39,6 +40,7 @@
 #define DRM_IOCTL_VC4_CREATE_BO           DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo)
 #define DRM_IOCTL_VC4_MMAP_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo)
 #define DRM_IOCTL_VC4_CREATE_SHADER_BO    DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo)
+#define DRM_IOCTL_VC4_GET_HANG_STATE      DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_HANG_STATE, struct drm_vc4_get_hang_state)
 
 struct drm_vc4_submit_rcl_surface {
 	__u32 hindex; /* Handle index, or ~0 if not present. */
@@ -231,4 +233,47 @@ struct drm_vc4_create_shader_bo {
 	__u32 pad;
 };
 
+struct drm_vc4_get_hang_state_bo {
+	__u32 handle;
+	__u32 paddr;
+	__u32 size;
+	__u32 pad;
+};
+
+/**
+ * struct drm_vc4_hang_state - ioctl argument for collecting state
+ * from a GPU hang for analysis.
+*/
+struct drm_vc4_get_hang_state {
+	/** Pointer to array of struct drm_vc4_get_hang_state_bo. */
+	__u64 bo;
+	/**
+	 * On input, the size of the bo array.  Output is the number
+	 * of bos to be returned.
+	 */
+	__u32 bo_count;
+
+	__u32 start_bin, start_render;
+
+	__u32 ct0ca, ct0ea;
+	__u32 ct1ca, ct1ea;
+	__u32 ct0cs, ct1cs;
+	__u32 ct0ra0, ct1ra0;
+
+	__u32 bpca, bpcs;
+	__u32 bpoa, bpos;
+
+	__u32 vpmbase;
+
+	__u32 dbge;
+	__u32 fdbgo;
+	__u32 fdbgb;
+	__u32 fdbgr;
+	__u32 fdbgs;
+	__u32 errstat;
+
+	/* Pad that we may save more registers into in the future. */
+	__u32 pad[16];
+};
+
 #endif /* _UAPI_VC4_DRM_H_ */
-- 
cgit v1.2.3-58-ga151


From 506a8e87d8d2746b9e9d2433503fe237c54e4750 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 8 Dec 2015 11:55:07 +0000
Subject: drm/i915: Add soft-pinning API for execbuffer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Userspace can pass in an offset that it presumes the object is located
at. The kernel will then do its utmost to fit the object into that
location. The assumption is that userspace is handling its own object
locations (for example along with full-ppgtt) and that the kernel will
rarely have to make space for the user's requests.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris
Wilson.  Fixed incorrect error paths causing crash found by Michal Winiarski.
(Not published externally)

v3: Rebased because of trivial conflict in object_bind_to_vm.  Fixed eviction
to allow eviction of soft-pinned objects when another soft-pinned object used
by a subsequent execbuffer overlaps reported by Michal Winiarski.
(Not published externally)

v4: Moved soft-pinned objects to the front of ordered_vmas so that they are
pinned first after an address conflict happens to avoid repeated conflicts in
rare cases (Suggested by Chris Wilson).  Expanded comment on
drm_i915_gem_exec_object2.offset to cover this new API.

v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this capability
(Kristian). Added check for multiple pinnings on eviction (Akash). Made sure
buffers are not considered misplaced without the user specifying
EXEC_OBJECT_SUPPORTS_48B_ADDRESS.  User must assume responsibility for any
addressing workarounds.  Updated object2.offset field comment again to clarify
NO_RELOC case (Chris).  checkpatch cleanup.

v6: Trivial rebase on latest drm-intel-nightly

v7: Catch attempts to pin above the max virtual address size and return
EINVAL (Tvrtko). Decouple EXEC_OBJECT_SUPPORTS_48B_ADDRESS and
EXEC_OBJECT_PINNED flags, user must pass both flags in any attempt to pin
something at an offset above 4GB (Chris, Daniel Vetter).

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Akash Goel <akash.goel@intel.com>
Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Cc: Michal Winiarski <michal.winiarski@intel.com>
Cc: Zou Nanhai <nanhai.zou@intel.com>
Cc: Kristian Høgsberg <hoegsberg@gmail.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Reviewed-by: Michel Thierry <michel.thierry@intel.com>
Acked-by: PDT
Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1449575707-20933-1-git-send-email-thomas.daniel@intel.com
---
 drivers/gpu/drm/i915/i915_dma.c            |  3 ++
 drivers/gpu/drm/i915/i915_drv.h            |  2 +
 drivers/gpu/drm/i915/i915_gem.c            | 64 ++++++++++++++++++++----------
 drivers/gpu/drm/i915/i915_gem_evict.c      | 39 ++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++++-
 include/uapi/drm/i915_drm.h                | 12 ++++--
 6 files changed, 111 insertions(+), 25 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index a81c76603544..52b82893ba42 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -169,6 +169,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_RESOURCE_STREAMER:
 		value = HAS_RESOURCE_STREAMER(dev);
 		break;
+	case I915_PARAM_HAS_EXEC_SOFTPIN:
+		value = 1;
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6dee97c0d5d0..547c14269292 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2868,6 +2868,7 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
 #define PIN_UPDATE	(1<<5)
 #define PIN_ZONE_4G	(1<<6)
 #define PIN_HIGH	(1<<7)
+#define PIN_OFFSET_FIXED	(1<<8)
 #define PIN_OFFSET_MASK (~4095)
 int __must_check
 i915_gem_object_pin(struct drm_i915_gem_object *obj,
@@ -3213,6 +3214,7 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
 					  unsigned long start,
 					  unsigned long end,
 					  unsigned flags);
+int __must_check i915_gem_evict_for_vma(struct i915_vma *target);
 int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
 
 /* belongs in i915_gem_gtt.h */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a531cb83295c..d7b8d16ff6e6 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3468,30 +3468,50 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		goto err_unpin;
 
-	if (flags & PIN_HIGH) {
-		search_flag = DRM_MM_SEARCH_BELOW;
-		alloc_flag = DRM_MM_CREATE_TOP;
+	if (flags & PIN_OFFSET_FIXED) {
+		uint64_t offset = flags & PIN_OFFSET_MASK;
+
+		if (offset & (alignment - 1) || offset + size > end) {
+			ret = -EINVAL;
+			goto err_free_vma;
+		}
+		vma->node.start = offset;
+		vma->node.size = size;
+		vma->node.color = obj->cache_level;
+		ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+		if (ret) {
+			ret = i915_gem_evict_for_vma(vma);
+			if (ret == 0)
+				ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+		}
+		if (ret)
+			goto err_free_vma;
 	} else {
-		search_flag = DRM_MM_SEARCH_DEFAULT;
-		alloc_flag = DRM_MM_CREATE_DEFAULT;
-	}
+		if (flags & PIN_HIGH) {
+			search_flag = DRM_MM_SEARCH_BELOW;
+			alloc_flag = DRM_MM_CREATE_TOP;
+		} else {
+			search_flag = DRM_MM_SEARCH_DEFAULT;
+			alloc_flag = DRM_MM_CREATE_DEFAULT;
+		}
 
 search_free:
-	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
-						  size, alignment,
-						  obj->cache_level,
-						  start, end,
-						  search_flag,
-						  alloc_flag);
-	if (ret) {
-		ret = i915_gem_evict_something(dev, vm, size, alignment,
-					       obj->cache_level,
-					       start, end,
-					       flags);
-		if (ret == 0)
-			goto search_free;
+		ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
+							  size, alignment,
+							  obj->cache_level,
+							  start, end,
+							  search_flag,
+							  alloc_flag);
+		if (ret) {
+			ret = i915_gem_evict_something(dev, vm, size, alignment,
+						       obj->cache_level,
+						       start, end,
+						       flags);
+			if (ret == 0)
+				goto search_free;
 
-		goto err_free_vma;
+			goto err_free_vma;
+		}
 	}
 	if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
 		ret = -EINVAL;
@@ -4082,6 +4102,10 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
 	    vma->node.start < (flags & PIN_OFFSET_MASK))
 		return true;
 
+	if (flags & PIN_OFFSET_FIXED &&
+	    vma->node.start != (flags & PIN_OFFSET_MASK))
+		return true;
+
 	return false;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index d71a133ceff5..07c6e4d320c9 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -199,6 +199,45 @@ found:
 	return ret;
 }
 
+int
+i915_gem_evict_for_vma(struct i915_vma *target)
+{
+	struct drm_mm_node *node, *next;
+
+	list_for_each_entry_safe(node, next,
+			&target->vm->mm.head_node.node_list,
+			node_list) {
+		struct i915_vma *vma;
+		int ret;
+
+		if (node->start + node->size <= target->node.start)
+			continue;
+		if (node->start >= target->node.start + target->node.size)
+			break;
+
+		vma = container_of(node, typeof(*vma), node);
+
+		if (vma->pin_count) {
+			if (!vma->exec_entry || (vma->pin_count > 1))
+				/* Object is pinned for some other use */
+				return -EBUSY;
+
+			/* We need to evict a buffer in the same batch */
+			if (vma->exec_entry->flags & EXEC_OBJECT_PINNED)
+				/* Overlapping fixed objects in the same batch */
+				return -EINVAL;
+
+			return -ENOSPC;
+		}
+
+		ret = i915_vma_unbind(vma);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 /**
  * i915_gem_evict_vm - Evict all idle vmas from a vm
  * @vm: Address space to cleanse
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index a4c243cec4aa..48ec4846e6f2 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -599,6 +599,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 			flags |= PIN_GLOBAL | PIN_MAPPABLE;
 		if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
 			flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
+		if (entry->flags & EXEC_OBJECT_PINNED)
+			flags |= entry->offset | PIN_OFFSET_FIXED;
 		if ((flags & PIN_MAPPABLE) == 0)
 			flags |= PIN_HIGH;
 	}
@@ -670,6 +672,10 @@ eb_vma_misplaced(struct i915_vma *vma)
 	    vma->node.start & (entry->alignment - 1))
 		return true;
 
+	if (entry->flags & EXEC_OBJECT_PINNED &&
+	    vma->node.start != entry->offset)
+		return true;
+
 	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
 	    vma->node.start < BATCH_OFFSET_BIAS)
 		return true;
@@ -695,6 +701,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 	struct i915_vma *vma;
 	struct i915_address_space *vm;
 	struct list_head ordered_vmas;
+	struct list_head pinned_vmas;
 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
 	int retry;
 
@@ -703,6 +710,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
 
 	INIT_LIST_HEAD(&ordered_vmas);
+	INIT_LIST_HEAD(&pinned_vmas);
 	while (!list_empty(vmas)) {
 		struct drm_i915_gem_exec_object2 *entry;
 		bool need_fence, need_mappable;
@@ -721,7 +729,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 			obj->tiling_mode != I915_TILING_NONE;
 		need_mappable = need_fence || need_reloc_mappable(vma);
 
-		if (need_mappable) {
+		if (entry->flags & EXEC_OBJECT_PINNED)
+			list_move_tail(&vma->exec_list, &pinned_vmas);
+		else if (need_mappable) {
 			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
 			list_move(&vma->exec_list, &ordered_vmas);
 		} else
@@ -731,6 +741,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 		obj->base.pending_write_domain = 0;
 	}
 	list_splice(&ordered_vmas, vmas);
+	list_splice(&pinned_vmas, vmas);
 
 	/* Attempt to pin all of the buffers into the GTT.
 	 * This is done in 3 phases:
@@ -1317,7 +1328,8 @@ eb_get_batch(struct eb_vmas *eb)
 	 * Note that actual hangs have only been observed on gen7, but for
 	 * paranoia do it everywhere.
 	 */
-	vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
+	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
+		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
 
 	return vma->obj;
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 67ef73a5d6eb..d727b49f07ac 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -356,6 +356,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_EU_TOTAL		 34
 #define I915_PARAM_HAS_GPU_RESET	 35
 #define I915_PARAM_HAS_RESOURCE_STREAMER 36
+#define I915_PARAM_HAS_EXEC_SOFTPIN	 37
 
 typedef struct drm_i915_getparam {
 	__s32 param;
@@ -682,8 +683,12 @@ struct drm_i915_gem_exec_object2 {
 	__u64 alignment;
 
 	/**
-	 * Returned value of the updated offset of the object, for future
-	 * presumed_offset writes.
+	 * When the EXEC_OBJECT_PINNED flag is specified this is populated by
+	 * the user with the GTT offset at which this object will be pinned.
+	 * When the I915_EXEC_NO_RELOC flag is specified this must contain the
+	 * presumed_offset of the object.
+	 * During execbuffer2 the kernel populates it with the value of the
+	 * current GTT offset of the object, for future presumed_offset writes.
 	 */
 	__u64 offset;
 
@@ -691,7 +696,8 @@ struct drm_i915_gem_exec_object2 {
 #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
 #define EXEC_OBJECT_WRITE	(1<<2)
 #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
+#define EXEC_OBJECT_PINNED	(1<<4)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
 	__u64 flags;
 
 	__u64 rsvd1;
-- 
cgit v1.2.3-58-ga151


From 1a2a42c8bfa25ef0f7e14c5973c6a0bebe625a96 Mon Sep 17 00:00:00 2001
From: Mikko Rapeli <mikko.rapeli@iki.fi>
Date: Sun, 31 Aug 2014 12:58:46 +0200
Subject: drm.h: use __kernel_size_t instead of size_t
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fall back to size_t for non Linux platforms.

Fixes userspace compilation error:

drm/drm.h:132:2: error: unknown type name ‘size_t’

Signed-off-by: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/drm/drm.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 3801584a0c53..b4e92eb12044 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -54,6 +54,7 @@ typedef int32_t  __s32;
 typedef uint32_t __u32;
 typedef int64_t  __s64;
 typedef uint64_t __u64;
+typedef size_t   __kernel_size_t;
 typedef unsigned long drm_handle_t;
 
 #endif
@@ -129,11 +130,11 @@ struct drm_version {
 	int version_major;	  /**< Major version */
 	int version_minor;	  /**< Minor version */
 	int version_patchlevel;	  /**< Patch level */
-	size_t name_len;	  /**< Length of name buffer */
+	__kernel_size_t name_len;	  /**< Length of name buffer */
 	char __user *name;	  /**< Name of driver */
-	size_t date_len;	  /**< Length of date buffer */
+	__kernel_size_t date_len;	  /**< Length of date buffer */
 	char __user *date;	  /**< User-space buffer to hold date */
-	size_t desc_len;	  /**< Length of desc buffer */
+	__kernel_size_t desc_len;	  /**< Length of desc buffer */
 	char __user *desc;	  /**< User-space buffer to hold desc */
 };
 
@@ -143,7 +144,7 @@ struct drm_version {
  * \sa drmGetBusid() and drmSetBusId().
  */
 struct drm_unique {
-	size_t unique_len;	  /**< Length of unique */
+	__kernel_size_t unique_len;	  /**< Length of unique */
 	char __user *unique;	  /**< Unique name for driver instantiation */
 };
 
-- 
cgit v1.2.3-58-ga151


From 05623b7fcaf20886721307de10547d3f91235186 Mon Sep 17 00:00:00 2001
From: Mikko Rapeli <mikko.rapeli@iki.fi>
Date: Sun, 31 Aug 2014 13:01:09 +0200
Subject: drm_mode.h: use __u32 and __u64 from linux/types.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes userspace compilation error:

drm/drm_mode.h:472:2: error: unknown type name ‘uint32_t’

Signed-off-by: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/drm/drm_mode.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 6c11ca401de8..44ad794960ee 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -526,14 +526,14 @@ struct drm_mode_crtc_page_flip {
 
 /* create a dumb scanout buffer */
 struct drm_mode_create_dumb {
-	uint32_t height;
-	uint32_t width;
-	uint32_t bpp;
-	uint32_t flags;
+	__u32 height;
+	__u32 width;
+	__u32 bpp;
+	__u32 flags;
 	/* handle, pitch, size will be returned */
-	uint32_t handle;
-	uint32_t pitch;
-	uint64_t size;
+	__u32 handle;
+	__u32 pitch;
+	__u64 size;
 };
 
 /* set up for mmap of a dumb scanout buffer */
@@ -550,7 +550,7 @@ struct drm_mode_map_dumb {
 };
 
 struct drm_mode_destroy_dumb {
-	uint32_t handle;
+	__u32 handle;
 };
 
 /* page-flip flags are valid, plus: */
-- 
cgit v1.2.3-58-ga151


From 6615b20f1e4c35a62f7d12d38b7f56dc63dbdece Mon Sep 17 00:00:00 2001
From: Mikko Rapeli <mikko.rapeli@iki.fi>
Date: Sun, 31 Aug 2014 13:07:31 +0200
Subject: exynos_drm.h: use __u64 from linux/types.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes userspace compilation error:

drm/exynos_drm.h:30:2: error: unknown type name ‘uint64_t’

Signed-off-by: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/drm/exynos_drm.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/exynos_drm.h b/include/uapi/drm/exynos_drm.h
index 5575ed1598bd..cf431b76c320 100644
--- a/include/uapi/drm/exynos_drm.h
+++ b/include/uapi/drm/exynos_drm.h
@@ -27,7 +27,7 @@
  *	- this handle will be set by gem module of kernel side.
  */
 struct drm_exynos_gem_create {
-	uint64_t size;
+	__u64 size;
 	unsigned int flags;
 	unsigned int handle;
 };
@@ -44,7 +44,7 @@ struct drm_exynos_gem_create {
 struct drm_exynos_gem_info {
 	unsigned int handle;
 	unsigned int flags;
-	uint64_t size;
+	__u64 size;
 };
 
 /**
@@ -58,7 +58,7 @@ struct drm_exynos_gem_info {
 struct drm_exynos_vidi_connection {
 	unsigned int connection;
 	unsigned int extensions;
-	uint64_t edid;
+	__u64 edid;
 };
 
 /* memory type definitions. */
-- 
cgit v1.2.3-58-ga151


From 8860487ef39bef5765354ec7dd195983b49f9349 Mon Sep 17 00:00:00 2001
From: Mikko Rapeli <mikko.rapeli@iki.fi>
Date: Sun, 31 Aug 2014 13:09:51 +0200
Subject: nouveau_drm.h: use __u32 and __u64 from linux/types.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes userspace compilation errors like:

drm/nouveau_drm.h:41:2: error: unknown type name ‘uint32_t’

Signed-off-by: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/drm/nouveau_drm.h | 86 +++++++++++++++++++++---------------------
 1 file changed, 44 insertions(+), 42 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h
index fd594cc73cc0..500d82aecbe4 100644
--- a/include/uapi/drm/nouveau_drm.h
+++ b/include/uapi/drm/nouveau_drm.h
@@ -27,6 +27,8 @@
 
 #define DRM_NOUVEAU_EVENT_NVIF                                       0x80000000
 
+#include <drm/drm.h>
+
 #define NOUVEAU_GEM_DOMAIN_CPU       (1 << 0)
 #define NOUVEAU_GEM_DOMAIN_VRAM      (1 << 1)
 #define NOUVEAU_GEM_DOMAIN_GART      (1 << 2)
@@ -41,34 +43,34 @@
 #define NOUVEAU_GEM_TILE_NONCONTIG   0x00000008
 
 struct drm_nouveau_gem_info {
-	uint32_t handle;
-	uint32_t domain;
-	uint64_t size;
-	uint64_t offset;
-	uint64_t map_handle;
-	uint32_t tile_mode;
-	uint32_t tile_flags;
+	__u32 handle;
+	__u32 domain;
+	__u64 size;
+	__u64 offset;
+	__u64 map_handle;
+	__u32 tile_mode;
+	__u32 tile_flags;
 };
 
 struct drm_nouveau_gem_new {
 	struct drm_nouveau_gem_info info;
-	uint32_t channel_hint;
-	uint32_t align;
+	__u32 channel_hint;
+	__u32 align;
 };
 
 #define NOUVEAU_GEM_MAX_BUFFERS 1024
 struct drm_nouveau_gem_pushbuf_bo_presumed {
-	uint32_t valid;
-	uint32_t domain;
-	uint64_t offset;
+	__u32 valid;
+	__u32 domain;
+	__u64 offset;
 };
 
 struct drm_nouveau_gem_pushbuf_bo {
-	uint64_t user_priv;
-	uint32_t handle;
-	uint32_t read_domains;
-	uint32_t write_domains;
-	uint32_t valid_domains;
+	__u64 user_priv;
+	__u32 handle;
+	__u32 read_domains;
+	__u32 write_domains;
+	__u32 valid_domains;
 	struct drm_nouveau_gem_pushbuf_bo_presumed presumed;
 };
 
@@ -77,46 +79,46 @@ struct drm_nouveau_gem_pushbuf_bo {
 #define NOUVEAU_GEM_RELOC_OR   (1 << 2)
 #define NOUVEAU_GEM_MAX_RELOCS 1024
 struct drm_nouveau_gem_pushbuf_reloc {
-	uint32_t reloc_bo_index;
-	uint32_t reloc_bo_offset;
-	uint32_t bo_index;
-	uint32_t flags;
-	uint32_t data;
-	uint32_t vor;
-	uint32_t tor;
+	__u32 reloc_bo_index;
+	__u32 reloc_bo_offset;
+	__u32 bo_index;
+	__u32 flags;
+	__u32 data;
+	__u32 vor;
+	__u32 tor;
 };
 
 #define NOUVEAU_GEM_MAX_PUSH 512
 struct drm_nouveau_gem_pushbuf_push {
-	uint32_t bo_index;
-	uint32_t pad;
-	uint64_t offset;
-	uint64_t length;
+	__u32 bo_index;
+	__u32 pad;
+	__u64 offset;
+	__u64 length;
 };
 
 struct drm_nouveau_gem_pushbuf {
-	uint32_t channel;
-	uint32_t nr_buffers;
-	uint64_t buffers;
-	uint32_t nr_relocs;
-	uint32_t nr_push;
-	uint64_t relocs;
-	uint64_t push;
-	uint32_t suffix0;
-	uint32_t suffix1;
-	uint64_t vram_available;
-	uint64_t gart_available;
+	__u32 channel;
+	__u32 nr_buffers;
+	__u64 buffers;
+	__u32 nr_relocs;
+	__u32 nr_push;
+	__u64 relocs;
+	__u64 push;
+	__u32 suffix0;
+	__u32 suffix1;
+	__u64 vram_available;
+	__u64 gart_available;
 };
 
 #define NOUVEAU_GEM_CPU_PREP_NOWAIT                                  0x00000001
 #define NOUVEAU_GEM_CPU_PREP_WRITE                                   0x00000004
 struct drm_nouveau_gem_cpu_prep {
-	uint32_t handle;
-	uint32_t flags;
+	__u32 handle;
+	__u32 flags;
 };
 
 struct drm_nouveau_gem_cpu_fini {
-	uint32_t handle;
+	__u32 handle;
 };
 
 #define DRM_NOUVEAU_GETPARAM           0x00 /* deprecated */
-- 
cgit v1.2.3-58-ga151


From 31b4dfe24e903e995a32f17e9a9cafbbecabc77a Mon Sep 17 00:00:00 2001
From: Mikko Rapeli <mikko.rapeli@iki.fi>
Date: Sun, 31 Aug 2014 13:11:54 +0200
Subject: radeon_drm.h: use __u32 and __u64 from linux/types.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes userspace compiler error:

drm/radeon_drm.h:794:2: error: unknown type name ‘uint64_t’

Signed-off-by: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/drm/radeon_drm.h | 128 +++++++++++++++++++++---------------------
 1 file changed, 64 insertions(+), 64 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index 01aa2a8e3f8d..ccb9bcd82685 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -793,9 +793,9 @@ typedef struct drm_radeon_surface_free {
 #define RADEON_GEM_DOMAIN_VRAM		0x4
 
 struct drm_radeon_gem_info {
-	uint64_t	gart_size;
-	uint64_t	vram_size;
-	uint64_t	vram_visible;
+	__u64	gart_size;
+	__u64	vram_size;
+	__u64	vram_visible;
 };
 
 #define RADEON_GEM_NO_BACKING_STORE	(1 << 0)
@@ -807,11 +807,11 @@ struct drm_radeon_gem_info {
 #define RADEON_GEM_NO_CPU_ACCESS	(1 << 4)
 
 struct drm_radeon_gem_create {
-	uint64_t	size;
-	uint64_t	alignment;
-	uint32_t	handle;
-	uint32_t	initial_domain;
-	uint32_t	flags;
+	__u64	size;
+	__u64	alignment;
+	__u32	handle;
+	__u32	initial_domain;
+	__u32	flags;
 };
 
 /*
@@ -825,10 +825,10 @@ struct drm_radeon_gem_create {
 #define RADEON_GEM_USERPTR_REGISTER	(1 << 3)
 
 struct drm_radeon_gem_userptr {
-	uint64_t		addr;
-	uint64_t		size;
-	uint32_t		flags;
-	uint32_t		handle;
+	__u64		addr;
+	__u64		size;
+	__u32		flags;
+	__u32		handle;
 };
 
 #define RADEON_TILING_MACRO				0x1
@@ -850,72 +850,72 @@ struct drm_radeon_gem_userptr {
 #define RADEON_TILING_EG_STENCIL_TILE_SPLIT_MASK	0xf
 
 struct drm_radeon_gem_set_tiling {
-	uint32_t	handle;
-	uint32_t	tiling_flags;
-	uint32_t	pitch;
+	__u32	handle;
+	__u32	tiling_flags;
+	__u32	pitch;
 };
 
 struct drm_radeon_gem_get_tiling {
-	uint32_t	handle;
-	uint32_t	tiling_flags;
-	uint32_t	pitch;
+	__u32	handle;
+	__u32	tiling_flags;
+	__u32	pitch;
 };
 
 struct drm_radeon_gem_mmap {
-	uint32_t	handle;
-	uint32_t	pad;
-	uint64_t	offset;
-	uint64_t	size;
-	uint64_t	addr_ptr;
+	__u32	handle;
+	__u32	pad;
+	__u64	offset;
+	__u64	size;
+	__u64	addr_ptr;
 };
 
 struct drm_radeon_gem_set_domain {
-	uint32_t	handle;
-	uint32_t	read_domains;
-	uint32_t	write_domain;
+	__u32	handle;
+	__u32	read_domains;
+	__u32	write_domain;
 };
 
 struct drm_radeon_gem_wait_idle {
-	uint32_t	handle;
-	uint32_t	pad;
+	__u32	handle;
+	__u32	pad;
 };
 
 struct drm_radeon_gem_busy {
-	uint32_t	handle;
-	uint32_t        domain;
+	__u32	handle;
+	__u32        domain;
 };
 
 struct drm_radeon_gem_pread {
 	/** Handle for the object being read. */
-	uint32_t handle;
-	uint32_t pad;
+	__u32 handle;
+	__u32 pad;
 	/** Offset into the object to read from */
-	uint64_t offset;
+	__u64 offset;
 	/** Length of data to read */
-	uint64_t size;
+	__u64 size;
 	/** Pointer to write the data into. */
 	/* void *, but pointers are not 32/64 compatible */
-	uint64_t data_ptr;
+	__u64 data_ptr;
 };
 
 struct drm_radeon_gem_pwrite {
 	/** Handle for the object being written to. */
-	uint32_t handle;
-	uint32_t pad;
+	__u32 handle;
+	__u32 pad;
 	/** Offset into the object to write to */
-	uint64_t offset;
+	__u64 offset;
 	/** Length of data to write */
-	uint64_t size;
+	__u64 size;
 	/** Pointer to read the data from. */
 	/* void *, but pointers are not 32/64 compatible */
-	uint64_t data_ptr;
+	__u64 data_ptr;
 };
 
 /* Sets or returns a value associated with a buffer. */
 struct drm_radeon_gem_op {
-	uint32_t	handle; /* buffer */
-	uint32_t	op;     /* RADEON_GEM_OP_* */
-	uint64_t	value;  /* input or return value */
+	__u32	handle; /* buffer */
+	__u32	op;     /* RADEON_GEM_OP_* */
+	__u64	value;  /* input or return value */
 };
 
 #define RADEON_GEM_OP_GET_INITIAL_DOMAIN	0
@@ -935,11 +935,11 @@ struct drm_radeon_gem_op {
 #define RADEON_VM_PAGE_SNOOPED		(1 << 4)
 
 struct drm_radeon_gem_va {
-	uint32_t		handle;
-	uint32_t		operation;
-	uint32_t		vm_id;
-	uint32_t		flags;
-	uint64_t		offset;
+	__u32		handle;
+	__u32		operation;
+	__u32		vm_id;
+	__u32		flags;
+	__u64		offset;
 };
 
 #define RADEON_CHUNK_ID_RELOCS	0x01
@@ -961,29 +961,29 @@ struct drm_radeon_gem_va {
 /* 0 = normal, + = higher priority, - = lower priority */
 
 struct drm_radeon_cs_chunk {
-	uint32_t		chunk_id;
-	uint32_t		length_dw;
-	uint64_t		chunk_data;
+	__u32		chunk_id;
+	__u32		length_dw;
+	__u64		chunk_data;
 };
 
 /* drm_radeon_cs_reloc.flags */
 #define RADEON_RELOC_PRIO_MASK		(0xf << 0)
 
 struct drm_radeon_cs_reloc {
-	uint32_t		handle;
-	uint32_t		read_domains;
-	uint32_t		write_domain;
-	uint32_t		flags;
+	__u32		handle;
+	__u32		read_domains;
+	__u32		write_domain;
+	__u32		flags;
 };
 
 struct drm_radeon_cs {
-	uint32_t		num_chunks;
-	uint32_t		cs_id;
-	/* this points to uint64_t * which point to cs chunks */
-	uint64_t		chunks;
+	__u32		num_chunks;
+	__u32		cs_id;
+	/* this points to __u64 * which point to cs chunks */
+	__u64		chunks;
 	/* updates to the limits after this CS ioctl */
-	uint64_t		gart_limit;
-	uint64_t		vram_limit;
+	__u64		gart_limit;
+	__u64		vram_limit;
 };
 
 #define RADEON_INFO_DEVICE_ID		0x00
@@ -1042,9 +1042,9 @@ struct drm_radeon_cs {
 #define RADEON_INFO_GPU_RESET_COUNTER	0x26
 
 struct drm_radeon_info {
-	uint32_t		request;
-	uint32_t		pad;
-	uint64_t		value;
+	__u32		request;
+	__u32		pad;
+	__u64		value;
 };
 
 /* Those correspond to the tile index to use, this is to explicitly state
-- 
cgit v1.2.3-58-ga151


From 5b8573f7a0e97bbb444c51d129fe10edfde08b40 Mon Sep 17 00:00:00 2001
From: Mikko Rapeli <mikko.rapeli@iki.fi>
Date: Sun, 31 Aug 2014 16:20:20 +0200
Subject: via_drm.h: don't include non-existing via_drmclient.h

Fixes compiler error:

drm/via_drm.h:36:27: fatal error: via_drmclient.h: No such file or directory

Signed-off-by: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/drm/via_drm.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/via_drm.h b/include/uapi/drm/via_drm.h
index 45bc80c3714b..c8c053a12e93 100644
--- a/include/uapi/drm/via_drm.h
+++ b/include/uapi/drm/via_drm.h
@@ -33,9 +33,6 @@
 #ifndef _VIA_DEFINES_
 #define _VIA_DEFINES_
 
-#ifndef __KERNEL__
-#include "via_drmclient.h"
-#endif
 
 #define VIA_NR_SAREA_CLIPRECTS		8
 #define VIA_NR_XVMC_PORTS               10
-- 
cgit v1.2.3-58-ga151


From 21de2fda3cb31ed3ef1807310ac982a1b8fe7089 Mon Sep 17 00:00:00 2001
From: Mikko Rapeli <mikko.rapeli@iki.fi>
Date: Wed, 11 Mar 2015 03:07:51 +0100
Subject: include/uapi/drm/vmwgfx_drm.h: use __s32, __u32 and __u64 from
 linux/types.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes userspace compilation errors like:

error: unknown type name ‘uint32_t’

Signed-off-by: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/drm/vmwgfx_drm.h | 264 +++++++++++++++++++++---------------------
 1 file changed, 132 insertions(+), 132 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/vmwgfx_drm.h b/include/uapi/drm/vmwgfx_drm.h
index 05b204954d16..ef31f448fecf 100644
--- a/include/uapi/drm/vmwgfx_drm.h
+++ b/include/uapi/drm/vmwgfx_drm.h
@@ -111,9 +111,9 @@ enum drm_vmw_handle_type {
  */
 
 struct drm_vmw_getparam_arg {
-	uint64_t value;
-	uint32_t param;
-	uint32_t pad64;
+	__u64 value;
+	__u32 param;
+	__u32 pad64;
 };
 
 /*************************************************************************/
@@ -134,8 +134,8 @@ struct drm_vmw_getparam_arg {
  */
 
 struct drm_vmw_context_arg {
-	int32_t cid;
-	uint32_t pad64;
+	__s32 cid;
+	__u32 pad64;
 };
 
 /*************************************************************************/
@@ -165,7 +165,7 @@ struct drm_vmw_context_arg {
  * @mip_levels: Number of mip levels for each face.
  * An unused face should have 0 encoded.
  * @size_addr: Address of a user-space array of sruct drm_vmw_size
- * cast to an uint64_t for 32-64 bit compatibility.
+ * cast to an __u64 for 32-64 bit compatibility.
  * The size of the array should equal the total number of mipmap levels.
  * @shareable: Boolean whether other clients (as identified by file descriptors)
  * may reference this surface.
@@ -177,12 +177,12 @@ struct drm_vmw_context_arg {
  */
 
 struct drm_vmw_surface_create_req {
-	uint32_t flags;
-	uint32_t format;
-	uint32_t mip_levels[DRM_VMW_MAX_SURFACE_FACES];
-	uint64_t size_addr;
-	int32_t shareable;
-	int32_t scanout;
+	__u32 flags;
+	__u32 format;
+	__u32 mip_levels[DRM_VMW_MAX_SURFACE_FACES];
+	__u64 size_addr;
+	__s32 shareable;
+	__s32 scanout;
 };
 
 /**
@@ -197,7 +197,7 @@ struct drm_vmw_surface_create_req {
  */
 
 struct drm_vmw_surface_arg {
-	int32_t sid;
+	__s32 sid;
 	enum drm_vmw_handle_type handle_type;
 };
 
@@ -213,10 +213,10 @@ struct drm_vmw_surface_arg {
  */
 
 struct drm_vmw_size {
-	uint32_t width;
-	uint32_t height;
-	uint32_t depth;
-	uint32_t pad64;
+	__u32 width;
+	__u32 height;
+	__u32 depth;
+	__u32 pad64;
 };
 
 /**
@@ -284,13 +284,13 @@ union drm_vmw_surface_reference_arg {
 /**
  * struct drm_vmw_execbuf_arg
  *
- * @commands: User-space address of a command buffer cast to an uint64_t.
+ * @commands: User-space address of a command buffer cast to an __u64.
  * @command-size: Size in bytes of the command buffer.
  * @throttle-us: Sleep until software is less than @throttle_us
  * microseconds ahead of hardware. The driver may round this value
  * to the nearest kernel tick.
  * @fence_rep: User-space address of a struct drm_vmw_fence_rep cast to an
- * uint64_t.
+ * __u64.
  * @version: Allows expanding the execbuf ioctl parameters without breaking
  * backwards compatibility, since user-space will always tell the kernel
  * which version it uses.
@@ -302,14 +302,14 @@ union drm_vmw_surface_reference_arg {
 #define DRM_VMW_EXECBUF_VERSION 2
 
 struct drm_vmw_execbuf_arg {
-	uint64_t commands;
-	uint32_t command_size;
-	uint32_t throttle_us;
-	uint64_t fence_rep;
-	uint32_t version;
-	uint32_t flags;
-	uint32_t context_handle;
-	uint32_t pad64;
+	__u64 commands;
+	__u32 command_size;
+	__u32 throttle_us;
+	__u64 fence_rep;
+	__u32 version;
+	__u32 flags;
+	__u32 context_handle;
+	__u32 pad64;
 };
 
 /**
@@ -338,12 +338,12 @@ struct drm_vmw_execbuf_arg {
  */
 
 struct drm_vmw_fence_rep {
-	uint32_t handle;
-	uint32_t mask;
-	uint32_t seqno;
-	uint32_t passed_seqno;
-	uint32_t pad64;
-	int32_t error;
+	__u32 handle;
+	__u32 mask;
+	__u32 seqno;
+	__u32 passed_seqno;
+	__u32 pad64;
+	__s32 error;
 };
 
 /*************************************************************************/
@@ -373,8 +373,8 @@ struct drm_vmw_fence_rep {
  */
 
 struct drm_vmw_alloc_dmabuf_req {
-	uint32_t size;
-	uint32_t pad64;
+	__u32 size;
+	__u32 pad64;
 };
 
 /**
@@ -391,11 +391,11 @@ struct drm_vmw_alloc_dmabuf_req {
  */
 
 struct drm_vmw_dmabuf_rep {
-	uint64_t map_handle;
-	uint32_t handle;
-	uint32_t cur_gmr_id;
-	uint32_t cur_gmr_offset;
-	uint32_t pad64;
+	__u64 map_handle;
+	__u32 handle;
+	__u32 cur_gmr_id;
+	__u32 cur_gmr_offset;
+	__u32 pad64;
 };
 
 /**
@@ -428,8 +428,8 @@ union drm_vmw_alloc_dmabuf_arg {
  */
 
 struct drm_vmw_unref_dmabuf_arg {
-	uint32_t handle;
-	uint32_t pad64;
+	__u32 handle;
+	__u32 pad64;
 };
 
 /*************************************************************************/
@@ -452,10 +452,10 @@ struct drm_vmw_unref_dmabuf_arg {
  */
 
 struct drm_vmw_rect {
-	int32_t x;
-	int32_t y;
-	uint32_t w;
-	uint32_t h;
+	__s32 x;
+	__s32 y;
+	__u32 w;
+	__u32 h;
 };
 
 /**
@@ -477,21 +477,21 @@ struct drm_vmw_rect {
  */
 
 struct drm_vmw_control_stream_arg {
-	uint32_t stream_id;
-	uint32_t enabled;
+	__u32 stream_id;
+	__u32 enabled;
 
-	uint32_t flags;
-	uint32_t color_key;
+	__u32 flags;
+	__u32 color_key;
 
-	uint32_t handle;
-	uint32_t offset;
-	int32_t format;
-	uint32_t size;
-	uint32_t width;
-	uint32_t height;
-	uint32_t pitch[3];
+	__u32 handle;
+	__u32 offset;
+	__s32 format;
+	__u32 size;
+	__u32 width;
+	__u32 height;
+	__u32 pitch[3];
 
-	uint32_t pad64;
+	__u32 pad64;
 	struct drm_vmw_rect src;
 	struct drm_vmw_rect dst;
 };
@@ -519,12 +519,12 @@ struct drm_vmw_control_stream_arg {
  */
 
 struct drm_vmw_cursor_bypass_arg {
-	uint32_t flags;
-	uint32_t crtc_id;
-	int32_t xpos;
-	int32_t ypos;
-	int32_t xhot;
-	int32_t yhot;
+	__u32 flags;
+	__u32 crtc_id;
+	__s32 xpos;
+	__s32 ypos;
+	__s32 xhot;
+	__s32 yhot;
 };
 
 /*************************************************************************/
@@ -542,8 +542,8 @@ struct drm_vmw_cursor_bypass_arg {
  */
 
 struct drm_vmw_stream_arg {
-	uint32_t stream_id;
-	uint32_t pad64;
+	__u32 stream_id;
+	__u32 pad64;
 };
 
 /*************************************************************************/
@@ -565,7 +565,7 @@ struct drm_vmw_stream_arg {
 /**
  * struct drm_vmw_get_3d_cap_arg
  *
- * @buffer: Pointer to a buffer for capability data, cast to an uint64_t
+ * @buffer: Pointer to a buffer for capability data, cast to an __u64
  * @size: Max size to copy
  *
  * Input argument to the DRM_VMW_GET_3D_CAP_IOCTL
@@ -573,9 +573,9 @@ struct drm_vmw_stream_arg {
  */
 
 struct drm_vmw_get_3d_cap_arg {
-	uint64_t buffer;
-	uint32_t max_size;
-	uint32_t pad64;
+	__u64 buffer;
+	__u32 max_size;
+	__u32 pad64;
 };
 
 /*************************************************************************/
@@ -624,14 +624,14 @@ struct drm_vmw_get_3d_cap_arg {
  */
 
 struct drm_vmw_fence_wait_arg {
-	uint32_t handle;
-	int32_t  cookie_valid;
-	uint64_t kernel_cookie;
-	uint64_t timeout_us;
-	int32_t lazy;
-	int32_t flags;
-	int32_t wait_options;
-	int32_t pad64;
+	__u32 handle;
+	__s32  cookie_valid;
+	__u64 kernel_cookie;
+	__u64 timeout_us;
+	__s32 lazy;
+	__s32 flags;
+	__s32 wait_options;
+	__s32 pad64;
 };
 
 /*************************************************************************/
@@ -655,12 +655,12 @@ struct drm_vmw_fence_wait_arg {
  */
 
 struct drm_vmw_fence_signaled_arg {
-	 uint32_t handle;
-	 uint32_t flags;
-	 int32_t signaled;
-	 uint32_t passed_seqno;
-	 uint32_t signaled_flags;
-	 uint32_t pad64;
+	 __u32 handle;
+	 __u32 flags;
+	 __s32 signaled;
+	 __u32 passed_seqno;
+	 __u32 signaled_flags;
+	 __u32 pad64;
 };
 
 /*************************************************************************/
@@ -681,8 +681,8 @@ struct drm_vmw_fence_signaled_arg {
  */
 
 struct drm_vmw_fence_arg {
-	 uint32_t handle;
-	 uint32_t pad64;
+	 __u32 handle;
+	 __u32 pad64;
 };
 
 
@@ -703,9 +703,9 @@ struct drm_vmw_fence_arg {
 
 struct drm_vmw_event_fence {
 	struct drm_event base;
-	uint64_t user_data;
-	uint32_t tv_sec;
-	uint32_t tv_usec;
+	__u64 user_data;
+	__u32 tv_sec;
+	__u32 tv_usec;
 };
 
 /*
@@ -717,17 +717,17 @@ struct drm_vmw_event_fence {
 /**
  * struct drm_vmw_fence_event_arg
  *
- * @fence_rep: Pointer to fence_rep structure cast to uint64_t or 0 if
+ * @fence_rep: Pointer to fence_rep structure cast to __u64 or 0 if
  * the fence is not supposed to be referenced by user-space.
  * @user_info: Info to be delivered with the event.
  * @handle: Attach the event to this fence only.
  * @flags: A set of flags as defined above.
  */
 struct drm_vmw_fence_event_arg {
-	uint64_t fence_rep;
-	uint64_t user_data;
-	uint32_t handle;
-	uint32_t flags;
+	__u64 fence_rep;
+	__u64 user_data;
+	__u32 handle;
+	__u32 flags;
 };
 
 
@@ -747,7 +747,7 @@ struct drm_vmw_fence_event_arg {
  * @sid: Surface id to present from.
  * @dest_x: X placement coordinate for surface.
  * @dest_y: Y placement coordinate for surface.
- * @clips_ptr: Pointer to an array of clip rects cast to an uint64_t.
+ * @clips_ptr: Pointer to an array of clip rects cast to an __u64.
  * @num_clips: Number of cliprects given relative to the framebuffer origin,
  * in the same coordinate space as the frame buffer.
  * @pad64: Unused 64-bit padding.
@@ -756,13 +756,13 @@ struct drm_vmw_fence_event_arg {
  */
 
 struct drm_vmw_present_arg {
-	uint32_t fb_id;
-	uint32_t sid;
-	int32_t dest_x;
-	int32_t dest_y;
-	uint64_t clips_ptr;
-	uint32_t num_clips;
-	uint32_t pad64;
+	__u32 fb_id;
+	__u32 sid;
+	__s32 dest_x;
+	__s32 dest_y;
+	__u64 clips_ptr;
+	__u32 num_clips;
+	__u32 pad64;
 };
 
 
@@ -780,16 +780,16 @@ struct drm_vmw_present_arg {
  * struct drm_vmw_present_arg
  * @fb_id: fb_id to present / read back from.
  * @num_clips: Number of cliprects.
- * @clips_ptr: Pointer to an array of clip rects cast to an uint64_t.
- * @fence_rep: Pointer to a struct drm_vmw_fence_rep, cast to an uint64_t.
+ * @clips_ptr: Pointer to an array of clip rects cast to an __u64.
+ * @fence_rep: Pointer to a struct drm_vmw_fence_rep, cast to an __u64.
  * If this member is NULL, then the ioctl should not return a fence.
  */
 
 struct drm_vmw_present_readback_arg {
-	 uint32_t fb_id;
-	 uint32_t num_clips;
-	 uint64_t clips_ptr;
-	 uint64_t fence_rep;
+	 __u32 fb_id;
+	 __u32 num_clips;
+	 __u64 clips_ptr;
+	 __u64 fence_rep;
 };
 
 /*************************************************************************/
@@ -805,14 +805,14 @@ struct drm_vmw_present_readback_arg {
  * struct drm_vmw_update_layout_arg
  *
  * @num_outputs: number of active connectors
- * @rects: pointer to array of drm_vmw_rect cast to an uint64_t
+ * @rects: pointer to array of drm_vmw_rect cast to an __u64
  *
  * Input argument to the DRM_VMW_UPDATE_LAYOUT Ioctl.
  */
 struct drm_vmw_update_layout_arg {
-	uint32_t num_outputs;
-	uint32_t pad64;
-	uint64_t rects;
+	__u32 num_outputs;
+	__u32 pad64;
+	__u64 rects;
 };
 
 
@@ -849,10 +849,10 @@ enum drm_vmw_shader_type {
  */
 struct drm_vmw_shader_create_arg {
 	enum drm_vmw_shader_type shader_type;
-	uint32_t size;
-	uint32_t buffer_handle;
-	uint32_t shader_handle;
-	uint64_t offset;
+	__u32 size;
+	__u32 buffer_handle;
+	__u32 shader_handle;
+	__u64 offset;
 };
 
 /*************************************************************************/
@@ -871,8 +871,8 @@ struct drm_vmw_shader_create_arg {
  * Input argument to the DRM_VMW_UNREF_SHADER ioctl.
  */
 struct drm_vmw_shader_arg {
-	uint32_t handle;
-	uint32_t pad64;
+	__u32 handle;
+	__u32 pad64;
 };
 
 /*************************************************************************/
@@ -918,14 +918,14 @@ enum drm_vmw_surface_flags {
  * Part of output argument for the DRM_VMW_GB_SURFACE_REF Ioctl.
  */
 struct drm_vmw_gb_surface_create_req {
-	uint32_t svga3d_flags;
-	uint32_t format;
-	uint32_t mip_levels;
+	__u32 svga3d_flags;
+	__u32 format;
+	__u32 mip_levels;
 	enum drm_vmw_surface_flags drm_surface_flags;
-	uint32_t multisample_count;
-	uint32_t autogen_filter;
-	uint32_t buffer_handle;
-	uint32_t array_size;
+	__u32 multisample_count;
+	__u32 autogen_filter;
+	__u32 buffer_handle;
+	__u32 array_size;
 	struct drm_vmw_size base_size;
 };
 
@@ -944,11 +944,11 @@ struct drm_vmw_gb_surface_create_req {
  * Output argument for the DRM_VMW_GB_SURFACE_CREATE ioctl.
  */
 struct drm_vmw_gb_surface_create_rep {
-	uint32_t handle;
-	uint32_t backup_size;
-	uint32_t buffer_handle;
-	uint32_t buffer_size;
-	uint64_t buffer_map_handle;
+	__u32 handle;
+	__u32 backup_size;
+	__u32 buffer_handle;
+	__u32 buffer_size;
+	__u64 buffer_map_handle;
 };
 
 /**
@@ -1061,8 +1061,8 @@ enum drm_vmw_synccpu_op {
 struct drm_vmw_synccpu_arg {
 	enum drm_vmw_synccpu_op op;
 	enum drm_vmw_synccpu_flags flags;
-	uint32_t handle;
-	uint32_t pad64;
+	__u32 handle;
+	__u32 pad64;
 };
 
 /*************************************************************************/
-- 
cgit v1.2.3-58-ga151


From 8e51012c4f2c6b4f2a318c71e1b43f552e6d2a2c Mon Sep 17 00:00:00 2001
From: Mikko Rapeli <mikko.rapeli@iki.fi>
Date: Wed, 11 Mar 2015 03:12:23 +0100
Subject: include/uapi/drm/qxl_drm.h: use __s32, __u32 and __u64 from
 linux/types.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes userspace compilation errors like:

error: unknown type name ‘uint32_t’

Signed-off-by: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/drm/qxl_drm.h | 74 +++++++++++++++++++++++-----------------------
 1 file changed, 37 insertions(+), 37 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/qxl_drm.h b/include/uapi/drm/qxl_drm.h
index ebebd36c4117..dface362d729 100644
--- a/include/uapi/drm/qxl_drm.h
+++ b/include/uapi/drm/qxl_drm.h
@@ -30,7 +30,7 @@
 /* Please note that modifications to all structs defined here are
  * subject to backwards-compatibility constraints.
  *
- * Do not use pointers, use uint64_t instead for 32 bit / 64 bit user/kernel
+ * Do not use pointers, use __u64 instead for 32 bit / 64 bit user/kernel
  * compatibility Keep fields aligned to their size
  */
 
@@ -48,14 +48,14 @@
 #define DRM_QXL_ALLOC_SURF  0x06
 
 struct drm_qxl_alloc {
-	uint32_t size;
-	uint32_t handle; /* 0 is an invalid handle */
+	__u32 size;
+	__u32 handle; /* 0 is an invalid handle */
 };
 
 struct drm_qxl_map {
-	uint64_t offset; /* use for mmap system call */
-	uint32_t handle;
-	uint32_t pad;
+	__u64 offset; /* use for mmap system call */
+	__u32 handle;
+	__u32 pad;
 };
 
 /*
@@ -68,59 +68,59 @@ struct drm_qxl_map {
 #define QXL_RELOC_TYPE_SURF 2
 
 struct drm_qxl_reloc {
-	uint64_t src_offset; /* offset into src_handle or src buffer */
-	uint64_t dst_offset; /* offset in dest handle */
-	uint32_t src_handle; /* dest handle to compute address from */
-	uint32_t dst_handle; /* 0 if to command buffer */
-	uint32_t reloc_type;
-	uint32_t pad;
+	__u64 src_offset; /* offset into src_handle or src buffer */
+	__u64 dst_offset; /* offset in dest handle */
+	__u32 src_handle; /* dest handle to compute address from */
+	__u32 dst_handle; /* 0 if to command buffer */
+	__u32 reloc_type;
+	__u32 pad;
 };
 
 struct drm_qxl_command {
-	uint64_t	 __user command; /* void* */
-	uint64_t	 __user relocs; /* struct drm_qxl_reloc* */
-	uint32_t		type;
-	uint32_t		command_size;
-	uint32_t		relocs_num;
-	uint32_t                pad;
+	__u64	 __user command; /* void* */
+	__u64	 __user relocs; /* struct drm_qxl_reloc* */
+	__u32		type;
+	__u32		command_size;
+	__u32		relocs_num;
+	__u32                pad;
 };
 
 /* XXX: call it drm_qxl_commands? */
 struct drm_qxl_execbuffer {
-	uint32_t		flags;		/* for future use */
-	uint32_t		commands_num;
-	uint64_t	 __user commands;	/* struct drm_qxl_command* */
+	__u32		flags;		/* for future use */
+	__u32		commands_num;
+	__u64	 __user commands;	/* struct drm_qxl_command* */
 };
 
 struct drm_qxl_update_area {
-	uint32_t handle;
-	uint32_t top;
-	uint32_t left;
-	uint32_t bottom;
-	uint32_t right;
-	uint32_t pad;
+	__u32 handle;
+	__u32 top;
+	__u32 left;
+	__u32 bottom;
+	__u32 right;
+	__u32 pad;
 };
 
 #define QXL_PARAM_NUM_SURFACES 1 /* rom->n_surfaces */
 #define QXL_PARAM_MAX_RELOCS 2
 struct drm_qxl_getparam {
-	uint64_t param;
-	uint64_t value;
+	__u64 param;
+	__u64 value;
 };
 
 /* these are one bit values */
 struct drm_qxl_clientcap {
-	uint32_t index;
-	uint32_t pad;
+	__u32 index;
+	__u32 pad;
 };
 
 struct drm_qxl_alloc_surf {
-	uint32_t format;
-	uint32_t width;
-	uint32_t height;
-	int32_t stride;
-	uint32_t handle;
-	uint32_t pad;
+	__u32 format;
+	__u32 width;
+	__u32 height;
+	__s32 stride;
+	__u32 handle;
+	__u32 pad;
 };
 
 #define DRM_IOCTL_QXL_ALLOC \
-- 
cgit v1.2.3-58-ga151


From 89545d6d5cb0fab53211d6a36a8037c8020e1b7e Mon Sep 17 00:00:00 2001
From: Mikko Rapeli <mikko.rapeli@iki.fi>
Date: Wed, 2 Dec 2015 23:17:47 +0100
Subject: include/uapi/linux/virtio_gpu.h: use __u8 from <linux/types.h>
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Kernel headers exported to userspace are should these types.

Fixes userspace compilation error:

error: unknown type name ‘uint8_t’

Signed-off-by: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/linux/virtio_gpu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h
index 7a63faa9065c..4b04ead26cd9 100644
--- a/include/uapi/linux/virtio_gpu.h
+++ b/include/uapi/linux/virtio_gpu.h
@@ -287,7 +287,7 @@ struct virtio_gpu_get_capset {
 /* VIRTIO_GPU_RESP_OK_CAPSET */
 struct virtio_gpu_resp_capset {
 	struct virtio_gpu_ctrl_hdr hdr;
-	uint8_t capset_data[];
+	__u8 capset_data[];
 };
 
 #define VIRTIO_GPU_EVENT_DISPLAY (1 << 0)
-- 
cgit v1.2.3-58-ga151


From 35d0f1d54ecd813973091fdb17baae43ea2b60b9 Mon Sep 17 00:00:00 2001
From: Mikko Rapeli <mikko.rapeli@iki.fi>
Date: Sun, 1 Feb 2015 15:10:41 +0100
Subject: include/uapi/linux/agpgart.h: include stdlib.h in userspace
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes userspace compiler error:
error: unknown type name ‘size_t’

Signed-off-by: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/linux/agpgart.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/agpgart.h b/include/uapi/linux/agpgart.h
index 4e828cf487bc..f5251045181a 100644
--- a/include/uapi/linux/agpgart.h
+++ b/include/uapi/linux/agpgart.h
@@ -52,6 +52,7 @@
 
 #ifndef __KERNEL__
 #include <linux/types.h>
+#include <stdlib.h>
 
 struct agp_version {
 	__u16 major;
-- 
cgit v1.2.3-58-ga151


From 29e08b0518fcddda2ae7b5014dfe8ac4eaa6ece7 Mon Sep 17 00:00:00 2001
From: Gabriel Laskar <gabriel@lse.epita.fr>
Date: Mon, 30 Nov 2015 15:10:39 +0100
Subject: drm: use __u{32,64} instead of uint{32,64}_t in virtgpu_drm.h

Signed-off-by: Gabriel Laskar <gabriel@lse.epita.fr>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
CC: Emil Velikov <emil.l.velikov@gmail.com>
CC: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/drm/virtgpu_drm.h | 98 +++++++++++++++++++++---------------------
 1 file changed, 49 insertions(+), 49 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h
index fc9e2d6e5e2f..4bcfbe6be8d4 100644
--- a/include/uapi/drm/virtgpu_drm.h
+++ b/include/uapi/drm/virtgpu_drm.h
@@ -30,7 +30,7 @@
 /* Please note that modifications to all structs defined here are
  * subject to backwards-compatibility constraints.
  *
- * Do not use pointers, use uint64_t instead for 32 bit / 64 bit user/kernel
+ * Do not use pointers, use __u64 instead for 32 bit / 64 bit user/kernel
  * compatibility Keep fields aligned to their size
  */
 
@@ -45,88 +45,88 @@
 #define DRM_VIRTGPU_GET_CAPS  0x09
 
 struct drm_virtgpu_map {
-	uint64_t offset; /* use for mmap system call */
-	uint32_t handle;
-	uint32_t pad;
+	__u64 offset; /* use for mmap system call */
+	__u32 handle;
+	__u32 pad;
 };
 
 struct drm_virtgpu_execbuffer {
-	uint32_t		flags;		/* for future use */
-	uint32_t size;
-	uint64_t command; /* void* */
-	uint64_t bo_handles;
-	uint32_t num_bo_handles;
-	uint32_t pad;
+	__u32		flags;		/* for future use */
+	__u32 size;
+	__u64 command; /* void* */
+	__u64 bo_handles;
+	__u32 num_bo_handles;
+	__u32 pad;
 };
 
 #define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */
 
 struct drm_virtgpu_getparam {
-	uint64_t param;
-	uint64_t value;
+	__u64 param;
+	__u64 value;
 };
 
 /* NO_BO flags? NO resource flag? */
 /* resource flag for y_0_top */
 struct drm_virtgpu_resource_create {
-	uint32_t target;
-	uint32_t format;
-	uint32_t bind;
-	uint32_t width;
-	uint32_t height;
-	uint32_t depth;
-	uint32_t array_size;
-	uint32_t last_level;
-	uint32_t nr_samples;
-	uint32_t flags;
-	uint32_t bo_handle; /* if this is set - recreate a new resource attached to this bo ? */
-	uint32_t res_handle;  /* returned by kernel */
-	uint32_t size;        /* validate transfer in the host */
-	uint32_t stride;      /* validate transfer in the host */
+	__u32 target;
+	__u32 format;
+	__u32 bind;
+	__u32 width;
+	__u32 height;
+	__u32 depth;
+	__u32 array_size;
+	__u32 last_level;
+	__u32 nr_samples;
+	__u32 flags;
+	__u32 bo_handle; /* if this is set - recreate a new resource attached to this bo ? */
+	__u32 res_handle;  /* returned by kernel */
+	__u32 size;        /* validate transfer in the host */
+	__u32 stride;      /* validate transfer in the host */
 };
 
 struct drm_virtgpu_resource_info {
-	uint32_t bo_handle;
-	uint32_t res_handle;
-	uint32_t size;
-	uint32_t stride;
+	__u32 bo_handle;
+	__u32 res_handle;
+	__u32 size;
+	__u32 stride;
 };
 
 struct drm_virtgpu_3d_box {
-	uint32_t x;
-	uint32_t y;
-	uint32_t z;
-	uint32_t w;
-	uint32_t h;
-	uint32_t d;
+	__u32 x;
+	__u32 y;
+	__u32 z;
+	__u32 w;
+	__u32 h;
+	__u32 d;
 };
 
 struct drm_virtgpu_3d_transfer_to_host {
-	uint32_t bo_handle;
+	__u32 bo_handle;
 	struct drm_virtgpu_3d_box box;
-	uint32_t level;
-	uint32_t offset;
+	__u32 level;
+	__u32 offset;
 };
 
 struct drm_virtgpu_3d_transfer_from_host {
-	uint32_t bo_handle;
+	__u32 bo_handle;
 	struct drm_virtgpu_3d_box box;
-	uint32_t level;
-	uint32_t offset;
+	__u32 level;
+	__u32 offset;
 };
 
 #define VIRTGPU_WAIT_NOWAIT 1 /* like it */
 struct drm_virtgpu_3d_wait {
-	uint32_t handle; /* 0 is an invalid handle */
-	uint32_t flags;
+	__u32 handle; /* 0 is an invalid handle */
+	__u32 flags;
 };
 
 struct drm_virtgpu_get_caps {
-	uint32_t cap_set_id;
-	uint32_t cap_set_ver;
-	uint64_t addr;
-	uint32_t size;
-	uint32_t pad;
+	__u32 cap_set_id;
+	__u32 cap_set_ver;
+	__u64 addr;
+	__u32 size;
+	__u32 pad;
 };
 
 #define DRM_IOCTL_VIRTGPU_MAP \
-- 
cgit v1.2.3-58-ga151


From f95d3aa438e15caa3a7c2d82b45c9df0ae8bc3a3 Mon Sep 17 00:00:00 2001
From: Gabriel Laskar <gabriel@lse.epita.fr>
Date: Mon, 30 Nov 2015 15:10:40 +0100
Subject: drm: Kbuild: add admgpu_drm.h to the installed headers

Signed-off-by: Gabriel Laskar <gabriel@lse.epita.fr>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
CC: Emil Velikov <emil.l.velikov@gmail.com>
CC: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/drm/Kbuild | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/Kbuild b/include/uapi/drm/Kbuild
index 38d437096c35..159551f49313 100644
--- a/include/uapi/drm/Kbuild
+++ b/include/uapi/drm/Kbuild
@@ -3,6 +3,7 @@ header-y += drm.h
 header-y += drm_fourcc.h
 header-y += drm_mode.h
 header-y += drm_sarea.h
+header-y += amdgpu_drm.h
 header-y += exynos_drm.h
 header-y += i810_drm.h
 header-y += i915_drm.h
-- 
cgit v1.2.3-58-ga151


From 2ce9dde0d47f2f94ab25c73a30596a7328bcdf1f Mon Sep 17 00:00:00 2001
From: Mikko Rapeli <mikko.rapeli@iki.fi>
Date: Wed, 2 Dec 2015 23:44:33 +0100
Subject: include/uapi/drm/amdgpu_drm.h: use __u32 and __u64 from
 <linux/types.h>
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Kernel headers exported to userspace are supposed to use these.

Fixes compilation errors in userspace:

error: unknown type name ‘uint64_t’
error: unknown type name ‘uint32_t’

Signed-off-by: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/drm/amdgpu_drm.h | 290 +++++++++++++++++++++---------------------
 1 file changed, 145 insertions(+), 145 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index e52933a73580..453a76af123c 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -76,19 +76,19 @@
 
 struct drm_amdgpu_gem_create_in  {
 	/** the requested memory size */
-	uint64_t bo_size;
+	__u64 bo_size;
 	/** physical start_addr alignment in bytes for some HW requirements */
-	uint64_t alignment;
+	__u64 alignment;
 	/** the requested memory domains */
-	uint64_t domains;
+	__u64 domains;
 	/** allocation flags */
-	uint64_t domain_flags;
+	__u64 domain_flags;
 };
 
 struct drm_amdgpu_gem_create_out  {
 	/** returned GEM object handle */
-	uint32_t handle;
-	uint32_t _pad;
+	__u32 handle;
+	__u32 _pad;
 };
 
 union drm_amdgpu_gem_create {
@@ -105,28 +105,28 @@ union drm_amdgpu_gem_create {
 
 struct drm_amdgpu_bo_list_in {
 	/** Type of operation */
-	uint32_t operation;
+	__u32 operation;
 	/** Handle of list or 0 if we want to create one */
-	uint32_t list_handle;
+	__u32 list_handle;
 	/** Number of BOs in list  */
-	uint32_t bo_number;
+	__u32 bo_number;
 	/** Size of each element describing BO */
-	uint32_t bo_info_size;
+	__u32 bo_info_size;
 	/** Pointer to array describing BOs */
-	uint64_t bo_info_ptr;
+	__u64 bo_info_ptr;
 };
 
 struct drm_amdgpu_bo_list_entry {
 	/** Handle of BO */
-	uint32_t bo_handle;
+	__u32 bo_handle;
 	/** New (if specified) BO priority to be used during migration */
-	uint32_t bo_priority;
+	__u32 bo_priority;
 };
 
 struct drm_amdgpu_bo_list_out {
 	/** Handle of resource list  */
-	uint32_t list_handle;
-	uint32_t _pad;
+	__u32 list_handle;
+	__u32 _pad;
 };
 
 union drm_amdgpu_bo_list {
@@ -150,26 +150,26 @@ union drm_amdgpu_bo_list {
 
 struct drm_amdgpu_ctx_in {
 	/** AMDGPU_CTX_OP_* */
-	uint32_t	op;
+	__u32	op;
 	/** For future use, no flags defined so far */
-	uint32_t	flags;
-	uint32_t	ctx_id;
-	uint32_t	_pad;
+	__u32	flags;
+	__u32	ctx_id;
+	__u32	_pad;
 };
 
 union drm_amdgpu_ctx_out {
 		struct {
-			uint32_t	ctx_id;
-			uint32_t	_pad;
+			__u32	ctx_id;
+			__u32	_pad;
 		} alloc;
 
 		struct {
 			/** For future use, no flags defined so far */
-			uint64_t	flags;
+			__u64	flags;
 			/** Number of resets caused by this context so far. */
-			uint32_t	hangs;
+			__u32	hangs;
 			/** Reset status since the last call of the ioctl. */
-			uint32_t	reset_status;
+			__u32	reset_status;
 		} state;
 };
 
@@ -189,12 +189,12 @@ union drm_amdgpu_ctx {
 #define AMDGPU_GEM_USERPTR_REGISTER	(1 << 3)
 
 struct drm_amdgpu_gem_userptr {
-	uint64_t		addr;
-	uint64_t		size;
+	__u64		addr;
+	__u64		size;
 	/* AMDGPU_GEM_USERPTR_* */
-	uint32_t		flags;
+	__u32		flags;
 	/* Resulting GEM handle */
-	uint32_t		handle;
+	__u32		handle;
 };
 
 /* same meaning as the GB_TILE_MODE and GL_MACRO_TILE_MODE fields */
@@ -226,28 +226,28 @@ struct drm_amdgpu_gem_userptr {
 /** The same structure is shared for input/output */
 struct drm_amdgpu_gem_metadata {
 	/** GEM Object handle */
-	uint32_t	handle;
+	__u32	handle;
 	/** Do we want get or set metadata */
-	uint32_t	op;
+	__u32	op;
 	struct {
 		/** For future use, no flags defined so far */
-		uint64_t	flags;
+		__u64	flags;
 		/** family specific tiling info */
-		uint64_t	tiling_info;
-		uint32_t	data_size_bytes;
-		uint32_t	data[64];
+		__u64	tiling_info;
+		__u32	data_size_bytes;
+		__u32	data[64];
 	} data;
 };
 
 struct drm_amdgpu_gem_mmap_in {
 	/** the GEM object handle */
-	uint32_t handle;
-	uint32_t _pad;
+	__u32 handle;
+	__u32 _pad;
 };
 
 struct drm_amdgpu_gem_mmap_out {
 	/** mmap offset from the vma offset manager */
-	uint64_t addr_ptr;
+	__u64 addr_ptr;
 };
 
 union drm_amdgpu_gem_mmap {
@@ -257,18 +257,18 @@ union drm_amdgpu_gem_mmap {
 
 struct drm_amdgpu_gem_wait_idle_in {
 	/** GEM object handle */
-	uint32_t handle;
+	__u32 handle;
 	/** For future use, no flags defined so far */
-	uint32_t flags;
+	__u32 flags;
 	/** Absolute timeout to wait */
-	uint64_t timeout;
+	__u64 timeout;
 };
 
 struct drm_amdgpu_gem_wait_idle_out {
 	/** BO status:  0 - BO is idle, 1 - BO is busy */
-	uint32_t status;
+	__u32 status;
 	/** Returned current memory domain */
-	uint32_t domain;
+	__u32 domain;
 };
 
 union drm_amdgpu_gem_wait_idle {
@@ -278,18 +278,18 @@ union drm_amdgpu_gem_wait_idle {
 
 struct drm_amdgpu_wait_cs_in {
 	/** Command submission handle */
-	uint64_t handle;
+	__u64 handle;
 	/** Absolute timeout to wait */
-	uint64_t timeout;
-	uint32_t ip_type;
-	uint32_t ip_instance;
-	uint32_t ring;
-	uint32_t ctx_id;
+	__u64 timeout;
+	__u32 ip_type;
+	__u32 ip_instance;
+	__u32 ring;
+	__u32 ctx_id;
 };
 
 struct drm_amdgpu_wait_cs_out {
 	/** CS status:  0 - CS completed, 1 - CS still busy */
-	uint64_t status;
+	__u64 status;
 };
 
 union drm_amdgpu_wait_cs {
@@ -303,11 +303,11 @@ union drm_amdgpu_wait_cs {
 /* Sets or returns a value associated with a buffer. */
 struct drm_amdgpu_gem_op {
 	/** GEM object handle */
-	uint32_t	handle;
+	__u32	handle;
 	/** AMDGPU_GEM_OP_* */
-	uint32_t	op;
+	__u32	op;
 	/** Input or return value */
-	uint64_t	value;
+	__u64	value;
 };
 
 #define AMDGPU_VA_OP_MAP			1
@@ -326,18 +326,18 @@ struct drm_amdgpu_gem_op {
 
 struct drm_amdgpu_gem_va {
 	/** GEM object handle */
-	uint32_t handle;
-	uint32_t _pad;
+	__u32 handle;
+	__u32 _pad;
 	/** AMDGPU_VA_OP_* */
-	uint32_t operation;
+	__u32 operation;
 	/** AMDGPU_VM_PAGE_* */
-	uint32_t flags;
+	__u32 flags;
 	/** va address to assign . Must be correctly aligned.*/
-	uint64_t va_address;
+	__u64 va_address;
 	/** Specify offset inside of BO to assign. Must be correctly aligned.*/
-	uint64_t offset_in_bo;
+	__u64 offset_in_bo;
 	/** Specify mapping size. Must be correctly aligned. */
-	uint64_t map_size;
+	__u64 map_size;
 };
 
 #define AMDGPU_HW_IP_GFX          0
@@ -354,24 +354,24 @@ struct drm_amdgpu_gem_va {
 #define AMDGPU_CHUNK_ID_DEPENDENCIES	0x03
 
 struct drm_amdgpu_cs_chunk {
-	uint32_t		chunk_id;
-	uint32_t		length_dw;
-	uint64_t		chunk_data;
+	__u32		chunk_id;
+	__u32		length_dw;
+	__u64		chunk_data;
 };
 
 struct drm_amdgpu_cs_in {
 	/** Rendering context id */
-	uint32_t		ctx_id;
+	__u32		ctx_id;
 	/**  Handle of resource list associated with CS */
-	uint32_t		bo_list_handle;
-	uint32_t		num_chunks;
-	uint32_t		_pad;
-	/** this points to uint64_t * which point to cs chunks */
-	uint64_t		chunks;
+	__u32		bo_list_handle;
+	__u32		num_chunks;
+	__u32		_pad;
+	/** this points to __u64 * which point to cs chunks */
+	__u64		chunks;
 };
 
 struct drm_amdgpu_cs_out {
-	uint64_t handle;
+	__u64 handle;
 };
 
 union drm_amdgpu_cs {
@@ -388,32 +388,32 @@ union drm_amdgpu_cs {
 #define AMDGPU_IB_FLAG_PREAMBLE (1<<1)
 
 struct drm_amdgpu_cs_chunk_ib {
-	uint32_t _pad;
+	__u32 _pad;
 	/** AMDGPU_IB_FLAG_* */
-	uint32_t flags;
+	__u32 flags;
 	/** Virtual address to begin IB execution */
-	uint64_t va_start;
+	__u64 va_start;
 	/** Size of submission */
-	uint32_t ib_bytes;
+	__u32 ib_bytes;
 	/** HW IP to submit to */
-	uint32_t ip_type;
+	__u32 ip_type;
 	/** HW IP index of the same type to submit to  */
-	uint32_t ip_instance;
+	__u32 ip_instance;
 	/** Ring index to submit to */
-	uint32_t ring;
+	__u32 ring;
 };
 
 struct drm_amdgpu_cs_chunk_dep {
-	uint32_t ip_type;
-	uint32_t ip_instance;
-	uint32_t ring;
-	uint32_t ctx_id;
-	uint64_t handle;
+	__u32 ip_type;
+	__u32 ip_instance;
+	__u32 ring;
+	__u32 ctx_id;
+	__u64 handle;
 };
 
 struct drm_amdgpu_cs_chunk_fence {
-	uint32_t handle;
-	uint32_t offset;
+	__u32 handle;
+	__u32 offset;
 };
 
 struct drm_amdgpu_cs_chunk_data {
@@ -486,83 +486,83 @@ struct drm_amdgpu_cs_chunk_data {
 /* Input structure for the INFO ioctl */
 struct drm_amdgpu_info {
 	/* Where the return value will be stored */
-	uint64_t return_pointer;
+	__u64 return_pointer;
 	/* The size of the return value. Just like "size" in "snprintf",
 	 * it limits how many bytes the kernel can write. */
-	uint32_t return_size;
+	__u32 return_size;
 	/* The query request id. */
-	uint32_t query;
+	__u32 query;
 
 	union {
 		struct {
-			uint32_t id;
-			uint32_t _pad;
+			__u32 id;
+			__u32 _pad;
 		} mode_crtc;
 
 		struct {
 			/** AMDGPU_HW_IP_* */
-			uint32_t type;
+			__u32 type;
 			/**
 			 * Index of the IP if there are more IPs of the same
 			 * type. Ignored by AMDGPU_INFO_HW_IP_COUNT.
 			 */
-			uint32_t ip_instance;
+			__u32 ip_instance;
 		} query_hw_ip;
 
 		struct {
-			uint32_t dword_offset;
+			__u32 dword_offset;
 			/** number of registers to read */
-			uint32_t count;
-			uint32_t instance;
+			__u32 count;
+			__u32 instance;
 			/** For future use, no flags defined so far */
-			uint32_t flags;
+			__u32 flags;
 		} read_mmr_reg;
 
 		struct {
 			/** AMDGPU_INFO_FW_* */
-			uint32_t fw_type;
+			__u32 fw_type;
 			/**
 			 * Index of the IP if there are more IPs of
 			 * the same type.
 			 */
-			uint32_t ip_instance;
+			__u32 ip_instance;
 			/**
 			 * Index of the engine. Whether this is used depends
 			 * on the firmware type. (e.g. MEC, SDMA)
 			 */
-			uint32_t index;
-			uint32_t _pad;
+			__u32 index;
+			__u32 _pad;
 		} query_fw;
 	};
 };
 
 struct drm_amdgpu_info_gds {
 	/** GDS GFX partition size */
-	uint32_t gds_gfx_partition_size;
+	__u32 gds_gfx_partition_size;
 	/** GDS compute partition size */
-	uint32_t compute_partition_size;
+	__u32 compute_partition_size;
 	/** total GDS memory size */
-	uint32_t gds_total_size;
+	__u32 gds_total_size;
 	/** GWS size per GFX partition */
-	uint32_t gws_per_gfx_partition;
+	__u32 gws_per_gfx_partition;
 	/** GSW size per compute partition */
-	uint32_t gws_per_compute_partition;
+	__u32 gws_per_compute_partition;
 	/** OA size per GFX partition */
-	uint32_t oa_per_gfx_partition;
+	__u32 oa_per_gfx_partition;
 	/** OA size per compute partition */
-	uint32_t oa_per_compute_partition;
-	uint32_t _pad;
+	__u32 oa_per_compute_partition;
+	__u32 _pad;
 };
 
 struct drm_amdgpu_info_vram_gtt {
-	uint64_t vram_size;
-	uint64_t vram_cpu_accessible_size;
-	uint64_t gtt_size;
+	__u64 vram_size;
+	__u64 vram_cpu_accessible_size;
+	__u64 gtt_size;
 };
 
 struct drm_amdgpu_info_firmware {
-	uint32_t ver;
-	uint32_t feature;
+	__u32 ver;
+	__u32 feature;
 };
 
 #define AMDGPU_VRAM_TYPE_UNKNOWN 0
@@ -576,61 +576,61 @@ struct drm_amdgpu_info_firmware {
 
 struct drm_amdgpu_info_device {
 	/** PCI Device ID */
-	uint32_t device_id;
+	__u32 device_id;
 	/** Internal chip revision: A0, A1, etc.) */
-	uint32_t chip_rev;
-	uint32_t external_rev;
+	__u32 chip_rev;
+	__u32 external_rev;
 	/** Revision id in PCI Config space */
-	uint32_t pci_rev;
-	uint32_t family;
-	uint32_t num_shader_engines;
-	uint32_t num_shader_arrays_per_engine;
+	__u32 pci_rev;
+	__u32 family;
+	__u32 num_shader_engines;
+	__u32 num_shader_arrays_per_engine;
 	/* in KHz */
-	uint32_t gpu_counter_freq;
-	uint64_t max_engine_clock;
-	uint64_t max_memory_clock;
+	__u32 gpu_counter_freq;
+	__u64 max_engine_clock;
+	__u64 max_memory_clock;
 	/* cu information */
-	uint32_t cu_active_number;
-	uint32_t cu_ao_mask;
-	uint32_t cu_bitmap[4][4];
+	__u32 cu_active_number;
+	__u32 cu_ao_mask;
+	__u32 cu_bitmap[4][4];
 	/** Render backend pipe mask. One render backend is CB+DB. */
-	uint32_t enabled_rb_pipes_mask;
-	uint32_t num_rb_pipes;
-	uint32_t num_hw_gfx_contexts;
-	uint32_t _pad;
-	uint64_t ids_flags;
+	__u32 enabled_rb_pipes_mask;
+	__u32 num_rb_pipes;
+	__u32 num_hw_gfx_contexts;
+	__u32 _pad;
+	__u64 ids_flags;
 	/** Starting virtual address for UMDs. */
-	uint64_t virtual_address_offset;
+	__u64 virtual_address_offset;
 	/** The maximum virtual address */
-	uint64_t virtual_address_max;
+	__u64 virtual_address_max;
 	/** Required alignment of virtual addresses. */
-	uint32_t virtual_address_alignment;
+	__u32 virtual_address_alignment;
 	/** Page table entry - fragment size */
-	uint32_t pte_fragment_size;
-	uint32_t gart_page_size;
+	__u32 pte_fragment_size;
+	__u32 gart_page_size;
 	/** constant engine ram size*/
-	uint32_t ce_ram_size;
+	__u32 ce_ram_size;
 	/** video memory type info*/
-	uint32_t vram_type;
+	__u32 vram_type;
 	/** video memory bit width*/
-	uint32_t vram_bit_width;
+	__u32 vram_bit_width;
 	/* vce harvesting instance */
-	uint32_t vce_harvest_config;
+	__u32 vce_harvest_config;
 };
 
 struct drm_amdgpu_info_hw_ip {
 	/** Version of h/w IP */
-	uint32_t  hw_ip_version_major;
-	uint32_t  hw_ip_version_minor;
+	__u32  hw_ip_version_major;
+	__u32  hw_ip_version_minor;
 	/** Capabilities */
-	uint64_t  capabilities_flags;
+	__u64  capabilities_flags;
 	/** command buffer address start alignment*/
-	uint32_t  ib_start_alignment;
+	__u32  ib_start_alignment;
 	/** command buffer size alignment*/
-	uint32_t  ib_size_alignment;
+	__u32  ib_size_alignment;
 	/** Bitmask of available rings. Bit 0 means ring 0, etc. */
-	uint32_t  available_rings;
-	uint32_t  _pad;
+	__u32  available_rings;
+	__u32  _pad;
 };
 
 /*
-- 
cgit v1.2.3-58-ga151


From 6a14d01b91b07f354f11a3774755ed0afca2225b Mon Sep 17 00:00:00 2001
From: Gabriel Laskar <gabriel@lse.epita.fr>
Date: Mon, 30 Nov 2015 15:10:41 +0100
Subject: drm: include drm.h in armada_drm.h

Signed-off-by: Gabriel Laskar <gabriel@lse.epita.fr>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
CC: Emil Velikov <emil.l.velikov@gmail.com>
CC: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/drm/armada_drm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/armada_drm.h b/include/uapi/drm/armada_drm.h
index 8dec3fdc99c7..6de7f0196ca0 100644
--- a/include/uapi/drm/armada_drm.h
+++ b/include/uapi/drm/armada_drm.h
@@ -9,6 +9,8 @@
 #ifndef DRM_ARMADA_IOCTL_H
 #define DRM_ARMADA_IOCTL_H
 
+#include "drm.h"
+
 #define DRM_ARMADA_GEM_CREATE		0x00
 #define DRM_ARMADA_GEM_MMAP		0x02
 #define DRM_ARMADA_GEM_PWRITE		0x03
-- 
cgit v1.2.3-58-ga151


From 47f06c2f937f765042e8d41d7ee09f475a22e9d0 Mon Sep 17 00:00:00 2001
From: Gabriel Laskar <gabriel@lse.epita.fr>
Date: Mon, 30 Nov 2015 15:10:42 +0100
Subject: drm: drm_fourcc.h fix includes

Instead of using linux/types.h, drm headers should use drm.h, in order
to handle the portability issues in only one place.

Signed-off-by: Gabriel Laskar <gabriel@lse.epita.fr>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
CC: Emil Velikov <emil.l.velikov@gmail.com>
CC: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/drm/drm_fourcc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
index 0b69a7753558..998bd253faad 100644
--- a/include/uapi/drm/drm_fourcc.h
+++ b/include/uapi/drm/drm_fourcc.h
@@ -24,7 +24,7 @@
 #ifndef DRM_FOURCC_H
 #define DRM_FOURCC_H
 
-#include <linux/types.h>
+#include "drm.h"
 
 #define fourcc_code(a, b, c, d) ((__u32)(a) | ((__u32)(b) << 8) | \
 				 ((__u32)(c) << 16) | ((__u32)(d) << 24))
-- 
cgit v1.2.3-58-ga151


From abd5422db285f286a0d57747fb0edf192f880349 Mon Sep 17 00:00:00 2001
From: Gabriel Laskar <gabriel@lse.epita.fr>
Date: Mon, 30 Nov 2015 15:10:43 +0100
Subject: drm: drm_mode.h fix includes

Instead of using linux/types.h, drm headers should use drm.h, in order
to handle the portability issues in only one place.

Signed-off-by: Gabriel Laskar <gabriel@lse.epita.fr>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
CC: Emil Velikov <emil.l.velikov@gmail.com>
CC: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/drm/drm_mode.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 44ad794960ee..50adb46204c2 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -27,7 +27,7 @@
 #ifndef _DRM_MODE_H
 #define _DRM_MODE_H
 
-#include <linux/types.h>
+#include "drm.h"
 
 #define DRM_DISPLAY_INFO_LEN	32
 #define DRM_CONNECTOR_NAME_LEN	32
-- 
cgit v1.2.3-58-ga151


From c6da66dff79e3e99903d4435292643665c30b0c6 Mon Sep 17 00:00:00 2001
From: Gabriel Laskar <gabriel@lse.epita.fr>
Date: Mon, 30 Nov 2015 15:10:44 +0100
Subject: drm: fix inclusion of drm.h in drm_sarea.h

Using `#include "drm.h"` instead of `#include <drm/drm.h>` allow drm
headers to be moved in another directory without changes, like for the
libdrm imports.

Signed-off-by: Gabriel Laskar <gabriel@lse.epita.fr>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
CC: Emil Velikov <emil.l.velikov@gmail.com>
CC: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/drm/drm_sarea.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm_sarea.h b/include/uapi/drm/drm_sarea.h
index 413a5642d49f..1d1a858a203d 100644
--- a/include/uapi/drm/drm_sarea.h
+++ b/include/uapi/drm/drm_sarea.h
@@ -32,7 +32,7 @@
 #ifndef _DRM_SAREA_H_
 #define _DRM_SAREA_H_
 
-#include <drm/drm.h>
+#include "drm.h"
 
 /* SAREA area needs to be at least a page */
 #if defined(__alpha__)
-- 
cgit v1.2.3-58-ga151


From 19b1e97aefde813f92a40699200fe1cf73f07da2 Mon Sep 17 00:00:00 2001
From: Gabriel Laskar <gabriel@lse.epita.fr>
Date: Mon, 30 Nov 2015 15:10:45 +0100
Subject: drm: fix inclusion of drm.h in exynos_sarea.h

Using `#include "drm.h"` instead of `#include <drm/drm.h>` allow drm
headers to be moved in another directory without changes, like for the
libdrm imports.

Signed-off-by: Gabriel Laskar <gabriel@lse.epita.fr>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
CC: Emil Velikov <emil.l.velikov@gmail.com>
CC: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/drm/exynos_drm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/exynos_drm.h b/include/uapi/drm/exynos_drm.h
index cf431b76c320..312c67d744ae 100644
--- a/include/uapi/drm/exynos_drm.h
+++ b/include/uapi/drm/exynos_drm.h
@@ -15,7 +15,7 @@
 #ifndef _UAPI_EXYNOS_DRM_H_
 #define _UAPI_EXYNOS_DRM_H_
 
-#include <drm/drm.h>
+#include "drm.h"
 
 /**
  * User-desired buffer creation information structure.
-- 
cgit v1.2.3-58-ga151


From 13635d6120b9d6607fbf2622066bebfd7416e545 Mon Sep 17 00:00:00 2001
From: Gabriel Laskar <gabriel@lse.epita.fr>
Date: Mon, 30 Nov 2015 15:10:46 +0100
Subject: drm: fix inclusion of drm.h in i810_drm.h

Using `#include "drm.h"` instead of `#include <drm/drm.h>` allow drm
headers to be moved in another directory without changes, like for
the libdrm imports.

Signed-off-by: Gabriel Laskar <gabriel@lse.epita.fr>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
CC: Emil Velikov <emil.l.velikov@gmail.com>
CC: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/drm/i810_drm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/i810_drm.h b/include/uapi/drm/i810_drm.h
index 34736efd5824..bdb028723ded 100644
--- a/include/uapi/drm/i810_drm.h
+++ b/include/uapi/drm/i810_drm.h
@@ -1,7 +1,7 @@
 #ifndef _I810_DRM_H_
 #define _I810_DRM_H_
 
-#include <drm/drm.h>
+#include "drm.h"
 
 /* WARNING: These defines must be the same as what the Xserver uses.
  * if you change them, you must change the defines in the Xserver.
-- 
cgit v1.2.3-58-ga151


From 1049102ff72b294bbeb1a254fe4de4a558387b46 Mon Sep 17 00:00:00 2001
From: Gabriel Laskar <gabriel@lse.epita.fr>
Date: Mon, 30 Nov 2015 15:10:47 +0100
Subject: drm: fix inclusion of drm.h in exynos_sarea.h

Using `#include "drm.h"` instead of `#include <drm/drm.h>` allow drm
headers to be moved in another directory without changes, like for the
libdrm imports.

Signed-off-by: Gabriel Laskar <gabriel@lse.epita.fr>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
CC: Emil Velikov <emil.l.velikov@gmail.com>
CC: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/drm/i915_drm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 484a9fb20479..07dcba2b002b 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -27,7 +27,7 @@
 #ifndef _UAPI_I915_DRM_H_
 #define _UAPI_I915_DRM_H_
 
-#include <drm/drm.h>
+#include "drm.h"
 
 /* Please note that modifications to all structs defined here are
  * subject to backwards-compatibility constraints.
-- 
cgit v1.2.3-58-ga151


From 559003d067db195721656395cc765f0a16a3abf6 Mon Sep 17 00:00:00 2001
From: Gabriel Laskar <gabriel@lse.epita.fr>
Date: Mon, 30 Nov 2015 15:10:48 +0100
Subject: drm: fix inclusion of drm.h in mga_drm.h

Using `#include "drm.h"` instead of `#include <drm/drm.h>` allow drm
headers to be moved in another directory without changes, like for the
libdrm imports.

Signed-off-by: Gabriel Laskar <gabriel@lse.epita.fr>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
CC: Emil Velikov <emil.l.velikov@gmail.com>
CC: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/drm/mga_drm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/mga_drm.h b/include/uapi/drm/mga_drm.h
index 2375bfd6e5e9..fca817009e13 100644
--- a/include/uapi/drm/mga_drm.h
+++ b/include/uapi/drm/mga_drm.h
@@ -35,7 +35,7 @@
 #ifndef __MGA_DRM_H__
 #define __MGA_DRM_H__
 
-#include <drm/drm.h>
+#include "drm.h"
 
 /* WARNING: If you change any of these defines, make sure to change the
  * defines in the Xserver file (mga_sarea.h)
-- 
cgit v1.2.3-58-ga151


From 06577d042d38b27ed975c49358ad7adf873d09ae Mon Sep 17 00:00:00 2001
From: Gabriel Laskar <gabriel@lse.epita.fr>
Date: Mon, 30 Nov 2015 15:10:49 +0100
Subject: drm: fix inclusion of drm.h in msm_drm.h

Using `#include "drm.h"` instead of `#include <drm/drm.h>` allow drm
headers to be moved in another directory without changes, like for the
libdrm imports.

Signed-off-by: Gabriel Laskar <gabriel@lse.epita.fr>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
CC: Emil Velikov <emil.l.velikov@gmail.com>
CC: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/drm/msm_drm.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index 75a232b9a970..e995ffbcf86a 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -18,8 +18,7 @@
 #ifndef __MSM_DRM_H__
 #define __MSM_DRM_H__
 
-#include <stddef.h>
-#include <drm/drm.h>
+#include "drm.h"
 
 /* Please note that modifications to all structs defined here are
  * subject to backwards-compatibility constraints:
-- 
cgit v1.2.3-58-ga151


From 7ef500e431ac2f3bf14f6c39704e5db01ab5a341 Mon Sep 17 00:00:00 2001
From: Gabriel Laskar <gabriel@lse.epita.fr>
Date: Mon, 30 Nov 2015 15:10:50 +0100
Subject: drm: fix inclusion of drm.h in omap_drm.h

Using `#include "drm.h"` instead of `#include <drm/drm.h>` allow drm
headers to be moved in another directory without changes, like for the
libdrm imports.

Signed-off-by: Gabriel Laskar <gabriel@lse.epita.fr>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
CC: Emil Velikov <emil.l.velikov@gmail.com>
CC: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/drm/omap_drm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/omap_drm.h b/include/uapi/drm/omap_drm.h
index 1d0b1172664e..0750c01bb480 100644
--- a/include/uapi/drm/omap_drm.h
+++ b/include/uapi/drm/omap_drm.h
@@ -20,7 +20,7 @@
 #ifndef __OMAP_DRM_H__
 #define __OMAP_DRM_H__
 
-#include <drm/drm.h>
+#include "drm.h"
 
 /* Please note that modifications to all structs defined here are
  * subject to backwards-compatibility constraints.
-- 
cgit v1.2.3-58-ga151


From 8ca32846322254df34c33aeeb222f77edecfa113 Mon Sep 17 00:00:00 2001
From: Gabriel Laskar <gabriel@lse.epita.fr>
Date: Mon, 30 Nov 2015 15:10:51 +0100
Subject: drm: fix inclusion of drm.h in qxl_drm.h

Using `#include "drm.h"` instead of `#include <drm/drm.h>` allow drm
headers to be moved in another directory without changes, like for the
libdrm imports.

Signed-off-by: Gabriel Laskar <gabriel@lse.epita.fr>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
CC: Emil Velikov <emil.l.velikov@gmail.com>
CC: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/drm/qxl_drm.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/qxl_drm.h b/include/uapi/drm/qxl_drm.h
index dface362d729..4d1e32640463 100644
--- a/include/uapi/drm/qxl_drm.h
+++ b/include/uapi/drm/qxl_drm.h
@@ -24,8 +24,7 @@
 #ifndef QXL_DRM_H
 #define QXL_DRM_H
 
-#include <stddef.h>
-#include "drm/drm.h"
+#include "drm.h"
 
 /* Please note that modifications to all structs defined here are
  * subject to backwards-compatibility constraints.
-- 
cgit v1.2.3-58-ga151


From 355f47803af9de70eab4fa20c8527ae047d240b7 Mon Sep 17 00:00:00 2001
From: Gabriel Laskar <gabriel@lse.epita.fr>
Date: Mon, 30 Nov 2015 15:10:52 +0100
Subject: drm: fix inclusion of drm.h in r128_drm.h

Using `#include "drm.h"` instead of `#include <drm/drm.h>` allow drm
headers to be moved in another directory without changes, like for the
libdrm imports.

Signed-off-by: Gabriel Laskar <gabriel@lse.epita.fr>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
CC: Emil Velikov <emil.l.velikov@gmail.com>
CC: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/drm/r128_drm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/r128_drm.h b/include/uapi/drm/r128_drm.h
index 76b0aa3e8210..7a44c6500a7e 100644
--- a/include/uapi/drm/r128_drm.h
+++ b/include/uapi/drm/r128_drm.h
@@ -33,7 +33,7 @@
 #ifndef __R128_DRM_H__
 #define __R128_DRM_H__
 
-#include <drm/drm.h>
+#include "drm.h"
 
 /* WARNING: If you change any of these defines, make sure to change the
  * defines in the X server file (r128_sarea.h)
-- 
cgit v1.2.3-58-ga151


From 3e2d2cdfa0fc277ec267a1fd4af41fdbb2f70255 Mon Sep 17 00:00:00 2001
From: Gabriel Laskar <gabriel@lse.epita.fr>
Date: Mon, 30 Nov 2015 15:10:53 +0100
Subject: drm: fix inclusion of drm.h in savage_drm.h

Using `#include "drm.h"` instead of `#include <drm/drm.h>` allow drm
headers to be moved in another directory without changes, like for the
libdrm imports.

Signed-off-by: Gabriel Laskar <gabriel@lse.epita.fr>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
CC: Emil Velikov <emil.l.velikov@gmail.com>
CC: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/drm/savage_drm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/savage_drm.h b/include/uapi/drm/savage_drm.h
index 9dc9dc1a7753..574147489c60 100644
--- a/include/uapi/drm/savage_drm.h
+++ b/include/uapi/drm/savage_drm.h
@@ -26,7 +26,7 @@
 #ifndef __SAVAGE_DRM_H__
 #define __SAVAGE_DRM_H__
 
-#include <drm/drm.h>
+#include "drm.h"
 
 #ifndef __SAVAGE_SAREA_DEFINES__
 #define __SAVAGE_SAREA_DEFINES__
-- 
cgit v1.2.3-58-ga151


From 678205a6c403a9da70dcd12ca61ace8d91ed0351 Mon Sep 17 00:00:00 2001
From: Gabriel Laskar <gabriel@lse.epita.fr>
Date: Mon, 30 Nov 2015 15:10:54 +0100
Subject: drm: fix inclusion of drm.h in tegra_drm.h

Using `#include "drm.h"` instead of `#include <drm/drm.h>` allow drm
headers to be moved in another directory without changes, like for the
libdrm imports.

Signed-off-by: Gabriel Laskar <gabriel@lse.epita.fr>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
CC: Emil Velikov <emil.l.velikov@gmail.com>
CC: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/drm/tegra_drm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/tegra_drm.h b/include/uapi/drm/tegra_drm.h
index 5391780c2b05..27d0b054aed0 100644
--- a/include/uapi/drm/tegra_drm.h
+++ b/include/uapi/drm/tegra_drm.h
@@ -23,7 +23,7 @@
 #ifndef _UAPI_TEGRA_DRM_H_
 #define _UAPI_TEGRA_DRM_H_
 
-#include <drm/drm.h>
+#include "drm.h"
 
 #define DRM_TEGRA_GEM_CREATE_TILED     (1 << 0)
 #define DRM_TEGRA_GEM_CREATE_BOTTOM_UP (1 << 1)
-- 
cgit v1.2.3-58-ga151


From 6e82e9c85774a417af309497d0be5f125474d163 Mon Sep 17 00:00:00 2001
From: Gabriel Laskar <gabriel@lse.epita.fr>
Date: Mon, 30 Nov 2015 15:10:55 +0100
Subject: drm: fix inclusion of drm.h in virtgpu_drm.h

Using `#include "drm.h"` instead of `#include <drm/drm.h>` allow drm
headers to be moved in another directory without changes, like for the
libdrm imports.

Signed-off-by: Gabriel Laskar <gabriel@lse.epita.fr>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
CC: Emil Velikov <emil.l.velikov@gmail.com>
CC: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/drm/virtgpu_drm.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h
index 4bcfbe6be8d4..c74f1f90cb37 100644
--- a/include/uapi/drm/virtgpu_drm.h
+++ b/include/uapi/drm/virtgpu_drm.h
@@ -24,8 +24,7 @@
 #ifndef VIRTGPU_DRM_H
 #define VIRTGPU_DRM_H
 
-#include <stddef.h>
-#include "drm/drm.h"
+#include "drm.h"
 
 /* Please note that modifications to all structs defined here are
  * subject to backwards-compatibility constraints.
-- 
cgit v1.2.3-58-ga151


From 3a4197afc4e49083988e077d09dba88013f0e517 Mon Sep 17 00:00:00 2001
From: Gabriel Laskar <gabriel@lse.epita.fr>
Date: Mon, 30 Nov 2015 15:10:56 +0100
Subject: drm: fix inclusion of drm.h in vmwgfx_drm.h

Using `#include "drm.h"` instead of `#include <drm/drm.h>` allow drm
headers to be moved in another directory without changes, like for the
libdrm imports.

Signed-off-by: Gabriel Laskar <gabriel@lse.epita.fr>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
CC: Emil Velikov <emil.l.velikov@gmail.com>
CC: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/drm/vmwgfx_drm.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/vmwgfx_drm.h b/include/uapi/drm/vmwgfx_drm.h
index ef31f448fecf..5b68b4d10884 100644
--- a/include/uapi/drm/vmwgfx_drm.h
+++ b/include/uapi/drm/vmwgfx_drm.h
@@ -28,9 +28,7 @@
 #ifndef __VMWGFX_DRM_H__
 #define __VMWGFX_DRM_H__
 
-#ifndef __KERNEL__
-#include <drm/drm.h>
-#endif
+#include "drm.h"
 
 #define DRM_VMW_MAX_SURFACE_FACES 6
 #define DRM_VMW_MAX_MIP_LEVELS 24
-- 
cgit v1.2.3-58-ga151


From d7e12cd7b8c6d7426a401ec03ddcb88382180ced Mon Sep 17 00:00:00 2001
From: Gabriel Laskar <gabriel@lse.epita.fr>
Date: Mon, 30 Nov 2015 15:10:57 +0100
Subject: drm: fix inclusion of drm.h in via_drm.h

Using `#include "drm.h"` instead of `#include <drm/drm.h>` allow drm
headers to be moved in another directory without changes, like for the
libdrm imports.

Signed-off-by: Gabriel Laskar <gabriel@lse.epita.fr>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
CC: Emil Velikov <emil.l.velikov@gmail.com>
CC: Mikko Rapeli <mikko.rapeli@iki.fi>
---
 include/uapi/drm/via_drm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/via_drm.h b/include/uapi/drm/via_drm.h
index c8c053a12e93..fa21ed185520 100644
--- a/include/uapi/drm/via_drm.h
+++ b/include/uapi/drm/via_drm.h
@@ -24,7 +24,7 @@
 #ifndef _VIA_DRM_H_
 #define _VIA_DRM_H_
 
-#include <drm/drm.h>
+#include "drm.h"
 
 /* WARNING: These defines must be the same as what the Xserver uses.
  * if you change them, you must change the defines in the Xserver.
-- 
cgit v1.2.3-58-ga151


From 8979a059281b2e25d864edfd2220def7140c09b1 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@gmail.com>
Date: Mon, 14 Dec 2015 09:59:56 -0500
Subject: drm/msm: trivial whitespace fix

Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 include/uapi/drm/msm_drm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index e995ffbcf86a..81e6e0d1d360 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -121,7 +121,7 @@ struct drm_msm_gem_cpu_fini {
 struct drm_msm_gem_submit_reloc {
 	__u32 submit_offset;  /* in, offset from submit_bo */
 	__u32 or;             /* in, value OR'd with result */
-	__s32  shift;          /* in, amount of left shift (can be negative) */
+	__s32 shift;          /* in, amount of left shift (can be negative) */
 	__u32 reloc_idx;      /* in, index of reloc_bo buffer */
 	__u64 reloc_offset;   /* in, offset from start of reloc_bo */
 };
-- 
cgit v1.2.3-58-ga151


From a8c21a5451d831e67b7a6fb910f9ca8bc7b43554 Mon Sep 17 00:00:00 2001
From: The etnaviv authors <dri-devel@lists.freedesktop.org>
Date: Thu, 3 Dec 2015 18:21:29 +0100
Subject: drm/etnaviv: add initial etnaviv DRM driver

This adds the etnaviv DRM driver and hooks it up in Makefiles
and Kconfig.

Signed-off-by: Christian Gmeiner <christian.gmeiner@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/Kconfig                      |    2 +
 drivers/gpu/drm/Makefile                     |    1 +
 drivers/gpu/drm/etnaviv/Kconfig              |   20 +
 drivers/gpu/drm/etnaviv/Makefile             |   14 +
 drivers/gpu/drm/etnaviv/cmdstream.xml.h      |  218 ++++
 drivers/gpu/drm/etnaviv/common.xml.h         |  249 ++++
 drivers/gpu/drm/etnaviv/etnaviv_buffer.c     |  268 +++++
 drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c |  209 ++++
 drivers/gpu/drm/etnaviv/etnaviv_drv.c        |  707 +++++++++++
 drivers/gpu/drm/etnaviv/etnaviv_drv.h        |  161 +++
 drivers/gpu/drm/etnaviv/etnaviv_dump.c       |  227 ++++
 drivers/gpu/drm/etnaviv/etnaviv_dump.h       |   54 +
 drivers/gpu/drm/etnaviv/etnaviv_gem.c        |  897 ++++++++++++++
 drivers/gpu/drm/etnaviv/etnaviv_gem.h        |  117 ++
 drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c  |  122 ++
 drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c |  443 +++++++
 drivers/gpu/drm/etnaviv/etnaviv_gpu.c        | 1644 ++++++++++++++++++++++++++
 drivers/gpu/drm/etnaviv/etnaviv_gpu.h        |  209 ++++
 drivers/gpu/drm/etnaviv/etnaviv_iommu.c      |  240 ++++
 drivers/gpu/drm/etnaviv/etnaviv_iommu.h      |   28 +
 drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c   |   33 +
 drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.h   |   25 +
 drivers/gpu/drm/etnaviv/etnaviv_mmu.c        |  299 +++++
 drivers/gpu/drm/etnaviv/etnaviv_mmu.h        |   71 ++
 drivers/gpu/drm/etnaviv/state.xml.h          |  351 ++++++
 drivers/gpu/drm/etnaviv/state_hi.xml.h       |  407 +++++++
 include/uapi/drm/etnaviv_drm.h               |  222 ++++
 27 files changed, 7238 insertions(+)
 create mode 100644 drivers/gpu/drm/etnaviv/Kconfig
 create mode 100644 drivers/gpu/drm/etnaviv/Makefile
 create mode 100644 drivers/gpu/drm/etnaviv/cmdstream.xml.h
 create mode 100644 drivers/gpu/drm/etnaviv/common.xml.h
 create mode 100644 drivers/gpu/drm/etnaviv/etnaviv_buffer.c
 create mode 100644 drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c
 create mode 100644 drivers/gpu/drm/etnaviv/etnaviv_drv.c
 create mode 100644 drivers/gpu/drm/etnaviv/etnaviv_drv.h
 create mode 100644 drivers/gpu/drm/etnaviv/etnaviv_dump.c
 create mode 100644 drivers/gpu/drm/etnaviv/etnaviv_dump.h
 create mode 100644 drivers/gpu/drm/etnaviv/etnaviv_gem.c
 create mode 100644 drivers/gpu/drm/etnaviv/etnaviv_gem.h
 create mode 100644 drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
 create mode 100644 drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
 create mode 100644 drivers/gpu/drm/etnaviv/etnaviv_gpu.c
 create mode 100644 drivers/gpu/drm/etnaviv/etnaviv_gpu.h
 create mode 100644 drivers/gpu/drm/etnaviv/etnaviv_iommu.c
 create mode 100644 drivers/gpu/drm/etnaviv/etnaviv_iommu.h
 create mode 100644 drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
 create mode 100644 drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.h
 create mode 100644 drivers/gpu/drm/etnaviv/etnaviv_mmu.c
 create mode 100644 drivers/gpu/drm/etnaviv/etnaviv_mmu.h
 create mode 100644 drivers/gpu/drm/etnaviv/state.xml.h
 create mode 100644 drivers/gpu/drm/etnaviv/state_hi.xml.h
 create mode 100644 include/uapi/drm/etnaviv_drm.h

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index c4bf9a1cf4a6..b02ac62f5863 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -266,3 +266,5 @@ source "drivers/gpu/drm/amd/amdkfd/Kconfig"
 source "drivers/gpu/drm/imx/Kconfig"
 
 source "drivers/gpu/drm/vc4/Kconfig"
+
+source "drivers/gpu/drm/etnaviv/Kconfig"
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 1e9ff4c3e3db..f858aa25fbb2 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -75,3 +75,4 @@ obj-y			+= i2c/
 obj-y			+= panel/
 obj-y			+= bridge/
 obj-$(CONFIG_DRM_FSL_DCU) += fsl-dcu/
+obj-$(CONFIG_DRM_ETNAVIV) += etnaviv/
diff --git a/drivers/gpu/drm/etnaviv/Kconfig b/drivers/gpu/drm/etnaviv/Kconfig
new file mode 100644
index 000000000000..2cde7a5442fb
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/Kconfig
@@ -0,0 +1,20 @@
+
+config DRM_ETNAVIV
+	tristate "ETNAVIV (DRM support for Vivante GPU IP cores)"
+	depends on DRM
+	depends on ARCH_MXC || ARCH_DOVE
+	select SHMEM
+	select TMPFS
+	select IOMMU_API
+	select IOMMU_SUPPORT
+	select WANT_DEV_COREDUMP
+	help
+	  DRM driver for Vivante GPUs.
+
+config DRM_ETNAVIV_REGISTER_LOGGING
+	bool "enable ETNAVIV register logging"
+	depends on DRM_ETNAVIV
+	help
+	  Compile in support for logging register reads/writes in a format
+	  that can be parsed by envytools demsm tool.  If enabled, register
+	  logging can be switched on via etnaviv.reglog=y module param.
diff --git a/drivers/gpu/drm/etnaviv/Makefile b/drivers/gpu/drm/etnaviv/Makefile
new file mode 100644
index 000000000000..1086e9876f91
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/Makefile
@@ -0,0 +1,14 @@
+etnaviv-y := \
+	etnaviv_buffer.o \
+	etnaviv_cmd_parser.o \
+	etnaviv_drv.o \
+	etnaviv_dump.o \
+	etnaviv_gem_prime.o \
+	etnaviv_gem_submit.o \
+	etnaviv_gem.o \
+	etnaviv_gpu.o \
+	etnaviv_iommu_v2.o \
+	etnaviv_iommu.o \
+	etnaviv_mmu.o
+
+obj-$(CONFIG_DRM_ETNAVIV)	+= etnaviv.o
diff --git a/drivers/gpu/drm/etnaviv/cmdstream.xml.h b/drivers/gpu/drm/etnaviv/cmdstream.xml.h
new file mode 100644
index 000000000000..8c44ba9a694e
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/cmdstream.xml.h
@@ -0,0 +1,218 @@
+#ifndef CMDSTREAM_XML
+#define CMDSTREAM_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- cmdstream.xml (  12589 bytes, from 2014-02-17 14:57:56)
+- common.xml    (  18437 bytes, from 2015-03-25 11:27:41)
+
+Copyright (C) 2014
+*/
+
+
+#define FE_OPCODE_LOAD_STATE					0x00000001
+#define FE_OPCODE_END						0x00000002
+#define FE_OPCODE_NOP						0x00000003
+#define FE_OPCODE_DRAW_2D					0x00000004
+#define FE_OPCODE_DRAW_PRIMITIVES				0x00000005
+#define FE_OPCODE_DRAW_INDEXED_PRIMITIVES			0x00000006
+#define FE_OPCODE_WAIT						0x00000007
+#define FE_OPCODE_LINK						0x00000008
+#define FE_OPCODE_STALL						0x00000009
+#define FE_OPCODE_CALL						0x0000000a
+#define FE_OPCODE_RETURN					0x0000000b
+#define FE_OPCODE_CHIP_SELECT					0x0000000d
+#define PRIMITIVE_TYPE_POINTS					0x00000001
+#define PRIMITIVE_TYPE_LINES					0x00000002
+#define PRIMITIVE_TYPE_LINE_STRIP				0x00000003
+#define PRIMITIVE_TYPE_TRIANGLES				0x00000004
+#define PRIMITIVE_TYPE_TRIANGLE_STRIP				0x00000005
+#define PRIMITIVE_TYPE_TRIANGLE_FAN				0x00000006
+#define PRIMITIVE_TYPE_LINE_LOOP				0x00000007
+#define PRIMITIVE_TYPE_QUADS					0x00000008
+#define VIV_FE_LOAD_STATE					0x00000000
+
+#define VIV_FE_LOAD_STATE_HEADER				0x00000000
+#define VIV_FE_LOAD_STATE_HEADER_OP__MASK			0xf8000000
+#define VIV_FE_LOAD_STATE_HEADER_OP__SHIFT			27
+#define VIV_FE_LOAD_STATE_HEADER_OP_LOAD_STATE			0x08000000
+#define VIV_FE_LOAD_STATE_HEADER_FIXP				0x04000000
+#define VIV_FE_LOAD_STATE_HEADER_COUNT__MASK			0x03ff0000
+#define VIV_FE_LOAD_STATE_HEADER_COUNT__SHIFT			16
+#define VIV_FE_LOAD_STATE_HEADER_COUNT(x)			(((x) << VIV_FE_LOAD_STATE_HEADER_COUNT__SHIFT) & VIV_FE_LOAD_STATE_HEADER_COUNT__MASK)
+#define VIV_FE_LOAD_STATE_HEADER_OFFSET__MASK			0x0000ffff
+#define VIV_FE_LOAD_STATE_HEADER_OFFSET__SHIFT			0
+#define VIV_FE_LOAD_STATE_HEADER_OFFSET(x)			(((x) << VIV_FE_LOAD_STATE_HEADER_OFFSET__SHIFT) & VIV_FE_LOAD_STATE_HEADER_OFFSET__MASK)
+#define VIV_FE_LOAD_STATE_HEADER_OFFSET__SHR			2
+
+#define VIV_FE_END						0x00000000
+
+#define VIV_FE_END_HEADER					0x00000000
+#define VIV_FE_END_HEADER_EVENT_ID__MASK			0x0000001f
+#define VIV_FE_END_HEADER_EVENT_ID__SHIFT			0
+#define VIV_FE_END_HEADER_EVENT_ID(x)				(((x) << VIV_FE_END_HEADER_EVENT_ID__SHIFT) & VIV_FE_END_HEADER_EVENT_ID__MASK)
+#define VIV_FE_END_HEADER_EVENT_ENABLE				0x00000100
+#define VIV_FE_END_HEADER_OP__MASK				0xf8000000
+#define VIV_FE_END_HEADER_OP__SHIFT				27
+#define VIV_FE_END_HEADER_OP_END				0x10000000
+
+#define VIV_FE_NOP						0x00000000
+
+#define VIV_FE_NOP_HEADER					0x00000000
+#define VIV_FE_NOP_HEADER_OP__MASK				0xf8000000
+#define VIV_FE_NOP_HEADER_OP__SHIFT				27
+#define VIV_FE_NOP_HEADER_OP_NOP				0x18000000
+
+#define VIV_FE_DRAW_2D						0x00000000
+
+#define VIV_FE_DRAW_2D_HEADER					0x00000000
+#define VIV_FE_DRAW_2D_HEADER_COUNT__MASK			0x0000ff00
+#define VIV_FE_DRAW_2D_HEADER_COUNT__SHIFT			8
+#define VIV_FE_DRAW_2D_HEADER_COUNT(x)				(((x) << VIV_FE_DRAW_2D_HEADER_COUNT__SHIFT) & VIV_FE_DRAW_2D_HEADER_COUNT__MASK)
+#define VIV_FE_DRAW_2D_HEADER_DATA_COUNT__MASK			0x07ff0000
+#define VIV_FE_DRAW_2D_HEADER_DATA_COUNT__SHIFT			16
+#define VIV_FE_DRAW_2D_HEADER_DATA_COUNT(x)			(((x) << VIV_FE_DRAW_2D_HEADER_DATA_COUNT__SHIFT) & VIV_FE_DRAW_2D_HEADER_DATA_COUNT__MASK)
+#define VIV_FE_DRAW_2D_HEADER_OP__MASK				0xf8000000
+#define VIV_FE_DRAW_2D_HEADER_OP__SHIFT				27
+#define VIV_FE_DRAW_2D_HEADER_OP_DRAW_2D			0x20000000
+
+#define VIV_FE_DRAW_2D_TOP_LEFT					0x00000008
+#define VIV_FE_DRAW_2D_TOP_LEFT_X__MASK				0x0000ffff
+#define VIV_FE_DRAW_2D_TOP_LEFT_X__SHIFT			0
+#define VIV_FE_DRAW_2D_TOP_LEFT_X(x)				(((x) << VIV_FE_DRAW_2D_TOP_LEFT_X__SHIFT) & VIV_FE_DRAW_2D_TOP_LEFT_X__MASK)
+#define VIV_FE_DRAW_2D_TOP_LEFT_Y__MASK				0xffff0000
+#define VIV_FE_DRAW_2D_TOP_LEFT_Y__SHIFT			16
+#define VIV_FE_DRAW_2D_TOP_LEFT_Y(x)				(((x) << VIV_FE_DRAW_2D_TOP_LEFT_Y__SHIFT) & VIV_FE_DRAW_2D_TOP_LEFT_Y__MASK)
+
+#define VIV_FE_DRAW_2D_BOTTOM_RIGHT				0x0000000c
+#define VIV_FE_DRAW_2D_BOTTOM_RIGHT_X__MASK			0x0000ffff
+#define VIV_FE_DRAW_2D_BOTTOM_RIGHT_X__SHIFT			0
+#define VIV_FE_DRAW_2D_BOTTOM_RIGHT_X(x)			(((x) << VIV_FE_DRAW_2D_BOTTOM_RIGHT_X__SHIFT) & VIV_FE_DRAW_2D_BOTTOM_RIGHT_X__MASK)
+#define VIV_FE_DRAW_2D_BOTTOM_RIGHT_Y__MASK			0xffff0000
+#define VIV_FE_DRAW_2D_BOTTOM_RIGHT_Y__SHIFT			16
+#define VIV_FE_DRAW_2D_BOTTOM_RIGHT_Y(x)			(((x) << VIV_FE_DRAW_2D_BOTTOM_RIGHT_Y__SHIFT) & VIV_FE_DRAW_2D_BOTTOM_RIGHT_Y__MASK)
+
+#define VIV_FE_DRAW_PRIMITIVES					0x00000000
+
+#define VIV_FE_DRAW_PRIMITIVES_HEADER				0x00000000
+#define VIV_FE_DRAW_PRIMITIVES_HEADER_OP__MASK			0xf8000000
+#define VIV_FE_DRAW_PRIMITIVES_HEADER_OP__SHIFT			27
+#define VIV_FE_DRAW_PRIMITIVES_HEADER_OP_DRAW_PRIMITIVES	0x28000000
+
+#define VIV_FE_DRAW_PRIMITIVES_COMMAND				0x00000004
+#define VIV_FE_DRAW_PRIMITIVES_COMMAND_TYPE__MASK		0x000000ff
+#define VIV_FE_DRAW_PRIMITIVES_COMMAND_TYPE__SHIFT		0
+#define VIV_FE_DRAW_PRIMITIVES_COMMAND_TYPE(x)			(((x) << VIV_FE_DRAW_PRIMITIVES_COMMAND_TYPE__SHIFT) & VIV_FE_DRAW_PRIMITIVES_COMMAND_TYPE__MASK)
+
+#define VIV_FE_DRAW_PRIMITIVES_START				0x00000008
+
+#define VIV_FE_DRAW_PRIMITIVES_COUNT				0x0000000c
+
+#define VIV_FE_DRAW_INDEXED_PRIMITIVES				0x00000000
+
+#define VIV_FE_DRAW_INDEXED_PRIMITIVES_HEADER			0x00000000
+#define VIV_FE_DRAW_INDEXED_PRIMITIVES_HEADER_OP__MASK		0xf8000000
+#define VIV_FE_DRAW_INDEXED_PRIMITIVES_HEADER_OP__SHIFT		27
+#define VIV_FE_DRAW_INDEXED_PRIMITIVES_HEADER_OP_DRAW_INDEXED_PRIMITIVES	0x30000000
+
+#define VIV_FE_DRAW_INDEXED_PRIMITIVES_COMMAND			0x00000004
+#define VIV_FE_DRAW_INDEXED_PRIMITIVES_COMMAND_TYPE__MASK	0x000000ff
+#define VIV_FE_DRAW_INDEXED_PRIMITIVES_COMMAND_TYPE__SHIFT	0
+#define VIV_FE_DRAW_INDEXED_PRIMITIVES_COMMAND_TYPE(x)		(((x) << VIV_FE_DRAW_INDEXED_PRIMITIVES_COMMAND_TYPE__SHIFT) & VIV_FE_DRAW_INDEXED_PRIMITIVES_COMMAND_TYPE__MASK)
+
+#define VIV_FE_DRAW_INDEXED_PRIMITIVES_START			0x00000008
+
+#define VIV_FE_DRAW_INDEXED_PRIMITIVES_COUNT			0x0000000c
+
+#define VIV_FE_DRAW_INDEXED_PRIMITIVES_OFFSET			0x00000010
+
+#define VIV_FE_WAIT						0x00000000
+
+#define VIV_FE_WAIT_HEADER					0x00000000
+#define VIV_FE_WAIT_HEADER_DELAY__MASK				0x0000ffff
+#define VIV_FE_WAIT_HEADER_DELAY__SHIFT				0
+#define VIV_FE_WAIT_HEADER_DELAY(x)				(((x) << VIV_FE_WAIT_HEADER_DELAY__SHIFT) & VIV_FE_WAIT_HEADER_DELAY__MASK)
+#define VIV_FE_WAIT_HEADER_OP__MASK				0xf8000000
+#define VIV_FE_WAIT_HEADER_OP__SHIFT				27
+#define VIV_FE_WAIT_HEADER_OP_WAIT				0x38000000
+
+#define VIV_FE_LINK						0x00000000
+
+#define VIV_FE_LINK_HEADER					0x00000000
+#define VIV_FE_LINK_HEADER_PREFETCH__MASK			0x0000ffff
+#define VIV_FE_LINK_HEADER_PREFETCH__SHIFT			0
+#define VIV_FE_LINK_HEADER_PREFETCH(x)				(((x) << VIV_FE_LINK_HEADER_PREFETCH__SHIFT) & VIV_FE_LINK_HEADER_PREFETCH__MASK)
+#define VIV_FE_LINK_HEADER_OP__MASK				0xf8000000
+#define VIV_FE_LINK_HEADER_OP__SHIFT				27
+#define VIV_FE_LINK_HEADER_OP_LINK				0x40000000
+
+#define VIV_FE_LINK_ADDRESS					0x00000004
+
+#define VIV_FE_STALL						0x00000000
+
+#define VIV_FE_STALL_HEADER					0x00000000
+#define VIV_FE_STALL_HEADER_OP__MASK				0xf8000000
+#define VIV_FE_STALL_HEADER_OP__SHIFT				27
+#define VIV_FE_STALL_HEADER_OP_STALL				0x48000000
+
+#define VIV_FE_STALL_TOKEN					0x00000004
+#define VIV_FE_STALL_TOKEN_FROM__MASK				0x0000001f
+#define VIV_FE_STALL_TOKEN_FROM__SHIFT				0
+#define VIV_FE_STALL_TOKEN_FROM(x)				(((x) << VIV_FE_STALL_TOKEN_FROM__SHIFT) & VIV_FE_STALL_TOKEN_FROM__MASK)
+#define VIV_FE_STALL_TOKEN_TO__MASK				0x00001f00
+#define VIV_FE_STALL_TOKEN_TO__SHIFT				8
+#define VIV_FE_STALL_TOKEN_TO(x)				(((x) << VIV_FE_STALL_TOKEN_TO__SHIFT) & VIV_FE_STALL_TOKEN_TO__MASK)
+
+#define VIV_FE_CALL						0x00000000
+
+#define VIV_FE_CALL_HEADER					0x00000000
+#define VIV_FE_CALL_HEADER_PREFETCH__MASK			0x0000ffff
+#define VIV_FE_CALL_HEADER_PREFETCH__SHIFT			0
+#define VIV_FE_CALL_HEADER_PREFETCH(x)				(((x) << VIV_FE_CALL_HEADER_PREFETCH__SHIFT) & VIV_FE_CALL_HEADER_PREFETCH__MASK)
+#define VIV_FE_CALL_HEADER_OP__MASK				0xf8000000
+#define VIV_FE_CALL_HEADER_OP__SHIFT				27
+#define VIV_FE_CALL_HEADER_OP_CALL				0x50000000
+
+#define VIV_FE_CALL_ADDRESS					0x00000004
+
+#define VIV_FE_CALL_RETURN_PREFETCH				0x00000008
+
+#define VIV_FE_CALL_RETURN_ADDRESS				0x0000000c
+
+#define VIV_FE_RETURN						0x00000000
+
+#define VIV_FE_RETURN_HEADER					0x00000000
+#define VIV_FE_RETURN_HEADER_OP__MASK				0xf8000000
+#define VIV_FE_RETURN_HEADER_OP__SHIFT				27
+#define VIV_FE_RETURN_HEADER_OP_RETURN				0x58000000
+
+#define VIV_FE_CHIP_SELECT					0x00000000
+
+#define VIV_FE_CHIP_SELECT_HEADER				0x00000000
+#define VIV_FE_CHIP_SELECT_HEADER_OP__MASK			0xf8000000
+#define VIV_FE_CHIP_SELECT_HEADER_OP__SHIFT			27
+#define VIV_FE_CHIP_SELECT_HEADER_OP_CHIP_SELECT		0x68000000
+#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP15			0x00008000
+#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP14			0x00004000
+#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP13			0x00002000
+#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP12			0x00001000
+#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP11			0x00000800
+#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP10			0x00000400
+#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP9			0x00000200
+#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP8			0x00000100
+#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP7			0x00000080
+#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP6			0x00000040
+#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP5			0x00000020
+#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP4			0x00000010
+#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP3			0x00000008
+#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP2			0x00000004
+#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP1			0x00000002
+#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP0			0x00000001
+
+
+#endif /* CMDSTREAM_XML */
diff --git a/drivers/gpu/drm/etnaviv/common.xml.h b/drivers/gpu/drm/etnaviv/common.xml.h
new file mode 100644
index 000000000000..9e585d51fb78
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/common.xml.h
@@ -0,0 +1,249 @@
+#ifndef COMMON_XML
+#define COMMON_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- state_vg.xml (   5973 bytes, from 2015-03-25 11:26:01)
+- common.xml   (  18437 bytes, from 2015-03-25 11:27:41)
+
+Copyright (C) 2015
+*/
+
+
+#define PIPE_ID_PIPE_3D						0x00000000
+#define PIPE_ID_PIPE_2D						0x00000001
+#define SYNC_RECIPIENT_FE					0x00000001
+#define SYNC_RECIPIENT_RA					0x00000005
+#define SYNC_RECIPIENT_PE					0x00000007
+#define SYNC_RECIPIENT_DE					0x0000000b
+#define SYNC_RECIPIENT_VG					0x0000000f
+#define SYNC_RECIPIENT_TESSELATOR				0x00000010
+#define SYNC_RECIPIENT_VG2					0x00000011
+#define SYNC_RECIPIENT_TESSELATOR2				0x00000012
+#define SYNC_RECIPIENT_VG3					0x00000013
+#define SYNC_RECIPIENT_TESSELATOR3				0x00000014
+#define ENDIAN_MODE_NO_SWAP					0x00000000
+#define ENDIAN_MODE_SWAP_16					0x00000001
+#define ENDIAN_MODE_SWAP_32					0x00000002
+#define chipModel_GC300						0x00000300
+#define chipModel_GC320						0x00000320
+#define chipModel_GC350						0x00000350
+#define chipModel_GC355						0x00000355
+#define chipModel_GC400						0x00000400
+#define chipModel_GC410						0x00000410
+#define chipModel_GC420						0x00000420
+#define chipModel_GC450						0x00000450
+#define chipModel_GC500						0x00000500
+#define chipModel_GC530						0x00000530
+#define chipModel_GC600						0x00000600
+#define chipModel_GC700						0x00000700
+#define chipModel_GC800						0x00000800
+#define chipModel_GC860						0x00000860
+#define chipModel_GC880						0x00000880
+#define chipModel_GC1000					0x00001000
+#define chipModel_GC2000					0x00002000
+#define chipModel_GC2100					0x00002100
+#define chipModel_GC4000					0x00004000
+#define RGBA_BITS_R						0x00000001
+#define RGBA_BITS_G						0x00000002
+#define RGBA_BITS_B						0x00000004
+#define RGBA_BITS_A						0x00000008
+#define chipFeatures_FAST_CLEAR					0x00000001
+#define chipFeatures_SPECIAL_ANTI_ALIASING			0x00000002
+#define chipFeatures_PIPE_3D					0x00000004
+#define chipFeatures_DXT_TEXTURE_COMPRESSION			0x00000008
+#define chipFeatures_DEBUG_MODE					0x00000010
+#define chipFeatures_Z_COMPRESSION				0x00000020
+#define chipFeatures_YUV420_SCALER				0x00000040
+#define chipFeatures_MSAA					0x00000080
+#define chipFeatures_DC						0x00000100
+#define chipFeatures_PIPE_2D					0x00000200
+#define chipFeatures_ETC1_TEXTURE_COMPRESSION			0x00000400
+#define chipFeatures_FAST_SCALER				0x00000800
+#define chipFeatures_HIGH_DYNAMIC_RANGE				0x00001000
+#define chipFeatures_YUV420_TILER				0x00002000
+#define chipFeatures_MODULE_CG					0x00004000
+#define chipFeatures_MIN_AREA					0x00008000
+#define chipFeatures_NO_EARLY_Z					0x00010000
+#define chipFeatures_NO_422_TEXTURE				0x00020000
+#define chipFeatures_BUFFER_INTERLEAVING			0x00040000
+#define chipFeatures_BYTE_WRITE_2D				0x00080000
+#define chipFeatures_NO_SCALER					0x00100000
+#define chipFeatures_YUY2_AVERAGING				0x00200000
+#define chipFeatures_HALF_PE_CACHE				0x00400000
+#define chipFeatures_HALF_TX_CACHE				0x00800000
+#define chipFeatures_YUY2_RENDER_TARGET				0x01000000
+#define chipFeatures_MEM32					0x02000000
+#define chipFeatures_PIPE_VG					0x04000000
+#define chipFeatures_VGTS					0x08000000
+#define chipFeatures_FE20					0x10000000
+#define chipFeatures_BYTE_WRITE_3D				0x20000000
+#define chipFeatures_RS_YUV_TARGET				0x40000000
+#define chipFeatures_32_BIT_INDICES				0x80000000
+#define chipMinorFeatures0_FLIP_Y				0x00000001
+#define chipMinorFeatures0_DUAL_RETURN_BUS			0x00000002
+#define chipMinorFeatures0_ENDIANNESS_CONFIG			0x00000004
+#define chipMinorFeatures0_TEXTURE_8K				0x00000008
+#define chipMinorFeatures0_CORRECT_TEXTURE_CONVERTER		0x00000010
+#define chipMinorFeatures0_SPECIAL_MSAA_LOD			0x00000020
+#define chipMinorFeatures0_FAST_CLEAR_FLUSH			0x00000040
+#define chipMinorFeatures0_2DPE20				0x00000080
+#define chipMinorFeatures0_CORRECT_AUTO_DISABLE			0x00000100
+#define chipMinorFeatures0_RENDERTARGET_8K			0x00000200
+#define chipMinorFeatures0_2BITPERTILE				0x00000400
+#define chipMinorFeatures0_SEPARATE_TILE_STATUS_WHEN_INTERLEAVED	0x00000800
+#define chipMinorFeatures0_SUPER_TILED				0x00001000
+#define chipMinorFeatures0_VG_20				0x00002000
+#define chipMinorFeatures0_TS_EXTENDED_COMMANDS			0x00004000
+#define chipMinorFeatures0_COMPRESSION_FIFO_FIXED		0x00008000
+#define chipMinorFeatures0_HAS_SIGN_FLOOR_CEIL			0x00010000
+#define chipMinorFeatures0_VG_FILTER				0x00020000
+#define chipMinorFeatures0_VG_21				0x00040000
+#define chipMinorFeatures0_SHADER_HAS_W				0x00080000
+#define chipMinorFeatures0_HAS_SQRT_TRIG			0x00100000
+#define chipMinorFeatures0_MORE_MINOR_FEATURES			0x00200000
+#define chipMinorFeatures0_MC20					0x00400000
+#define chipMinorFeatures0_MSAA_SIDEBAND			0x00800000
+#define chipMinorFeatures0_BUG_FIXES0				0x01000000
+#define chipMinorFeatures0_VAA					0x02000000
+#define chipMinorFeatures0_BYPASS_IN_MSAA			0x04000000
+#define chipMinorFeatures0_HZ					0x08000000
+#define chipMinorFeatures0_NEW_TEXTURE				0x10000000
+#define chipMinorFeatures0_2D_A8_TARGET				0x20000000
+#define chipMinorFeatures0_CORRECT_STENCIL			0x40000000
+#define chipMinorFeatures0_ENHANCE_VR				0x80000000
+#define chipMinorFeatures1_RSUV_SWIZZLE				0x00000001
+#define chipMinorFeatures1_V2_COMPRESSION			0x00000002
+#define chipMinorFeatures1_VG_DOUBLE_BUFFER			0x00000004
+#define chipMinorFeatures1_EXTRA_EVENT_STATES			0x00000008
+#define chipMinorFeatures1_NO_STRIPING_NEEDED			0x00000010
+#define chipMinorFeatures1_TEXTURE_STRIDE			0x00000020
+#define chipMinorFeatures1_BUG_FIXES3				0x00000040
+#define chipMinorFeatures1_AUTO_DISABLE				0x00000080
+#define chipMinorFeatures1_AUTO_RESTART_TS			0x00000100
+#define chipMinorFeatures1_DISABLE_PE_GATING			0x00000200
+#define chipMinorFeatures1_L2_WINDOWING				0x00000400
+#define chipMinorFeatures1_HALF_FLOAT				0x00000800
+#define chipMinorFeatures1_PIXEL_DITHER				0x00001000
+#define chipMinorFeatures1_TWO_STENCIL_REFERENCE		0x00002000
+#define chipMinorFeatures1_EXTENDED_PIXEL_FORMAT		0x00004000
+#define chipMinorFeatures1_CORRECT_MIN_MAX_DEPTH		0x00008000
+#define chipMinorFeatures1_2D_DITHER				0x00010000
+#define chipMinorFeatures1_BUG_FIXES5				0x00020000
+#define chipMinorFeatures1_NEW_2D				0x00040000
+#define chipMinorFeatures1_NEW_FP				0x00080000
+#define chipMinorFeatures1_TEXTURE_HALIGN			0x00100000
+#define chipMinorFeatures1_NON_POWER_OF_TWO			0x00200000
+#define chipMinorFeatures1_LINEAR_TEXTURE_SUPPORT		0x00400000
+#define chipMinorFeatures1_HALTI0				0x00800000
+#define chipMinorFeatures1_CORRECT_OVERFLOW_VG			0x01000000
+#define chipMinorFeatures1_NEGATIVE_LOG_FIX			0x02000000
+#define chipMinorFeatures1_RESOLVE_OFFSET			0x04000000
+#define chipMinorFeatures1_OK_TO_GATE_AXI_CLOCK			0x08000000
+#define chipMinorFeatures1_MMU_VERSION				0x10000000
+#define chipMinorFeatures1_WIDE_LINE				0x20000000
+#define chipMinorFeatures1_BUG_FIXES6				0x40000000
+#define chipMinorFeatures1_FC_FLUSH_STALL			0x80000000
+#define chipMinorFeatures2_LINE_LOOP				0x00000001
+#define chipMinorFeatures2_LOGIC_OP				0x00000002
+#define chipMinorFeatures2_UNK2					0x00000004
+#define chipMinorFeatures2_SUPERTILED_TEXTURE			0x00000008
+#define chipMinorFeatures2_UNK4					0x00000010
+#define chipMinorFeatures2_RECT_PRIMITIVE			0x00000020
+#define chipMinorFeatures2_COMPOSITION				0x00000040
+#define chipMinorFeatures2_CORRECT_AUTO_DISABLE_COUNT		0x00000080
+#define chipMinorFeatures2_UNK8					0x00000100
+#define chipMinorFeatures2_UNK9					0x00000200
+#define chipMinorFeatures2_UNK10				0x00000400
+#define chipMinorFeatures2_SAMPLERBASE_16			0x00000800
+#define chipMinorFeatures2_UNK12				0x00001000
+#define chipMinorFeatures2_UNK13				0x00002000
+#define chipMinorFeatures2_UNK14				0x00004000
+#define chipMinorFeatures2_EXTRA_TEXTURE_STATE			0x00008000
+#define chipMinorFeatures2_FULL_DIRECTFB			0x00010000
+#define chipMinorFeatures2_2D_TILING				0x00020000
+#define chipMinorFeatures2_THREAD_WALKER_IN_PS			0x00040000
+#define chipMinorFeatures2_TILE_FILLER				0x00080000
+#define chipMinorFeatures2_UNK20				0x00100000
+#define chipMinorFeatures2_2D_MULTI_SOURCE_BLIT			0x00200000
+#define chipMinorFeatures2_UNK22				0x00400000
+#define chipMinorFeatures2_UNK23				0x00800000
+#define chipMinorFeatures2_UNK24				0x01000000
+#define chipMinorFeatures2_MIXED_STREAMS			0x02000000
+#define chipMinorFeatures2_2D_420_L2CACHE			0x04000000
+#define chipMinorFeatures2_UNK27				0x08000000
+#define chipMinorFeatures2_2D_NO_INDEX8_BRUSH			0x10000000
+#define chipMinorFeatures2_TEXTURE_TILED_READ			0x20000000
+#define chipMinorFeatures2_UNK30				0x40000000
+#define chipMinorFeatures2_UNK31				0x80000000
+#define chipMinorFeatures3_ROTATION_STALL_FIX			0x00000001
+#define chipMinorFeatures3_UNK1					0x00000002
+#define chipMinorFeatures3_2D_MULTI_SOURCE_BLT_EX		0x00000004
+#define chipMinorFeatures3_UNK3					0x00000008
+#define chipMinorFeatures3_UNK4					0x00000010
+#define chipMinorFeatures3_UNK5					0x00000020
+#define chipMinorFeatures3_UNK6					0x00000040
+#define chipMinorFeatures3_UNK7					0x00000080
+#define chipMinorFeatures3_UNK8					0x00000100
+#define chipMinorFeatures3_UNK9					0x00000200
+#define chipMinorFeatures3_BUG_FIXES10				0x00000400
+#define chipMinorFeatures3_UNK11				0x00000800
+#define chipMinorFeatures3_BUG_FIXES11				0x00001000
+#define chipMinorFeatures3_UNK13				0x00002000
+#define chipMinorFeatures3_UNK14				0x00004000
+#define chipMinorFeatures3_UNK15				0x00008000
+#define chipMinorFeatures3_UNK16				0x00010000
+#define chipMinorFeatures3_UNK17				0x00020000
+#define chipMinorFeatures3_UNK18				0x00040000
+#define chipMinorFeatures3_UNK19				0x00080000
+#define chipMinorFeatures3_UNK20				0x00100000
+#define chipMinorFeatures3_UNK21				0x00200000
+#define chipMinorFeatures3_UNK22				0x00400000
+#define chipMinorFeatures3_UNK23				0x00800000
+#define chipMinorFeatures3_UNK24				0x01000000
+#define chipMinorFeatures3_UNK25				0x02000000
+#define chipMinorFeatures3_UNK26				0x04000000
+#define chipMinorFeatures3_UNK27				0x08000000
+#define chipMinorFeatures3_UNK28				0x10000000
+#define chipMinorFeatures3_UNK29				0x20000000
+#define chipMinorFeatures3_UNK30				0x40000000
+#define chipMinorFeatures3_UNK31				0x80000000
+#define chipMinorFeatures4_UNK0					0x00000001
+#define chipMinorFeatures4_UNK1					0x00000002
+#define chipMinorFeatures4_UNK2					0x00000004
+#define chipMinorFeatures4_UNK3					0x00000008
+#define chipMinorFeatures4_UNK4					0x00000010
+#define chipMinorFeatures4_UNK5					0x00000020
+#define chipMinorFeatures4_UNK6					0x00000040
+#define chipMinorFeatures4_UNK7					0x00000080
+#define chipMinorFeatures4_UNK8					0x00000100
+#define chipMinorFeatures4_UNK9					0x00000200
+#define chipMinorFeatures4_UNK10				0x00000400
+#define chipMinorFeatures4_UNK11				0x00000800
+#define chipMinorFeatures4_UNK12				0x00001000
+#define chipMinorFeatures4_UNK13				0x00002000
+#define chipMinorFeatures4_UNK14				0x00004000
+#define chipMinorFeatures4_UNK15				0x00008000
+#define chipMinorFeatures4_UNK16				0x00010000
+#define chipMinorFeatures4_UNK17				0x00020000
+#define chipMinorFeatures4_UNK18				0x00040000
+#define chipMinorFeatures4_UNK19				0x00080000
+#define chipMinorFeatures4_UNK20				0x00100000
+#define chipMinorFeatures4_UNK21				0x00200000
+#define chipMinorFeatures4_UNK22				0x00400000
+#define chipMinorFeatures4_UNK23				0x00800000
+#define chipMinorFeatures4_UNK24				0x01000000
+#define chipMinorFeatures4_UNK25				0x02000000
+#define chipMinorFeatures4_UNK26				0x04000000
+#define chipMinorFeatures4_UNK27				0x08000000
+#define chipMinorFeatures4_UNK28				0x10000000
+#define chipMinorFeatures4_UNK29				0x20000000
+#define chipMinorFeatures4_UNK30				0x40000000
+#define chipMinorFeatures4_UNK31				0x80000000
+
+#endif /* COMMON_XML */
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
new file mode 100644
index 000000000000..332c55ebba6d
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
@@ -0,0 +1,268 @@
+/*
+ * Copyright (C) 2014 Etnaviv Project
+ * Author: Christian Gmeiner <christian.gmeiner@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "etnaviv_gpu.h"
+#include "etnaviv_gem.h"
+#include "etnaviv_mmu.h"
+
+#include "common.xml.h"
+#include "state.xml.h"
+#include "cmdstream.xml.h"
+
+/*
+ * Command Buffer helper:
+ */
+
+
+static inline void OUT(struct etnaviv_cmdbuf *buffer, u32 data)
+{
+	u32 *vaddr = (u32 *)buffer->vaddr;
+
+	BUG_ON(buffer->user_size >= buffer->size);
+
+	vaddr[buffer->user_size / 4] = data;
+	buffer->user_size += 4;
+}
+
+static inline void CMD_LOAD_STATE(struct etnaviv_cmdbuf *buffer,
+	u32 reg, u32 value)
+{
+	u32 index = reg >> VIV_FE_LOAD_STATE_HEADER_OFFSET__SHR;
+
+	buffer->user_size = ALIGN(buffer->user_size, 8);
+
+	/* write a register via cmd stream */
+	OUT(buffer, VIV_FE_LOAD_STATE_HEADER_OP_LOAD_STATE |
+		    VIV_FE_LOAD_STATE_HEADER_COUNT(1) |
+		    VIV_FE_LOAD_STATE_HEADER_OFFSET(index));
+	OUT(buffer, value);
+}
+
+static inline void CMD_END(struct etnaviv_cmdbuf *buffer)
+{
+	buffer->user_size = ALIGN(buffer->user_size, 8);
+
+	OUT(buffer, VIV_FE_END_HEADER_OP_END);
+}
+
+static inline void CMD_WAIT(struct etnaviv_cmdbuf *buffer)
+{
+	buffer->user_size = ALIGN(buffer->user_size, 8);
+
+	OUT(buffer, VIV_FE_WAIT_HEADER_OP_WAIT | 200);
+}
+
+static inline void CMD_LINK(struct etnaviv_cmdbuf *buffer,
+	u16 prefetch, u32 address)
+{
+	buffer->user_size = ALIGN(buffer->user_size, 8);
+
+	OUT(buffer, VIV_FE_LINK_HEADER_OP_LINK |
+		    VIV_FE_LINK_HEADER_PREFETCH(prefetch));
+	OUT(buffer, address);
+}
+
+static inline void CMD_STALL(struct etnaviv_cmdbuf *buffer,
+	u32 from, u32 to)
+{
+	buffer->user_size = ALIGN(buffer->user_size, 8);
+
+	OUT(buffer, VIV_FE_STALL_HEADER_OP_STALL);
+	OUT(buffer, VIV_FE_STALL_TOKEN_FROM(from) | VIV_FE_STALL_TOKEN_TO(to));
+}
+
+static void etnaviv_cmd_select_pipe(struct etnaviv_cmdbuf *buffer, u8 pipe)
+{
+	u32 flush;
+	u32 stall;
+
+	/*
+	 * This assumes that if we're switching to 2D, we're switching
+	 * away from 3D, and vice versa.  Hence, if we're switching to
+	 * the 2D core, we need to flush the 3D depth and color caches,
+	 * otherwise we need to flush the 2D pixel engine cache.
+	 */
+	if (pipe == ETNA_PIPE_2D)
+		flush = VIVS_GL_FLUSH_CACHE_DEPTH | VIVS_GL_FLUSH_CACHE_COLOR;
+	else
+		flush = VIVS_GL_FLUSH_CACHE_PE2D;
+
+	stall = VIVS_GL_SEMAPHORE_TOKEN_FROM(SYNC_RECIPIENT_FE) |
+		VIVS_GL_SEMAPHORE_TOKEN_TO(SYNC_RECIPIENT_PE);
+
+	CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE, flush);
+	CMD_LOAD_STATE(buffer, VIVS_GL_SEMAPHORE_TOKEN, stall);
+
+	CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
+
+	CMD_LOAD_STATE(buffer, VIVS_GL_PIPE_SELECT,
+		       VIVS_GL_PIPE_SELECT_PIPE(pipe));
+}
+
+static u32 gpu_va(struct etnaviv_gpu *gpu, struct etnaviv_cmdbuf *buf)
+{
+	return buf->paddr - gpu->memory_base;
+}
+
+static void etnaviv_buffer_dump(struct etnaviv_gpu *gpu,
+	struct etnaviv_cmdbuf *buf, u32 off, u32 len)
+{
+	u32 size = buf->size;
+	u32 *ptr = buf->vaddr + off;
+
+	dev_info(gpu->dev, "virt %p phys 0x%08x free 0x%08x\n",
+			ptr, gpu_va(gpu, buf) + off, size - len * 4 - off);
+
+	print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4,
+			ptr, len * 4, 0);
+}
+
+u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu)
+{
+	struct etnaviv_cmdbuf *buffer = gpu->buffer;
+
+	/* initialize buffer */
+	buffer->user_size = 0;
+
+	CMD_WAIT(buffer);
+	CMD_LINK(buffer, 2, gpu_va(gpu, buffer) + buffer->user_size - 4);
+
+	return buffer->user_size / 8;
+}
+
+void etnaviv_buffer_end(struct etnaviv_gpu *gpu)
+{
+	struct etnaviv_cmdbuf *buffer = gpu->buffer;
+
+	/* Replace the last WAIT with an END */
+	buffer->user_size -= 16;
+
+	CMD_END(buffer);
+	mb();
+}
+
+void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event,
+	struct etnaviv_cmdbuf *cmdbuf)
+{
+	struct etnaviv_cmdbuf *buffer = gpu->buffer;
+	u32 *lw = buffer->vaddr + buffer->user_size - 16;
+	u32 back, link_target, link_size, reserve_size, extra_size = 0;
+
+	if (drm_debug & DRM_UT_DRIVER)
+		etnaviv_buffer_dump(gpu, buffer, 0, 0x50);
+
+	/*
+	 * If we need to flush the MMU prior to submitting this buffer, we
+	 * will need to append a mmu flush load state, followed by a new
+	 * link to this buffer - a total of four additional words.
+	 */
+	if (gpu->mmu->need_flush || gpu->switch_context) {
+		/* link command */
+		extra_size += 2;
+		/* flush command */
+		if (gpu->mmu->need_flush)
+			extra_size += 2;
+		/* pipe switch commands */
+		if (gpu->switch_context)
+			extra_size += 8;
+	}
+
+	reserve_size = (6 + extra_size) * 4;
+
+	/*
+	 * if we are going to completely overflow the buffer, we need to wrap.
+	 */
+	if (buffer->user_size + reserve_size > buffer->size)
+		buffer->user_size = 0;
+
+	/* save offset back into main buffer */
+	back = buffer->user_size + reserve_size - 6 * 4;
+	link_target = gpu_va(gpu, buffer) + buffer->user_size;
+	link_size = 6;
+
+	/* Skip over any extra instructions */
+	link_target += extra_size * sizeof(u32);
+
+	if (drm_debug & DRM_UT_DRIVER)
+		pr_info("stream link to 0x%08x @ 0x%08x %p\n",
+			link_target, gpu_va(gpu, cmdbuf), cmdbuf->vaddr);
+
+	/* jump back from cmd to main buffer */
+	CMD_LINK(cmdbuf, link_size, link_target);
+
+	link_target = gpu_va(gpu, cmdbuf);
+	link_size = cmdbuf->size / 8;
+
+
+
+	if (drm_debug & DRM_UT_DRIVER) {
+		print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4,
+			       cmdbuf->vaddr, cmdbuf->size, 0);
+
+		pr_info("link op: %p\n", lw);
+		pr_info("link addr: %p\n", lw + 1);
+		pr_info("addr: 0x%08x\n", link_target);
+		pr_info("back: 0x%08x\n", gpu_va(gpu, buffer) + back);
+		pr_info("event: %d\n", event);
+	}
+
+	if (gpu->mmu->need_flush || gpu->switch_context) {
+		u32 new_target = gpu_va(gpu, buffer) + buffer->user_size;
+
+		if (gpu->mmu->need_flush) {
+			/* Add the MMU flush */
+			CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_MMU,
+				       VIVS_GL_FLUSH_MMU_FLUSH_FEMMU |
+				       VIVS_GL_FLUSH_MMU_FLUSH_UNK1 |
+				       VIVS_GL_FLUSH_MMU_FLUSH_UNK2 |
+				       VIVS_GL_FLUSH_MMU_FLUSH_PEMMU |
+				       VIVS_GL_FLUSH_MMU_FLUSH_UNK4);
+
+			gpu->mmu->need_flush = false;
+		}
+
+		if (gpu->switch_context) {
+			etnaviv_cmd_select_pipe(buffer, cmdbuf->exec_state);
+			gpu->switch_context = false;
+		}
+
+		/* And the link to the first buffer */
+		CMD_LINK(buffer, link_size, link_target);
+
+		/* Update the link target to point to above instructions */
+		link_target = new_target;
+		link_size = extra_size;
+	}
+
+	/* trigger event */
+	CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) |
+		       VIVS_GL_EVENT_FROM_PE);
+
+	/* append WAIT/LINK to main buffer */
+	CMD_WAIT(buffer);
+	CMD_LINK(buffer, 2, gpu_va(gpu, buffer) + (buffer->user_size - 4));
+
+	/* Change WAIT into a LINK command; write the address first. */
+	*(lw + 1) = link_target;
+	mb();
+	*(lw) = VIV_FE_LINK_HEADER_OP_LINK |
+		VIV_FE_LINK_HEADER_PREFETCH(link_size);
+	mb();
+
+	if (drm_debug & DRM_UT_DRIVER)
+		etnaviv_buffer_dump(gpu, buffer, 0, 0x50);
+}
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c b/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c
new file mode 100644
index 000000000000..dcfd565c88d1
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright (C) 2015 Etnaviv Project
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kernel.h>
+
+#include "etnaviv_gem.h"
+#include "etnaviv_gpu.h"
+
+#include "cmdstream.xml.h"
+
+#define EXTRACT(val, field) (((val) & field##__MASK) >> field##__SHIFT)
+
+struct etna_validation_state {
+	struct etnaviv_gpu *gpu;
+	const struct drm_etnaviv_gem_submit_reloc *relocs;
+	unsigned int num_relocs;
+	u32 *start;
+};
+
+static const struct {
+	u16 offset;
+	u16 size;
+} etnaviv_sensitive_states[] __initconst = {
+#define ST(start, num) { (start) >> 2, (num) }
+	/* 2D */
+	ST(0x1200, 1),
+	ST(0x1228, 1),
+	ST(0x1238, 1),
+	ST(0x1284, 1),
+	ST(0x128c, 1),
+	ST(0x1304, 1),
+	ST(0x1310, 1),
+	ST(0x1318, 1),
+	ST(0x12800, 4),
+	ST(0x128a0, 4),
+	ST(0x128c0, 4),
+	ST(0x12970, 4),
+	ST(0x12a00, 8),
+	ST(0x12b40, 8),
+	ST(0x12b80, 8),
+	ST(0x12ce0, 8),
+	/* 3D */
+	ST(0x0644, 1),
+	ST(0x064c, 1),
+	ST(0x0680, 8),
+	ST(0x1410, 1),
+	ST(0x1430, 1),
+	ST(0x1458, 1),
+	ST(0x1460, 8),
+	ST(0x1480, 8),
+	ST(0x1500, 8),
+	ST(0x1520, 8),
+	ST(0x1608, 1),
+	ST(0x1610, 1),
+	ST(0x1658, 1),
+	ST(0x165c, 1),
+	ST(0x1664, 1),
+	ST(0x1668, 1),
+	ST(0x16a4, 1),
+	ST(0x16c0, 8),
+	ST(0x16e0, 8),
+	ST(0x1740, 8),
+	ST(0x2400, 14 * 16),
+	ST(0x10800, 32 * 16),
+#undef ST
+};
+
+#define ETNAVIV_STATES_SIZE (VIV_FE_LOAD_STATE_HEADER_OFFSET__MASK + 1u)
+static DECLARE_BITMAP(etnaviv_states, ETNAVIV_STATES_SIZE);
+
+void __init etnaviv_validate_init(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(etnaviv_sensitive_states); i++)
+		bitmap_set(etnaviv_states, etnaviv_sensitive_states[i].offset,
+			   etnaviv_sensitive_states[i].size);
+}
+
+static void etnaviv_warn_if_non_sensitive(struct etna_validation_state *state,
+	unsigned int buf_offset, unsigned int state_addr)
+{
+	if (state->num_relocs && state->relocs->submit_offset < buf_offset) {
+		dev_warn_once(state->gpu->dev,
+			      "%s: relocation for non-sensitive state 0x%x at offset %u\n",
+			      __func__, state_addr,
+			      state->relocs->submit_offset);
+		while (state->num_relocs &&
+		       state->relocs->submit_offset < buf_offset) {
+			state->relocs++;
+			state->num_relocs--;
+		}
+	}
+}
+
+static bool etnaviv_validate_load_state(struct etna_validation_state *state,
+	u32 *ptr, unsigned int state_offset, unsigned int num)
+{
+	unsigned int size = min(ETNAVIV_STATES_SIZE, state_offset + num);
+	unsigned int st_offset = state_offset, buf_offset;
+
+	for_each_set_bit_from(st_offset, etnaviv_states, size) {
+		buf_offset = (ptr - state->start +
+			      st_offset - state_offset) * 4;
+
+		etnaviv_warn_if_non_sensitive(state, buf_offset, st_offset * 4);
+		if (state->num_relocs &&
+		    state->relocs->submit_offset == buf_offset) {
+			state->relocs++;
+			state->num_relocs--;
+			continue;
+		}
+
+		dev_warn_ratelimited(state->gpu->dev,
+				     "%s: load state touches restricted state 0x%x at offset %u\n",
+				     __func__, st_offset * 4, buf_offset);
+		return false;
+	}
+
+	if (state->num_relocs) {
+		buf_offset = (ptr - state->start + num) * 4;
+		etnaviv_warn_if_non_sensitive(state, buf_offset, st_offset * 4 +
+					      state->relocs->submit_offset -
+					      buf_offset);
+	}
+
+	return true;
+}
+
+static uint8_t cmd_length[32] = {
+	[FE_OPCODE_DRAW_PRIMITIVES] = 4,
+	[FE_OPCODE_DRAW_INDEXED_PRIMITIVES] = 6,
+	[FE_OPCODE_NOP] = 2,
+	[FE_OPCODE_STALL] = 2,
+};
+
+bool etnaviv_cmd_validate_one(struct etnaviv_gpu *gpu, u32 *stream,
+			      unsigned int size,
+			      struct drm_etnaviv_gem_submit_reloc *relocs,
+			      unsigned int reloc_size)
+{
+	struct etna_validation_state state;
+	u32 *buf = stream;
+	u32 *end = buf + size;
+
+	state.gpu = gpu;
+	state.relocs = relocs;
+	state.num_relocs = reloc_size;
+	state.start = stream;
+
+	while (buf < end) {
+		u32 cmd = *buf;
+		unsigned int len, n, off;
+		unsigned int op = cmd >> 27;
+
+		switch (op) {
+		case FE_OPCODE_LOAD_STATE:
+			n = EXTRACT(cmd, VIV_FE_LOAD_STATE_HEADER_COUNT);
+			len = ALIGN(1 + n, 2);
+			if (buf + len > end)
+				break;
+
+			off = EXTRACT(cmd, VIV_FE_LOAD_STATE_HEADER_OFFSET);
+			if (!etnaviv_validate_load_state(&state, buf + 1,
+							 off, n))
+				return false;
+			break;
+
+		case FE_OPCODE_DRAW_2D:
+			n = EXTRACT(cmd, VIV_FE_DRAW_2D_HEADER_COUNT);
+			if (n == 0)
+				n = 256;
+			len = 2 + n * 2;
+			break;
+
+		default:
+			len = cmd_length[op];
+			if (len == 0) {
+				dev_err(gpu->dev, "%s: op %u not permitted at offset %tu\n",
+					__func__, op, buf - state.start);
+				return false;
+			}
+			break;
+		}
+
+		buf += len;
+	}
+
+	if (buf > end) {
+		dev_err(gpu->dev, "%s: commands overflow end of buffer: %tu > %u\n",
+			__func__, buf - state.start, size);
+		return false;
+	}
+
+	return true;
+}
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
new file mode 100644
index 000000000000..5c89ebb52fd2
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -0,0 +1,707 @@
+/*
+ * Copyright (C) 2015 Etnaviv Project
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/component.h>
+#include <linux/of_platform.h>
+
+#include "etnaviv_drv.h"
+#include "etnaviv_gpu.h"
+#include "etnaviv_gem.h"
+#include "etnaviv_mmu.h"
+#include "etnaviv_gem.h"
+
+#ifdef CONFIG_DRM_ETNAVIV_REGISTER_LOGGING
+static bool reglog;
+MODULE_PARM_DESC(reglog, "Enable register read/write logging");
+module_param(reglog, bool, 0600);
+#else
+#define reglog 0
+#endif
+
+void __iomem *etnaviv_ioremap(struct platform_device *pdev, const char *name,
+		const char *dbgname)
+{
+	struct resource *res;
+	void __iomem *ptr;
+
+	if (name)
+		res = platform_get_resource_byname(pdev, IORESOURCE_MEM, name);
+	else
+		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+	ptr = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(ptr)) {
+		dev_err(&pdev->dev, "failed to ioremap %s: %ld\n", name,
+			PTR_ERR(ptr));
+		return ptr;
+	}
+
+	if (reglog)
+		dev_printk(KERN_DEBUG, &pdev->dev, "IO:region %s 0x%p %08zx\n",
+			   dbgname, ptr, (size_t)resource_size(res));
+
+	return ptr;
+}
+
+void etnaviv_writel(u32 data, void __iomem *addr)
+{
+	if (reglog)
+		printk(KERN_DEBUG "IO:W %p %08x\n", addr, data);
+
+	writel(data, addr);
+}
+
+u32 etnaviv_readl(const void __iomem *addr)
+{
+	u32 val = readl(addr);
+
+	if (reglog)
+		printk(KERN_DEBUG "IO:R %p %08x\n", addr, val);
+
+	return val;
+}
+
+/*
+ * DRM operations:
+ */
+
+
+static void load_gpu(struct drm_device *dev)
+{
+	struct etnaviv_drm_private *priv = dev->dev_private;
+	unsigned int i;
+
+	for (i = 0; i < ETNA_MAX_PIPES; i++) {
+		struct etnaviv_gpu *g = priv->gpu[i];
+
+		if (g) {
+			int ret;
+
+			ret = etnaviv_gpu_init(g);
+			if (ret) {
+				dev_err(g->dev, "hw init failed: %d\n", ret);
+				priv->gpu[i] = NULL;
+			}
+		}
+	}
+}
+
+static int etnaviv_open(struct drm_device *dev, struct drm_file *file)
+{
+	struct etnaviv_file_private *ctx;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	file->driver_priv = ctx;
+
+	return 0;
+}
+
+static void etnaviv_preclose(struct drm_device *dev, struct drm_file *file)
+{
+	struct etnaviv_drm_private *priv = dev->dev_private;
+	struct etnaviv_file_private *ctx = file->driver_priv;
+	unsigned int i;
+
+	for (i = 0; i < ETNA_MAX_PIPES; i++) {
+		struct etnaviv_gpu *gpu = priv->gpu[i];
+
+		if (gpu) {
+			mutex_lock(&gpu->lock);
+			if (gpu->lastctx == ctx)
+				gpu->lastctx = NULL;
+			mutex_unlock(&gpu->lock);
+		}
+	}
+
+	kfree(ctx);
+}
+
+/*
+ * DRM debugfs:
+ */
+
+#ifdef CONFIG_DEBUG_FS
+static int etnaviv_gem_show(struct drm_device *dev, struct seq_file *m)
+{
+	struct etnaviv_drm_private *priv = dev->dev_private;
+
+	etnaviv_gem_describe_objects(priv, m);
+
+	return 0;
+}
+
+static int etnaviv_mm_show(struct drm_device *dev, struct seq_file *m)
+{
+	int ret;
+
+	read_lock(&dev->vma_offset_manager->vm_lock);
+	ret = drm_mm_dump_table(m, &dev->vma_offset_manager->vm_addr_space_mm);
+	read_unlock(&dev->vma_offset_manager->vm_lock);
+
+	return ret;
+}
+
+static int etnaviv_mmu_show(struct etnaviv_gpu *gpu, struct seq_file *m)
+{
+	seq_printf(m, "Active Objects (%s):\n", dev_name(gpu->dev));
+
+	mutex_lock(&gpu->mmu->lock);
+	drm_mm_dump_table(m, &gpu->mmu->mm);
+	mutex_unlock(&gpu->mmu->lock);
+
+	return 0;
+}
+
+static void etnaviv_buffer_dump(struct etnaviv_gpu *gpu, struct seq_file *m)
+{
+	struct etnaviv_cmdbuf *buf = gpu->buffer;
+	u32 size = buf->size;
+	u32 *ptr = buf->vaddr;
+	u32 i;
+
+	seq_printf(m, "virt %p - phys 0x%llx - free 0x%08x\n",
+			buf->vaddr, (u64)buf->paddr, size - buf->user_size);
+
+	for (i = 0; i < size / 4; i++) {
+		if (i && !(i % 4))
+			seq_puts(m, "\n");
+		if (i % 4 == 0)
+			seq_printf(m, "\t0x%p: ", ptr + i);
+		seq_printf(m, "%08x ", *(ptr + i));
+	}
+	seq_puts(m, "\n");
+}
+
+static int etnaviv_ring_show(struct etnaviv_gpu *gpu, struct seq_file *m)
+{
+	seq_printf(m, "Ring Buffer (%s): ", dev_name(gpu->dev));
+
+	mutex_lock(&gpu->lock);
+	etnaviv_buffer_dump(gpu, m);
+	mutex_unlock(&gpu->lock);
+
+	return 0;
+}
+
+static int show_unlocked(struct seq_file *m, void *arg)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	int (*show)(struct drm_device *dev, struct seq_file *m) =
+			node->info_ent->data;
+
+	return show(dev, m);
+}
+
+static int show_each_gpu(struct seq_file *m, void *arg)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct etnaviv_drm_private *priv = dev->dev_private;
+	struct etnaviv_gpu *gpu;
+	int (*show)(struct etnaviv_gpu *gpu, struct seq_file *m) =
+			node->info_ent->data;
+	unsigned int i;
+	int ret = 0;
+
+	for (i = 0; i < ETNA_MAX_PIPES; i++) {
+		gpu = priv->gpu[i];
+		if (!gpu)
+			continue;
+
+		ret = show(gpu, m);
+		if (ret < 0)
+			break;
+	}
+
+	return ret;
+}
+
+static struct drm_info_list etnaviv_debugfs_list[] = {
+		{"gpu", show_each_gpu, 0, etnaviv_gpu_debugfs},
+		{"gem", show_unlocked, 0, etnaviv_gem_show},
+		{ "mm", show_unlocked, 0, etnaviv_mm_show },
+		{"mmu", show_each_gpu, 0, etnaviv_mmu_show},
+		{"ring", show_each_gpu, 0, etnaviv_ring_show},
+};
+
+static int etnaviv_debugfs_init(struct drm_minor *minor)
+{
+	struct drm_device *dev = minor->dev;
+	int ret;
+
+	ret = drm_debugfs_create_files(etnaviv_debugfs_list,
+			ARRAY_SIZE(etnaviv_debugfs_list),
+			minor->debugfs_root, minor);
+
+	if (ret) {
+		dev_err(dev->dev, "could not install etnaviv_debugfs_list\n");
+		return ret;
+	}
+
+	return ret;
+}
+
+static void etnaviv_debugfs_cleanup(struct drm_minor *minor)
+{
+	drm_debugfs_remove_files(etnaviv_debugfs_list,
+			ARRAY_SIZE(etnaviv_debugfs_list), minor);
+}
+#endif
+
+/*
+ * DRM ioctls:
+ */
+
+static int etnaviv_ioctl_get_param(struct drm_device *dev, void *data,
+		struct drm_file *file)
+{
+	struct etnaviv_drm_private *priv = dev->dev_private;
+	struct drm_etnaviv_param *args = data;
+	struct etnaviv_gpu *gpu;
+
+	if (args->pipe >= ETNA_MAX_PIPES)
+		return -EINVAL;
+
+	gpu = priv->gpu[args->pipe];
+	if (!gpu)
+		return -ENXIO;
+
+	return etnaviv_gpu_get_param(gpu, args->param, &args->value);
+}
+
+static int etnaviv_ioctl_gem_new(struct drm_device *dev, void *data,
+		struct drm_file *file)
+{
+	struct drm_etnaviv_gem_new *args = data;
+
+	if (args->flags & ~(ETNA_BO_CACHED | ETNA_BO_WC | ETNA_BO_UNCACHED |
+			    ETNA_BO_FORCE_MMU))
+		return -EINVAL;
+
+	return etnaviv_gem_new_handle(dev, file, args->size,
+			args->flags, &args->handle);
+}
+
+#define TS(t) ((struct timespec){ \
+	.tv_sec = (t).tv_sec, \
+	.tv_nsec = (t).tv_nsec \
+})
+
+static int etnaviv_ioctl_gem_cpu_prep(struct drm_device *dev, void *data,
+		struct drm_file *file)
+{
+	struct drm_etnaviv_gem_cpu_prep *args = data;
+	struct drm_gem_object *obj;
+	int ret;
+
+	if (args->op & ~(ETNA_PREP_READ | ETNA_PREP_WRITE | ETNA_PREP_NOSYNC))
+		return -EINVAL;
+
+	obj = drm_gem_object_lookup(dev, file, args->handle);
+	if (!obj)
+		return -ENOENT;
+
+	ret = etnaviv_gem_cpu_prep(obj, args->op, &TS(args->timeout));
+
+	drm_gem_object_unreference_unlocked(obj);
+
+	return ret;
+}
+
+static int etnaviv_ioctl_gem_cpu_fini(struct drm_device *dev, void *data,
+		struct drm_file *file)
+{
+	struct drm_etnaviv_gem_cpu_fini *args = data;
+	struct drm_gem_object *obj;
+	int ret;
+
+	if (args->flags)
+		return -EINVAL;
+
+	obj = drm_gem_object_lookup(dev, file, args->handle);
+	if (!obj)
+		return -ENOENT;
+
+	ret = etnaviv_gem_cpu_fini(obj);
+
+	drm_gem_object_unreference_unlocked(obj);
+
+	return ret;
+}
+
+static int etnaviv_ioctl_gem_info(struct drm_device *dev, void *data,
+		struct drm_file *file)
+{
+	struct drm_etnaviv_gem_info *args = data;
+	struct drm_gem_object *obj;
+	int ret;
+
+	if (args->pad)
+		return -EINVAL;
+
+	obj = drm_gem_object_lookup(dev, file, args->handle);
+	if (!obj)
+		return -ENOENT;
+
+	ret = etnaviv_gem_mmap_offset(obj, &args->offset);
+	drm_gem_object_unreference_unlocked(obj);
+
+	return ret;
+}
+
+static int etnaviv_ioctl_wait_fence(struct drm_device *dev, void *data,
+		struct drm_file *file)
+{
+	struct drm_etnaviv_wait_fence *args = data;
+	struct etnaviv_drm_private *priv = dev->dev_private;
+	struct timespec *timeout = &TS(args->timeout);
+	struct etnaviv_gpu *gpu;
+
+	if (args->flags & ~(ETNA_WAIT_NONBLOCK))
+		return -EINVAL;
+
+	if (args->pipe >= ETNA_MAX_PIPES)
+		return -EINVAL;
+
+	gpu = priv->gpu[args->pipe];
+	if (!gpu)
+		return -ENXIO;
+
+	if (args->flags & ETNA_WAIT_NONBLOCK)
+		timeout = NULL;
+
+	return etnaviv_gpu_wait_fence_interruptible(gpu, args->fence,
+						    timeout);
+}
+
+static int etnaviv_ioctl_gem_userptr(struct drm_device *dev, void *data,
+	struct drm_file *file)
+{
+	struct drm_etnaviv_gem_userptr *args = data;
+	int access;
+
+	if (args->flags & ~(ETNA_USERPTR_READ|ETNA_USERPTR_WRITE) ||
+	    args->flags == 0)
+		return -EINVAL;
+
+	if (offset_in_page(args->user_ptr | args->user_size) ||
+	    (uintptr_t)args->user_ptr != args->user_ptr ||
+	    (u32)args->user_size != args->user_size ||
+	    args->user_ptr & ~PAGE_MASK)
+		return -EINVAL;
+
+	if (args->flags & ETNA_USERPTR_WRITE)
+		access = VERIFY_WRITE;
+	else
+		access = VERIFY_READ;
+
+	if (!access_ok(access, (void __user *)(unsigned long)args->user_ptr,
+		       args->user_size))
+		return -EFAULT;
+
+	return etnaviv_gem_new_userptr(dev, file, args->user_ptr,
+				       args->user_size, args->flags,
+				       &args->handle);
+}
+
+static int etnaviv_ioctl_gem_wait(struct drm_device *dev, void *data,
+	struct drm_file *file)
+{
+	struct etnaviv_drm_private *priv = dev->dev_private;
+	struct drm_etnaviv_gem_wait *args = data;
+	struct timespec *timeout = &TS(args->timeout);
+	struct drm_gem_object *obj;
+	struct etnaviv_gpu *gpu;
+	int ret;
+
+	if (args->flags & ~(ETNA_WAIT_NONBLOCK))
+		return -EINVAL;
+
+	if (args->pipe >= ETNA_MAX_PIPES)
+		return -EINVAL;
+
+	gpu = priv->gpu[args->pipe];
+	if (!gpu)
+		return -ENXIO;
+
+	obj = drm_gem_object_lookup(dev, file, args->handle);
+	if (!obj)
+		return -ENOENT;
+
+	if (args->flags & ETNA_WAIT_NONBLOCK)
+		timeout = NULL;
+
+	ret = etnaviv_gem_wait_bo(gpu, obj, timeout);
+
+	drm_gem_object_unreference_unlocked(obj);
+
+	return ret;
+}
+
+static const struct drm_ioctl_desc etnaviv_ioctls[] = {
+#define ETNA_IOCTL(n, func, flags) \
+	DRM_IOCTL_DEF_DRV(ETNAVIV_##n, etnaviv_ioctl_##func, flags)
+	ETNA_IOCTL(GET_PARAM,    get_param,    DRM_AUTH|DRM_RENDER_ALLOW),
+	ETNA_IOCTL(GEM_NEW,      gem_new,      DRM_AUTH|DRM_RENDER_ALLOW),
+	ETNA_IOCTL(GEM_INFO,     gem_info,     DRM_AUTH|DRM_RENDER_ALLOW),
+	ETNA_IOCTL(GEM_CPU_PREP, gem_cpu_prep, DRM_AUTH|DRM_RENDER_ALLOW),
+	ETNA_IOCTL(GEM_CPU_FINI, gem_cpu_fini, DRM_AUTH|DRM_RENDER_ALLOW),
+	ETNA_IOCTL(GEM_SUBMIT,   gem_submit,   DRM_AUTH|DRM_RENDER_ALLOW),
+	ETNA_IOCTL(WAIT_FENCE,   wait_fence,   DRM_AUTH|DRM_RENDER_ALLOW),
+	ETNA_IOCTL(GEM_USERPTR,  gem_userptr,  DRM_AUTH|DRM_RENDER_ALLOW),
+	ETNA_IOCTL(GEM_WAIT,     gem_wait,     DRM_AUTH|DRM_RENDER_ALLOW),
+};
+
+static const struct vm_operations_struct vm_ops = {
+	.fault = etnaviv_gem_fault,
+	.open = drm_gem_vm_open,
+	.close = drm_gem_vm_close,
+};
+
+static const struct file_operations fops = {
+	.owner              = THIS_MODULE,
+	.open               = drm_open,
+	.release            = drm_release,
+	.unlocked_ioctl     = drm_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl       = drm_compat_ioctl,
+#endif
+	.poll               = drm_poll,
+	.read               = drm_read,
+	.llseek             = no_llseek,
+	.mmap               = etnaviv_gem_mmap,
+};
+
+static struct drm_driver etnaviv_drm_driver = {
+	.driver_features    = DRIVER_HAVE_IRQ |
+				DRIVER_GEM |
+				DRIVER_PRIME |
+				DRIVER_RENDER,
+	.open               = etnaviv_open,
+	.preclose           = etnaviv_preclose,
+	.set_busid          = drm_platform_set_busid,
+	.gem_free_object    = etnaviv_gem_free_object,
+	.gem_vm_ops         = &vm_ops,
+	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
+	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+	.gem_prime_export   = drm_gem_prime_export,
+	.gem_prime_import   = drm_gem_prime_import,
+	.gem_prime_pin      = etnaviv_gem_prime_pin,
+	.gem_prime_unpin    = etnaviv_gem_prime_unpin,
+	.gem_prime_get_sg_table = etnaviv_gem_prime_get_sg_table,
+	.gem_prime_import_sg_table = etnaviv_gem_prime_import_sg_table,
+	.gem_prime_vmap     = etnaviv_gem_prime_vmap,
+	.gem_prime_vunmap   = etnaviv_gem_prime_vunmap,
+#ifdef CONFIG_DEBUG_FS
+	.debugfs_init       = etnaviv_debugfs_init,
+	.debugfs_cleanup    = etnaviv_debugfs_cleanup,
+#endif
+	.ioctls             = etnaviv_ioctls,
+	.num_ioctls         = DRM_ETNAVIV_NUM_IOCTLS,
+	.fops               = &fops,
+	.name               = "etnaviv",
+	.desc               = "etnaviv DRM",
+	.date               = "20151214",
+	.major              = 1,
+	.minor              = 0,
+};
+
+/*
+ * Platform driver:
+ */
+static int etnaviv_bind(struct device *dev)
+{
+	struct etnaviv_drm_private *priv;
+	struct drm_device *drm;
+	int ret;
+
+	drm = drm_dev_alloc(&etnaviv_drm_driver, dev);
+	if (!drm)
+		return -ENOMEM;
+
+	drm->platformdev = to_platform_device(dev);
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		dev_err(dev, "failed to allocate private data\n");
+		ret = -ENOMEM;
+		goto out_unref;
+	}
+	drm->dev_private = priv;
+
+	priv->wq = alloc_ordered_workqueue("etnaviv", 0);
+	if (!priv->wq) {
+		ret = -ENOMEM;
+		goto out_wq;
+	}
+
+	mutex_init(&priv->gem_lock);
+	INIT_LIST_HEAD(&priv->gem_list);
+	priv->num_gpus = 0;
+
+	dev_set_drvdata(dev, drm);
+
+	ret = component_bind_all(dev, drm);
+	if (ret < 0)
+		goto out_bind;
+
+	load_gpu(drm);
+
+	ret = drm_dev_register(drm, 0);
+	if (ret)
+		goto out_register;
+
+	return 0;
+
+out_register:
+	component_unbind_all(dev, drm);
+out_bind:
+	flush_workqueue(priv->wq);
+	destroy_workqueue(priv->wq);
+out_wq:
+	kfree(priv);
+out_unref:
+	drm_dev_unref(drm);
+
+	return ret;
+}
+
+static void etnaviv_unbind(struct device *dev)
+{
+	struct drm_device *drm = dev_get_drvdata(dev);
+	struct etnaviv_drm_private *priv = drm->dev_private;
+
+	drm_dev_unregister(drm);
+
+	flush_workqueue(priv->wq);
+	destroy_workqueue(priv->wq);
+
+	component_unbind_all(dev, drm);
+
+	drm->dev_private = NULL;
+	kfree(priv);
+
+	drm_put_dev(drm);
+}
+
+static const struct component_master_ops etnaviv_master_ops = {
+	.bind = etnaviv_bind,
+	.unbind = etnaviv_unbind,
+};
+
+static int compare_of(struct device *dev, void *data)
+{
+	struct device_node *np = data;
+
+	return dev->of_node == np;
+}
+
+static int compare_str(struct device *dev, void *data)
+{
+	return !strcmp(dev_name(dev), data);
+}
+
+static int etnaviv_pdev_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *node = dev->of_node;
+	struct component_match *match = NULL;
+
+	dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
+
+	if (node) {
+		struct device_node *core_node;
+		int i;
+
+		for (i = 0; ; i++) {
+			core_node = of_parse_phandle(node, "cores", i);
+			if (!core_node)
+				break;
+
+			component_match_add(&pdev->dev, &match, compare_of,
+					    core_node);
+			of_node_put(core_node);
+		}
+	} else if (dev->platform_data) {
+		char **names = dev->platform_data;
+		unsigned i;
+
+		for (i = 0; names[i]; i++)
+			component_match_add(dev, &match, compare_str, names[i]);
+	}
+
+	return component_master_add_with_match(dev, &etnaviv_master_ops, match);
+}
+
+static int etnaviv_pdev_remove(struct platform_device *pdev)
+{
+	component_master_del(&pdev->dev, &etnaviv_master_ops);
+
+	return 0;
+}
+
+static const struct of_device_id dt_match[] = {
+	{ .compatible = "fsl,imx-gpu-subsystem" },
+	{ .compatible = "marvell,dove-gpu-subsystem" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, dt_match);
+
+static struct platform_driver etnaviv_platform_driver = {
+	.probe      = etnaviv_pdev_probe,
+	.remove     = etnaviv_pdev_remove,
+	.driver     = {
+		.owner  = THIS_MODULE,
+		.name   = "etnaviv",
+		.of_match_table = dt_match,
+	},
+};
+
+static int __init etnaviv_init(void)
+{
+	int ret;
+
+	etnaviv_validate_init();
+
+	ret = platform_driver_register(&etnaviv_gpu_driver);
+	if (ret != 0)
+		return ret;
+
+	ret = platform_driver_register(&etnaviv_platform_driver);
+	if (ret != 0)
+		platform_driver_unregister(&etnaviv_gpu_driver);
+
+	return ret;
+}
+module_init(etnaviv_init);
+
+static void __exit etnaviv_exit(void)
+{
+	platform_driver_unregister(&etnaviv_gpu_driver);
+	platform_driver_unregister(&etnaviv_platform_driver);
+}
+module_exit(etnaviv_exit);
+
+MODULE_AUTHOR("Christian Gmeiner <christian.gmeiner@gmail.com>");
+MODULE_AUTHOR("Russell King <rmk+kernel@arm.linux.org.uk>");
+MODULE_AUTHOR("Lucas Stach <l.stach@pengutronix.de>");
+MODULE_DESCRIPTION("etnaviv DRM Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:etnaviv");
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
new file mode 100644
index 000000000000..d6bd438bd5be
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright (C) 2015 Etnaviv Project
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ETNAVIV_DRV_H__
+#define __ETNAVIV_DRV_H__
+
+#include <linux/kernel.h>
+#include <linux/clk.h>
+#include <linux/cpufreq.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/iommu.h>
+#include <linux/types.h>
+#include <linux/sizes.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_gem.h>
+#include <drm/etnaviv_drm.h>
+
+struct etnaviv_cmdbuf;
+struct etnaviv_gpu;
+struct etnaviv_mmu;
+struct etnaviv_gem_object;
+struct etnaviv_gem_submit;
+
+struct etnaviv_file_private {
+	/* currently we don't do anything useful with this.. but when
+	 * per-context address spaces are supported we'd keep track of
+	 * the context's page-tables here.
+	 */
+	int dummy;
+};
+
+struct etnaviv_drm_private {
+	int num_gpus;
+	struct etnaviv_gpu *gpu[ETNA_MAX_PIPES];
+
+	/* list of GEM objects: */
+	struct mutex gem_lock;
+	struct list_head gem_list;
+
+	struct workqueue_struct *wq;
+};
+
+static inline void etnaviv_queue_work(struct drm_device *dev,
+	struct work_struct *w)
+{
+	struct etnaviv_drm_private *priv = dev->dev_private;
+
+	queue_work(priv->wq, w);
+}
+
+int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
+		struct drm_file *file);
+
+int etnaviv_gem_mmap(struct file *filp, struct vm_area_struct *vma);
+int etnaviv_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int etnaviv_gem_mmap_offset(struct drm_gem_object *obj, u64 *offset);
+int etnaviv_gem_get_iova(struct etnaviv_gpu *gpu,
+	struct drm_gem_object *obj, u32 *iova);
+void etnaviv_gem_put_iova(struct etnaviv_gpu *gpu, struct drm_gem_object *obj);
+struct sg_table *etnaviv_gem_prime_get_sg_table(struct drm_gem_object *obj);
+void *etnaviv_gem_prime_vmap(struct drm_gem_object *obj);
+void etnaviv_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
+struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev,
+	struct dma_buf_attachment *attach, struct sg_table *sg);
+int etnaviv_gem_prime_pin(struct drm_gem_object *obj);
+void etnaviv_gem_prime_unpin(struct drm_gem_object *obj);
+void *etnaviv_gem_vaddr(struct drm_gem_object *obj);
+int etnaviv_gem_cpu_prep(struct drm_gem_object *obj, u32 op,
+		struct timespec *timeout);
+int etnaviv_gem_cpu_fini(struct drm_gem_object *obj);
+void etnaviv_gem_free_object(struct drm_gem_object *obj);
+int etnaviv_gem_new_handle(struct drm_device *dev, struct drm_file *file,
+		u32 size, u32 flags, u32 *handle);
+struct drm_gem_object *etnaviv_gem_new_locked(struct drm_device *dev,
+		u32 size, u32 flags);
+struct drm_gem_object *etnaviv_gem_new(struct drm_device *dev,
+		u32 size, u32 flags);
+int etnaviv_gem_new_userptr(struct drm_device *dev, struct drm_file *file,
+	uintptr_t ptr, u32 size, u32 flags, u32 *handle);
+u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu);
+void etnaviv_buffer_end(struct etnaviv_gpu *gpu);
+void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event,
+	struct etnaviv_cmdbuf *cmdbuf);
+void etnaviv_validate_init(void);
+bool etnaviv_cmd_validate_one(struct etnaviv_gpu *gpu,
+	u32 *stream, unsigned int size,
+	struct drm_etnaviv_gem_submit_reloc *relocs, unsigned int reloc_size);
+
+#ifdef CONFIG_DEBUG_FS
+void etnaviv_gem_describe_objects(struct etnaviv_drm_private *priv,
+	struct seq_file *m);
+#endif
+
+void __iomem *etnaviv_ioremap(struct platform_device *pdev, const char *name,
+		const char *dbgname);
+void etnaviv_writel(u32 data, void __iomem *addr);
+u32 etnaviv_readl(const void __iomem *addr);
+
+#define DBG(fmt, ...) DRM_DEBUG(fmt"\n", ##__VA_ARGS__)
+#define VERB(fmt, ...) if (0) DRM_DEBUG(fmt"\n", ##__VA_ARGS__)
+
+/*
+ * Return the storage size of a structure with a variable length array.
+ * The array is nelem elements of elem_size, where the base structure
+ * is defined by base.  If the size overflows size_t, return zero.
+ */
+static inline size_t size_vstruct(size_t nelem, size_t elem_size, size_t base)
+{
+	if (elem_size && nelem > (SIZE_MAX - base) / elem_size)
+		return 0;
+	return base + nelem * elem_size;
+}
+
+/* returns true if fence a comes after fence b */
+static inline bool fence_after(u32 a, u32 b)
+{
+	return (s32)(a - b) > 0;
+}
+
+static inline bool fence_after_eq(u32 a, u32 b)
+{
+	return (s32)(a - b) >= 0;
+}
+
+static inline unsigned long etnaviv_timeout_to_jiffies(
+	const struct timespec *timeout)
+{
+	unsigned long timeout_jiffies = timespec_to_jiffies(timeout);
+	unsigned long start_jiffies = jiffies;
+	unsigned long remaining_jiffies;
+
+	if (time_after(start_jiffies, timeout_jiffies))
+		remaining_jiffies = 0;
+	else
+		remaining_jiffies = timeout_jiffies - start_jiffies;
+
+	return remaining_jiffies;
+}
+
+#endif /* __ETNAVIV_DRV_H__ */
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
new file mode 100644
index 000000000000..bf8fa859e8be
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
@@ -0,0 +1,227 @@
+/*
+ * Copyright (C) 2015 Etnaviv Project
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/devcoredump.h>
+#include "etnaviv_dump.h"
+#include "etnaviv_gem.h"
+#include "etnaviv_gpu.h"
+#include "etnaviv_mmu.h"
+#include "state.xml.h"
+#include "state_hi.xml.h"
+
+struct core_dump_iterator {
+	void *start;
+	struct etnaviv_dump_object_header *hdr;
+	void *data;
+};
+
+static const unsigned short etnaviv_dump_registers[] = {
+	VIVS_HI_AXI_STATUS,
+	VIVS_HI_CLOCK_CONTROL,
+	VIVS_HI_IDLE_STATE,
+	VIVS_HI_AXI_CONFIG,
+	VIVS_HI_INTR_ENBL,
+	VIVS_HI_CHIP_IDENTITY,
+	VIVS_HI_CHIP_FEATURE,
+	VIVS_HI_CHIP_MODEL,
+	VIVS_HI_CHIP_REV,
+	VIVS_HI_CHIP_DATE,
+	VIVS_HI_CHIP_TIME,
+	VIVS_HI_CHIP_MINOR_FEATURE_0,
+	VIVS_HI_CACHE_CONTROL,
+	VIVS_HI_AXI_CONTROL,
+	VIVS_PM_POWER_CONTROLS,
+	VIVS_PM_MODULE_CONTROLS,
+	VIVS_PM_MODULE_STATUS,
+	VIVS_PM_PULSE_EATER,
+	VIVS_MC_MMU_FE_PAGE_TABLE,
+	VIVS_MC_MMU_TX_PAGE_TABLE,
+	VIVS_MC_MMU_PE_PAGE_TABLE,
+	VIVS_MC_MMU_PEZ_PAGE_TABLE,
+	VIVS_MC_MMU_RA_PAGE_TABLE,
+	VIVS_MC_DEBUG_MEMORY,
+	VIVS_MC_MEMORY_BASE_ADDR_RA,
+	VIVS_MC_MEMORY_BASE_ADDR_FE,
+	VIVS_MC_MEMORY_BASE_ADDR_TX,
+	VIVS_MC_MEMORY_BASE_ADDR_PEZ,
+	VIVS_MC_MEMORY_BASE_ADDR_PE,
+	VIVS_MC_MEMORY_TIMING_CONTROL,
+	VIVS_MC_BUS_CONFIG,
+	VIVS_FE_DMA_STATUS,
+	VIVS_FE_DMA_DEBUG_STATE,
+	VIVS_FE_DMA_ADDRESS,
+	VIVS_FE_DMA_LOW,
+	VIVS_FE_DMA_HIGH,
+	VIVS_FE_AUTO_FLUSH,
+};
+
+static void etnaviv_core_dump_header(struct core_dump_iterator *iter,
+	u32 type, void *data_end)
+{
+	struct etnaviv_dump_object_header *hdr = iter->hdr;
+
+	hdr->magic = cpu_to_le32(ETDUMP_MAGIC);
+	hdr->type = cpu_to_le32(type);
+	hdr->file_offset = cpu_to_le32(iter->data - iter->start);
+	hdr->file_size = cpu_to_le32(data_end - iter->data);
+
+	iter->hdr++;
+	iter->data += hdr->file_size;
+}
+
+static void etnaviv_core_dump_registers(struct core_dump_iterator *iter,
+	struct etnaviv_gpu *gpu)
+{
+	struct etnaviv_dump_registers *reg = iter->data;
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(etnaviv_dump_registers); i++, reg++) {
+		reg->reg = etnaviv_dump_registers[i];
+		reg->value = gpu_read(gpu, etnaviv_dump_registers[i]);
+	}
+
+	etnaviv_core_dump_header(iter, ETDUMP_BUF_REG, reg);
+}
+
+static void etnaviv_core_dump_mmu(struct core_dump_iterator *iter,
+	struct etnaviv_gpu *gpu, size_t mmu_size)
+{
+	etnaviv_iommu_dump(gpu->mmu, iter->data);
+
+	etnaviv_core_dump_header(iter, ETDUMP_BUF_MMU, iter->data + mmu_size);
+}
+
+static void etnaviv_core_dump_mem(struct core_dump_iterator *iter, u32 type,
+	void *ptr, size_t size, u64 iova)
+{
+	memcpy(iter->data, ptr, size);
+
+	iter->hdr->iova = cpu_to_le64(iova);
+
+	etnaviv_core_dump_header(iter, type, iter->data + size);
+}
+
+void etnaviv_core_dump(struct etnaviv_gpu *gpu)
+{
+	struct core_dump_iterator iter;
+	struct etnaviv_vram_mapping *vram;
+	struct etnaviv_gem_object *obj;
+	struct etnaviv_cmdbuf *cmd;
+	unsigned int n_obj, n_bomap_pages;
+	size_t file_size, mmu_size;
+	__le64 *bomap, *bomap_start;
+
+	mmu_size = etnaviv_iommu_dump_size(gpu->mmu);
+
+	/* We always dump registers, mmu, ring and end marker */
+	n_obj = 4;
+	n_bomap_pages = 0;
+	file_size = ARRAY_SIZE(etnaviv_dump_registers) *
+			sizeof(struct etnaviv_dump_registers) +
+		    mmu_size + gpu->buffer->size;
+
+	/* Add in the active command buffers */
+	list_for_each_entry(cmd, &gpu->active_cmd_list, node) {
+		file_size += cmd->size;
+		n_obj++;
+	}
+
+	/* Add in the active buffer objects */
+	list_for_each_entry(vram, &gpu->mmu->mappings, mmu_node) {
+		if (!vram->use)
+			continue;
+
+		obj = vram->object;
+		file_size += obj->base.size;
+		n_bomap_pages += obj->base.size >> PAGE_SHIFT;
+		n_obj++;
+	}
+
+	/* If we have any buffer objects, add a bomap object */
+	if (n_bomap_pages) {
+		file_size += n_bomap_pages * sizeof(__le64);
+		n_obj++;
+	}
+
+	/* Add the size of the headers */
+	file_size += sizeof(*iter.hdr) * n_obj;
+
+	/* Allocate the file in vmalloc memory, it's likely to be big */
+	iter.start = vmalloc(file_size);
+	if (!iter.start) {
+		dev_warn(gpu->dev, "failed to allocate devcoredump file\n");
+		return;
+	}
+
+	/* Point the data member after the headers */
+	iter.hdr = iter.start;
+	iter.data = &iter.hdr[n_obj];
+
+	memset(iter.hdr, 0, iter.data - iter.start);
+
+	etnaviv_core_dump_registers(&iter, gpu);
+	etnaviv_core_dump_mmu(&iter, gpu, mmu_size);
+	etnaviv_core_dump_mem(&iter, ETDUMP_BUF_RING, gpu->buffer->vaddr,
+			      gpu->buffer->size, gpu->buffer->paddr);
+
+	list_for_each_entry(cmd, &gpu->active_cmd_list, node)
+		etnaviv_core_dump_mem(&iter, ETDUMP_BUF_CMD, cmd->vaddr,
+				      cmd->size, cmd->paddr);
+
+	/* Reserve space for the bomap */
+	if (n_bomap_pages) {
+		bomap_start = bomap = iter.data;
+		memset(bomap, 0, sizeof(*bomap) * n_bomap_pages);
+		etnaviv_core_dump_header(&iter, ETDUMP_BUF_BOMAP,
+					 bomap + n_bomap_pages);
+	} else {
+		/* Silence warning */
+		bomap_start = bomap = NULL;
+	}
+
+	list_for_each_entry(vram, &gpu->mmu->mappings, mmu_node) {
+		struct page **pages;
+		void *vaddr;
+
+		if (vram->use == 0)
+			continue;
+
+		obj = vram->object;
+
+		pages = etnaviv_gem_get_pages(obj);
+		if (pages) {
+			int j;
+
+			iter.hdr->data[0] = bomap - bomap_start;
+
+			for (j = 0; j < obj->base.size >> PAGE_SHIFT; j++)
+				*bomap++ = cpu_to_le64(page_to_phys(*pages++));
+		}
+
+		iter.hdr->iova = cpu_to_le64(vram->iova);
+
+		vaddr = etnaviv_gem_vaddr(&obj->base);
+		if (vaddr && !IS_ERR(vaddr))
+			memcpy(iter.data, vaddr, obj->base.size);
+
+		etnaviv_core_dump_header(&iter, ETDUMP_BUF_BO, iter.data +
+					 obj->base.size);
+	}
+
+	etnaviv_core_dump_header(&iter, ETDUMP_BUF_END, iter.data);
+
+	dev_coredumpv(gpu->dev, iter.start, iter.data - iter.start, GFP_KERNEL);
+}
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.h b/drivers/gpu/drm/etnaviv/etnaviv_dump.h
new file mode 100644
index 000000000000..97f2f8db9133
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2015 Etnaviv Project
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Etnaviv devcoredump file definitions
+ */
+#ifndef ETNAVIV_DUMP_H
+#define ETNAVIV_DUMP_H
+
+#include <linux/types.h>
+
+enum {
+	ETDUMP_MAGIC = 0x414e5445,
+	ETDUMP_BUF_REG = 0,
+	ETDUMP_BUF_MMU,
+	ETDUMP_BUF_RING,
+	ETDUMP_BUF_CMD,
+	ETDUMP_BUF_BOMAP,
+	ETDUMP_BUF_BO,
+	ETDUMP_BUF_END,
+};
+
+struct etnaviv_dump_object_header {
+	__le32 magic;
+	__le32 type;
+	__le32 file_offset;
+	__le32 file_size;
+	__le64 iova;
+	__le32 data[2];
+};
+
+/* Registers object, an array of these */
+struct etnaviv_dump_registers {
+	__le32 reg;
+	__le32 value;
+};
+
+#ifdef __KERNEL__
+struct etnaviv_gpu;
+void etnaviv_core_dump(struct etnaviv_gpu *gpu);
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
new file mode 100644
index 000000000000..8d6f859f8200
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -0,0 +1,897 @@
+/*
+ * Copyright (C) 2015 Etnaviv Project
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/shmem_fs.h>
+
+#include "etnaviv_drv.h"
+#include "etnaviv_gem.h"
+#include "etnaviv_gpu.h"
+#include "etnaviv_mmu.h"
+
+static void etnaviv_gem_scatter_map(struct etnaviv_gem_object *etnaviv_obj)
+{
+	struct drm_device *dev = etnaviv_obj->base.dev;
+	struct sg_table *sgt = etnaviv_obj->sgt;
+
+	/*
+	 * For non-cached buffers, ensure the new pages are clean
+	 * because display controller, GPU, etc. are not coherent.
+	 */
+	if (etnaviv_obj->flags & ETNA_BO_CACHE_MASK)
+		dma_map_sg(dev->dev, sgt->sgl, sgt->nents, DMA_BIDIRECTIONAL);
+}
+
+static void etnaviv_gem_scatterlist_unmap(struct etnaviv_gem_object *etnaviv_obj)
+{
+	struct drm_device *dev = etnaviv_obj->base.dev;
+	struct sg_table *sgt = etnaviv_obj->sgt;
+
+	/*
+	 * For non-cached buffers, ensure the new pages are clean
+	 * because display controller, GPU, etc. are not coherent:
+	 *
+	 * WARNING: The DMA API does not support concurrent CPU
+	 * and device access to the memory area.  With BIDIRECTIONAL,
+	 * we will clean the cache lines which overlap the region,
+	 * and invalidate all cache lines (partially) contained in
+	 * the region.
+	 *
+	 * If you have dirty data in the overlapping cache lines,
+	 * that will corrupt the GPU-written data.  If you have
+	 * written into the remainder of the region, this can
+	 * discard those writes.
+	 */
+	if (etnaviv_obj->flags & ETNA_BO_CACHE_MASK)
+		dma_unmap_sg(dev->dev, sgt->sgl, sgt->nents, DMA_BIDIRECTIONAL);
+}
+
+/* called with etnaviv_obj->lock held */
+static int etnaviv_gem_shmem_get_pages(struct etnaviv_gem_object *etnaviv_obj)
+{
+	struct drm_device *dev = etnaviv_obj->base.dev;
+	struct page **p = drm_gem_get_pages(&etnaviv_obj->base);
+
+	if (IS_ERR(p)) {
+		dev_err(dev->dev, "could not get pages: %ld\n", PTR_ERR(p));
+		return PTR_ERR(p);
+	}
+
+	etnaviv_obj->pages = p;
+
+	return 0;
+}
+
+static void put_pages(struct etnaviv_gem_object *etnaviv_obj)
+{
+	if (etnaviv_obj->sgt) {
+		etnaviv_gem_scatterlist_unmap(etnaviv_obj);
+		sg_free_table(etnaviv_obj->sgt);
+		kfree(etnaviv_obj->sgt);
+		etnaviv_obj->sgt = NULL;
+	}
+	if (etnaviv_obj->pages) {
+		drm_gem_put_pages(&etnaviv_obj->base, etnaviv_obj->pages,
+				  true, false);
+
+		etnaviv_obj->pages = NULL;
+	}
+}
+
+struct page **etnaviv_gem_get_pages(struct etnaviv_gem_object *etnaviv_obj)
+{
+	int ret;
+
+	lockdep_assert_held(&etnaviv_obj->lock);
+
+	if (!etnaviv_obj->pages) {
+		ret = etnaviv_obj->ops->get_pages(etnaviv_obj);
+		if (ret < 0)
+			return ERR_PTR(ret);
+	}
+
+	if (!etnaviv_obj->sgt) {
+		struct drm_device *dev = etnaviv_obj->base.dev;
+		int npages = etnaviv_obj->base.size >> PAGE_SHIFT;
+		struct sg_table *sgt;
+
+		sgt = drm_prime_pages_to_sg(etnaviv_obj->pages, npages);
+		if (IS_ERR(sgt)) {
+			dev_err(dev->dev, "failed to allocate sgt: %ld\n",
+				PTR_ERR(sgt));
+			return ERR_CAST(sgt);
+		}
+
+		etnaviv_obj->sgt = sgt;
+
+		etnaviv_gem_scatter_map(etnaviv_obj);
+	}
+
+	return etnaviv_obj->pages;
+}
+
+void etnaviv_gem_put_pages(struct etnaviv_gem_object *etnaviv_obj)
+{
+	lockdep_assert_held(&etnaviv_obj->lock);
+	/* when we start tracking the pin count, then do something here */
+}
+
+static int etnaviv_gem_mmap_obj(struct drm_gem_object *obj,
+		struct vm_area_struct *vma)
+{
+	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
+	pgprot_t vm_page_prot;
+
+	vma->vm_flags &= ~VM_PFNMAP;
+	vma->vm_flags |= VM_MIXEDMAP;
+
+	vm_page_prot = vm_get_page_prot(vma->vm_flags);
+
+	if (etnaviv_obj->flags & ETNA_BO_WC) {
+		vma->vm_page_prot = pgprot_writecombine(vm_page_prot);
+	} else if (etnaviv_obj->flags & ETNA_BO_UNCACHED) {
+		vma->vm_page_prot = pgprot_noncached(vm_page_prot);
+	} else {
+		/*
+		 * Shunt off cached objs to shmem file so they have their own
+		 * address_space (so unmap_mapping_range does what we want,
+		 * in particular in the case of mmap'd dmabufs)
+		 */
+		fput(vma->vm_file);
+		get_file(obj->filp);
+		vma->vm_pgoff = 0;
+		vma->vm_file  = obj->filp;
+
+		vma->vm_page_prot = vm_page_prot;
+	}
+
+	return 0;
+}
+
+int etnaviv_gem_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct etnaviv_gem_object *obj;
+	int ret;
+
+	ret = drm_gem_mmap(filp, vma);
+	if (ret) {
+		DBG("mmap failed: %d", ret);
+		return ret;
+	}
+
+	obj = to_etnaviv_bo(vma->vm_private_data);
+	return etnaviv_gem_mmap_obj(vma->vm_private_data, vma);
+}
+
+int etnaviv_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
+	struct page **pages, *page;
+	pgoff_t pgoff;
+	int ret;
+
+	/*
+	 * Make sure we don't parallel update on a fault, nor move or remove
+	 * something from beneath our feet.  Note that vm_insert_page() is
+	 * specifically coded to take care of this, so we don't have to.
+	 */
+	ret = mutex_lock_interruptible(&etnaviv_obj->lock);
+	if (ret)
+		goto out;
+
+	/* make sure we have pages attached now */
+	pages = etnaviv_gem_get_pages(etnaviv_obj);
+	mutex_unlock(&etnaviv_obj->lock);
+
+	if (IS_ERR(pages)) {
+		ret = PTR_ERR(pages);
+		goto out;
+	}
+
+	/* We don't use vmf->pgoff since that has the fake offset: */
+	pgoff = ((unsigned long)vmf->virtual_address -
+			vma->vm_start) >> PAGE_SHIFT;
+
+	page = pages[pgoff];
+
+	VERB("Inserting %p pfn %lx, pa %lx", vmf->virtual_address,
+	     page_to_pfn(page), page_to_pfn(page) << PAGE_SHIFT);
+
+	ret = vm_insert_page(vma, (unsigned long)vmf->virtual_address, page);
+
+out:
+	switch (ret) {
+	case -EAGAIN:
+	case 0:
+	case -ERESTARTSYS:
+	case -EINTR:
+	case -EBUSY:
+		/*
+		 * EBUSY is ok: this just means that another thread
+		 * already did the job.
+		 */
+		return VM_FAULT_NOPAGE;
+	case -ENOMEM:
+		return VM_FAULT_OOM;
+	default:
+		return VM_FAULT_SIGBUS;
+	}
+}
+
+int etnaviv_gem_mmap_offset(struct drm_gem_object *obj, u64 *offset)
+{
+	int ret;
+
+	/* Make it mmapable */
+	ret = drm_gem_create_mmap_offset(obj);
+	if (ret)
+		dev_err(obj->dev->dev, "could not allocate mmap offset\n");
+	else
+		*offset = drm_vma_node_offset_addr(&obj->vma_node);
+
+	return ret;
+}
+
+static struct etnaviv_vram_mapping *
+etnaviv_gem_get_vram_mapping(struct etnaviv_gem_object *obj,
+			     struct etnaviv_iommu *mmu)
+{
+	struct etnaviv_vram_mapping *mapping;
+
+	list_for_each_entry(mapping, &obj->vram_list, obj_node) {
+		if (mapping->mmu == mmu)
+			return mapping;
+	}
+
+	return NULL;
+}
+
+int etnaviv_gem_get_iova(struct etnaviv_gpu *gpu,
+	struct drm_gem_object *obj, u32 *iova)
+{
+	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
+	struct etnaviv_vram_mapping *mapping;
+	struct page **pages;
+	int ret = 0;
+
+	mutex_lock(&etnaviv_obj->lock);
+	mapping = etnaviv_gem_get_vram_mapping(etnaviv_obj, gpu->mmu);
+	if (mapping) {
+		/*
+		 * Holding the object lock prevents the use count changing
+		 * beneath us.  If the use count is zero, the MMU might be
+		 * reaping this object, so take the lock and re-check that
+		 * the MMU owns this mapping to close this race.
+		 */
+		if (mapping->use == 0) {
+			mutex_lock(&gpu->mmu->lock);
+			if (mapping->mmu == gpu->mmu)
+				mapping->use += 1;
+			else
+				mapping = NULL;
+			mutex_unlock(&gpu->mmu->lock);
+			if (mapping)
+				goto out;
+		} else {
+			mapping->use += 1;
+			goto out;
+		}
+	}
+
+	pages = etnaviv_gem_get_pages(etnaviv_obj);
+	if (IS_ERR(pages)) {
+		ret = PTR_ERR(pages);
+		goto out;
+	}
+
+	/*
+	 * See if we have a reaped vram mapping we can re-use before
+	 * allocating a fresh mapping.
+	 */
+	mapping = etnaviv_gem_get_vram_mapping(etnaviv_obj, NULL);
+	if (!mapping) {
+		mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
+		if (!mapping)
+			return -ENOMEM;
+
+		INIT_LIST_HEAD(&mapping->scan_node);
+		mapping->object = etnaviv_obj;
+	} else {
+		list_del(&mapping->obj_node);
+	}
+
+	mapping->mmu = gpu->mmu;
+	mapping->use = 1;
+
+	ret = etnaviv_iommu_map_gem(gpu->mmu, etnaviv_obj, gpu->memory_base,
+				    mapping);
+	if (ret < 0)
+		kfree(mapping);
+	else
+		list_add_tail(&mapping->obj_node, &etnaviv_obj->vram_list);
+
+out:
+	mutex_unlock(&etnaviv_obj->lock);
+
+	if (!ret) {
+		/* Take a reference on the object */
+		drm_gem_object_reference(obj);
+		*iova = mapping->iova;
+	}
+
+	return ret;
+}
+
+void etnaviv_gem_put_iova(struct etnaviv_gpu *gpu, struct drm_gem_object *obj)
+{
+	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
+	struct etnaviv_vram_mapping *mapping;
+
+	mutex_lock(&etnaviv_obj->lock);
+	mapping = etnaviv_gem_get_vram_mapping(etnaviv_obj, gpu->mmu);
+
+	WARN_ON(mapping->use == 0);
+	mapping->use -= 1;
+	mutex_unlock(&etnaviv_obj->lock);
+
+	drm_gem_object_unreference_unlocked(obj);
+}
+
+void *etnaviv_gem_vaddr(struct drm_gem_object *obj)
+{
+	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
+
+	mutex_lock(&etnaviv_obj->lock);
+	if (!etnaviv_obj->vaddr) {
+		struct page **pages = etnaviv_gem_get_pages(etnaviv_obj);
+
+		if (IS_ERR(pages))
+			return ERR_CAST(pages);
+
+		etnaviv_obj->vaddr = vmap(pages, obj->size >> PAGE_SHIFT,
+				VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+	}
+	mutex_unlock(&etnaviv_obj->lock);
+
+	return etnaviv_obj->vaddr;
+}
+
+static inline enum dma_data_direction etnaviv_op_to_dma_dir(u32 op)
+{
+	if (op & ETNA_PREP_READ)
+		return DMA_FROM_DEVICE;
+	else if (op & ETNA_PREP_WRITE)
+		return DMA_TO_DEVICE;
+	else
+		return DMA_BIDIRECTIONAL;
+}
+
+int etnaviv_gem_cpu_prep(struct drm_gem_object *obj, u32 op,
+		struct timespec *timeout)
+{
+	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
+	struct drm_device *dev = obj->dev;
+	bool write = !!(op & ETNA_PREP_WRITE);
+	int ret;
+
+	if (op & ETNA_PREP_NOSYNC) {
+		if (!reservation_object_test_signaled_rcu(etnaviv_obj->resv,
+							  write))
+			return -EBUSY;
+	} else {
+		unsigned long remain = etnaviv_timeout_to_jiffies(timeout);
+
+		ret = reservation_object_wait_timeout_rcu(etnaviv_obj->resv,
+							  write, true, remain);
+		if (ret <= 0)
+			return ret == 0 ? -ETIMEDOUT : ret;
+	}
+
+	if (etnaviv_obj->flags & ETNA_BO_CACHED) {
+		if (!etnaviv_obj->sgt) {
+			void *ret;
+
+			mutex_lock(&etnaviv_obj->lock);
+			ret = etnaviv_gem_get_pages(etnaviv_obj);
+			mutex_unlock(&etnaviv_obj->lock);
+			if (IS_ERR(ret))
+				return PTR_ERR(ret);
+		}
+
+		dma_sync_sg_for_cpu(dev->dev, etnaviv_obj->sgt->sgl,
+				    etnaviv_obj->sgt->nents,
+				    etnaviv_op_to_dma_dir(op));
+		etnaviv_obj->last_cpu_prep_op = op;
+	}
+
+	return 0;
+}
+
+int etnaviv_gem_cpu_fini(struct drm_gem_object *obj)
+{
+	struct drm_device *dev = obj->dev;
+	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
+
+	if (etnaviv_obj->flags & ETNA_BO_CACHED) {
+		/* fini without a prep is almost certainly a userspace error */
+		WARN_ON(etnaviv_obj->last_cpu_prep_op == 0);
+		dma_sync_sg_for_device(dev->dev, etnaviv_obj->sgt->sgl,
+			etnaviv_obj->sgt->nents,
+			etnaviv_op_to_dma_dir(etnaviv_obj->last_cpu_prep_op));
+		etnaviv_obj->last_cpu_prep_op = 0;
+	}
+
+	return 0;
+}
+
+int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj,
+	struct timespec *timeout)
+{
+	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
+
+	return etnaviv_gpu_wait_obj_inactive(gpu, etnaviv_obj, timeout);
+}
+
+#ifdef CONFIG_DEBUG_FS
+static void etnaviv_gem_describe_fence(struct fence *fence,
+	const char *type, struct seq_file *m)
+{
+	if (!test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+		seq_printf(m, "\t%9s: %s %s seq %u\n",
+			   type,
+			   fence->ops->get_driver_name(fence),
+			   fence->ops->get_timeline_name(fence),
+			   fence->seqno);
+}
+
+static void etnaviv_gem_describe(struct drm_gem_object *obj, struct seq_file *m)
+{
+	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
+	struct reservation_object *robj = etnaviv_obj->resv;
+	struct reservation_object_list *fobj;
+	struct fence *fence;
+	unsigned long off = drm_vma_node_start(&obj->vma_node);
+
+	seq_printf(m, "%08x: %c %2d (%2d) %08lx %p %zd\n",
+			etnaviv_obj->flags, is_active(etnaviv_obj) ? 'A' : 'I',
+			obj->name, obj->refcount.refcount.counter,
+			off, etnaviv_obj->vaddr, obj->size);
+
+	rcu_read_lock();
+	fobj = rcu_dereference(robj->fence);
+	if (fobj) {
+		unsigned int i, shared_count = fobj->shared_count;
+
+		for (i = 0; i < shared_count; i++) {
+			fence = rcu_dereference(fobj->shared[i]);
+			etnaviv_gem_describe_fence(fence, "Shared", m);
+		}
+	}
+
+	fence = rcu_dereference(robj->fence_excl);
+	if (fence)
+		etnaviv_gem_describe_fence(fence, "Exclusive", m);
+	rcu_read_unlock();
+}
+
+void etnaviv_gem_describe_objects(struct etnaviv_drm_private *priv,
+	struct seq_file *m)
+{
+	struct etnaviv_gem_object *etnaviv_obj;
+	int count = 0;
+	size_t size = 0;
+
+	mutex_lock(&priv->gem_lock);
+	list_for_each_entry(etnaviv_obj, &priv->gem_list, gem_node) {
+		struct drm_gem_object *obj = &etnaviv_obj->base;
+
+		seq_puts(m, "   ");
+		etnaviv_gem_describe(obj, m);
+		count++;
+		size += obj->size;
+	}
+	mutex_unlock(&priv->gem_lock);
+
+	seq_printf(m, "Total %d objects, %zu bytes\n", count, size);
+}
+#endif
+
+static void etnaviv_gem_shmem_release(struct etnaviv_gem_object *etnaviv_obj)
+{
+	if (etnaviv_obj->vaddr)
+		vunmap(etnaviv_obj->vaddr);
+	put_pages(etnaviv_obj);
+}
+
+static const struct etnaviv_gem_ops etnaviv_gem_shmem_ops = {
+	.get_pages = etnaviv_gem_shmem_get_pages,
+	.release = etnaviv_gem_shmem_release,
+};
+
+void etnaviv_gem_free_object(struct drm_gem_object *obj)
+{
+	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
+	struct etnaviv_vram_mapping *mapping, *tmp;
+
+	/* object should not be active */
+	WARN_ON(is_active(etnaviv_obj));
+
+	list_del(&etnaviv_obj->gem_node);
+
+	list_for_each_entry_safe(mapping, tmp, &etnaviv_obj->vram_list,
+				 obj_node) {
+		struct etnaviv_iommu *mmu = mapping->mmu;
+
+		WARN_ON(mapping->use);
+
+		if (mmu)
+			etnaviv_iommu_unmap_gem(mmu, mapping);
+
+		list_del(&mapping->obj_node);
+		kfree(mapping);
+	}
+
+	drm_gem_free_mmap_offset(obj);
+	etnaviv_obj->ops->release(etnaviv_obj);
+	if (etnaviv_obj->resv == &etnaviv_obj->_resv)
+		reservation_object_fini(&etnaviv_obj->_resv);
+	drm_gem_object_release(obj);
+
+	kfree(etnaviv_obj);
+}
+
+int etnaviv_gem_obj_add(struct drm_device *dev, struct drm_gem_object *obj)
+{
+	struct etnaviv_drm_private *priv = dev->dev_private;
+	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
+
+	mutex_lock(&priv->gem_lock);
+	list_add_tail(&etnaviv_obj->gem_node, &priv->gem_list);
+	mutex_unlock(&priv->gem_lock);
+
+	return 0;
+}
+
+static int etnaviv_gem_new_impl(struct drm_device *dev, u32 size, u32 flags,
+	struct reservation_object *robj, const struct etnaviv_gem_ops *ops,
+	struct drm_gem_object **obj)
+{
+	struct etnaviv_gem_object *etnaviv_obj;
+	unsigned sz = sizeof(*etnaviv_obj);
+	bool valid = true;
+
+	/* validate flags */
+	switch (flags & ETNA_BO_CACHE_MASK) {
+	case ETNA_BO_UNCACHED:
+	case ETNA_BO_CACHED:
+	case ETNA_BO_WC:
+		break;
+	default:
+		valid = false;
+	}
+
+	if (!valid) {
+		dev_err(dev->dev, "invalid cache flag: %x\n",
+			(flags & ETNA_BO_CACHE_MASK));
+		return -EINVAL;
+	}
+
+	etnaviv_obj = kzalloc(sz, GFP_KERNEL);
+	if (!etnaviv_obj)
+		return -ENOMEM;
+
+	etnaviv_obj->flags = flags;
+	etnaviv_obj->ops = ops;
+	if (robj) {
+		etnaviv_obj->resv = robj;
+	} else {
+		etnaviv_obj->resv = &etnaviv_obj->_resv;
+		reservation_object_init(&etnaviv_obj->_resv);
+	}
+
+	mutex_init(&etnaviv_obj->lock);
+	INIT_LIST_HEAD(&etnaviv_obj->vram_list);
+
+	*obj = &etnaviv_obj->base;
+
+	return 0;
+}
+
+static struct drm_gem_object *__etnaviv_gem_new(struct drm_device *dev,
+		u32 size, u32 flags)
+{
+	struct drm_gem_object *obj = NULL;
+	int ret;
+
+	size = PAGE_ALIGN(size);
+
+	ret = etnaviv_gem_new_impl(dev, size, flags, NULL,
+				   &etnaviv_gem_shmem_ops, &obj);
+	if (ret)
+		goto fail;
+
+	ret = drm_gem_object_init(dev, obj, size);
+	if (ret == 0) {
+		struct address_space *mapping;
+
+		/*
+		 * Our buffers are kept pinned, so allocating them
+		 * from the MOVABLE zone is a really bad idea, and
+		 * conflicts with CMA.  See coments above new_inode()
+		 * why this is required _and_ expected if you're
+		 * going to pin these pages.
+		 */
+		mapping = file_inode(obj->filp)->i_mapping;
+		mapping_set_gfp_mask(mapping, GFP_HIGHUSER);
+	}
+
+	if (ret)
+		goto fail;
+
+	return obj;
+
+fail:
+	if (obj)
+		drm_gem_object_unreference_unlocked(obj);
+
+	return ERR_PTR(ret);
+}
+
+/* convenience method to construct a GEM buffer object, and userspace handle */
+int etnaviv_gem_new_handle(struct drm_device *dev, struct drm_file *file,
+		u32 size, u32 flags, u32 *handle)
+{
+	struct drm_gem_object *obj;
+	int ret;
+
+	obj = __etnaviv_gem_new(dev, size, flags);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	ret = etnaviv_gem_obj_add(dev, obj);
+	if (ret < 0) {
+		drm_gem_object_unreference_unlocked(obj);
+		return ret;
+	}
+
+	ret = drm_gem_handle_create(file, obj, handle);
+
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference_unlocked(obj);
+
+	return ret;
+}
+
+struct drm_gem_object *etnaviv_gem_new(struct drm_device *dev,
+		u32 size, u32 flags)
+{
+	struct drm_gem_object *obj;
+	int ret;
+
+	obj = __etnaviv_gem_new(dev, size, flags);
+	if (IS_ERR(obj))
+		return obj;
+
+	ret = etnaviv_gem_obj_add(dev, obj);
+	if (ret < 0) {
+		drm_gem_object_unreference_unlocked(obj);
+		return ERR_PTR(ret);
+	}
+
+	return obj;
+}
+
+int etnaviv_gem_new_private(struct drm_device *dev, size_t size, u32 flags,
+	struct reservation_object *robj, const struct etnaviv_gem_ops *ops,
+	struct etnaviv_gem_object **res)
+{
+	struct drm_gem_object *obj;
+	int ret;
+
+	ret = etnaviv_gem_new_impl(dev, size, flags, robj, ops, &obj);
+	if (ret)
+		return ret;
+
+	drm_gem_private_object_init(dev, obj, size);
+
+	*res = to_etnaviv_bo(obj);
+
+	return 0;
+}
+
+struct get_pages_work {
+	struct work_struct work;
+	struct mm_struct *mm;
+	struct task_struct *task;
+	struct etnaviv_gem_object *etnaviv_obj;
+};
+
+static struct page **etnaviv_gem_userptr_do_get_pages(
+	struct etnaviv_gem_object *etnaviv_obj, struct mm_struct *mm, struct task_struct *task)
+{
+	int ret = 0, pinned, npages = etnaviv_obj->base.size >> PAGE_SHIFT;
+	struct page **pvec;
+	uintptr_t ptr;
+
+	pvec = drm_malloc_ab(npages, sizeof(struct page *));
+	if (!pvec)
+		return ERR_PTR(-ENOMEM);
+
+	pinned = 0;
+	ptr = etnaviv_obj->userptr.ptr;
+
+	down_read(&mm->mmap_sem);
+	while (pinned < npages) {
+		ret = get_user_pages(task, mm, ptr, npages - pinned,
+				     !etnaviv_obj->userptr.ro, 0,
+				     pvec + pinned, NULL);
+		if (ret < 0)
+			break;
+
+		ptr += ret * PAGE_SIZE;
+		pinned += ret;
+	}
+	up_read(&mm->mmap_sem);
+
+	if (ret < 0) {
+		release_pages(pvec, pinned, 0);
+		drm_free_large(pvec);
+		return ERR_PTR(ret);
+	}
+
+	return pvec;
+}
+
+static void __etnaviv_gem_userptr_get_pages(struct work_struct *_work)
+{
+	struct get_pages_work *work = container_of(_work, typeof(*work), work);
+	struct etnaviv_gem_object *etnaviv_obj = work->etnaviv_obj;
+	struct page **pvec;
+
+	pvec = etnaviv_gem_userptr_do_get_pages(etnaviv_obj, work->mm, work->task);
+
+	mutex_lock(&etnaviv_obj->lock);
+	if (IS_ERR(pvec)) {
+		etnaviv_obj->userptr.work = ERR_CAST(pvec);
+	} else {
+		etnaviv_obj->userptr.work = NULL;
+		etnaviv_obj->pages = pvec;
+	}
+
+	mutex_unlock(&etnaviv_obj->lock);
+	drm_gem_object_unreference_unlocked(&etnaviv_obj->base);
+
+	mmput(work->mm);
+	put_task_struct(work->task);
+	kfree(work);
+}
+
+static int etnaviv_gem_userptr_get_pages(struct etnaviv_gem_object *etnaviv_obj)
+{
+	struct page **pvec = NULL;
+	struct get_pages_work *work;
+	struct mm_struct *mm;
+	int ret, pinned, npages = etnaviv_obj->base.size >> PAGE_SHIFT;
+
+	if (etnaviv_obj->userptr.work) {
+		if (IS_ERR(etnaviv_obj->userptr.work)) {
+			ret = PTR_ERR(etnaviv_obj->userptr.work);
+			etnaviv_obj->userptr.work = NULL;
+		} else {
+			ret = -EAGAIN;
+		}
+		return ret;
+	}
+
+	mm = get_task_mm(etnaviv_obj->userptr.task);
+	pinned = 0;
+	if (mm == current->mm) {
+		pvec = drm_malloc_ab(npages, sizeof(struct page *));
+		if (!pvec) {
+			mmput(mm);
+			return -ENOMEM;
+		}
+
+		pinned = __get_user_pages_fast(etnaviv_obj->userptr.ptr, npages,
+					       !etnaviv_obj->userptr.ro, pvec);
+		if (pinned < 0) {
+			drm_free_large(pvec);
+			mmput(mm);
+			return pinned;
+		}
+
+		if (pinned == npages) {
+			etnaviv_obj->pages = pvec;
+			mmput(mm);
+			return 0;
+		}
+	}
+
+	release_pages(pvec, pinned, 0);
+	drm_free_large(pvec);
+
+	work = kmalloc(sizeof(*work), GFP_KERNEL);
+	if (!work) {
+		mmput(mm);
+		return -ENOMEM;
+	}
+
+	get_task_struct(current);
+	drm_gem_object_reference(&etnaviv_obj->base);
+
+	work->mm = mm;
+	work->task = current;
+	work->etnaviv_obj = etnaviv_obj;
+
+	etnaviv_obj->userptr.work = &work->work;
+	INIT_WORK(&work->work, __etnaviv_gem_userptr_get_pages);
+
+	etnaviv_queue_work(etnaviv_obj->base.dev, &work->work);
+
+	return -EAGAIN;
+}
+
+static void etnaviv_gem_userptr_release(struct etnaviv_gem_object *etnaviv_obj)
+{
+	if (etnaviv_obj->sgt) {
+		etnaviv_gem_scatterlist_unmap(etnaviv_obj);
+		sg_free_table(etnaviv_obj->sgt);
+		kfree(etnaviv_obj->sgt);
+	}
+	if (etnaviv_obj->pages) {
+		int npages = etnaviv_obj->base.size >> PAGE_SHIFT;
+
+		release_pages(etnaviv_obj->pages, npages, 0);
+		drm_free_large(etnaviv_obj->pages);
+	}
+	put_task_struct(etnaviv_obj->userptr.task);
+}
+
+static const struct etnaviv_gem_ops etnaviv_gem_userptr_ops = {
+	.get_pages = etnaviv_gem_userptr_get_pages,
+	.release = etnaviv_gem_userptr_release,
+};
+
+int etnaviv_gem_new_userptr(struct drm_device *dev, struct drm_file *file,
+	uintptr_t ptr, u32 size, u32 flags, u32 *handle)
+{
+	struct etnaviv_gem_object *etnaviv_obj;
+	int ret;
+
+	ret = etnaviv_gem_new_private(dev, size, ETNA_BO_CACHED, NULL,
+				      &etnaviv_gem_userptr_ops, &etnaviv_obj);
+	if (ret)
+		return ret;
+
+	etnaviv_obj->userptr.ptr = ptr;
+	etnaviv_obj->userptr.task = current;
+	etnaviv_obj->userptr.ro = !(flags & ETNA_USERPTR_WRITE);
+	get_task_struct(current);
+
+	ret = etnaviv_gem_obj_add(dev, &etnaviv_obj->base);
+	if (ret) {
+		drm_gem_object_unreference_unlocked(&etnaviv_obj->base);
+		return ret;
+	}
+
+	ret = drm_gem_handle_create(file, &etnaviv_obj->base, handle);
+
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference_unlocked(&etnaviv_obj->base);
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
new file mode 100644
index 000000000000..a300b4b3d545
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2015 Etnaviv Project
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ETNAVIV_GEM_H__
+#define __ETNAVIV_GEM_H__
+
+#include <linux/reservation.h>
+#include "etnaviv_drv.h"
+
+struct etnaviv_gem_ops;
+struct etnaviv_gem_object;
+
+struct etnaviv_gem_userptr {
+	uintptr_t ptr;
+	struct task_struct *task;
+	struct work_struct *work;
+	bool ro;
+};
+
+struct etnaviv_vram_mapping {
+	struct list_head obj_node;
+	struct list_head scan_node;
+	struct list_head mmu_node;
+	struct etnaviv_gem_object *object;
+	struct etnaviv_iommu *mmu;
+	struct drm_mm_node vram_node;
+	unsigned int use;
+	u32 iova;
+};
+
+struct etnaviv_gem_object {
+	struct drm_gem_object base;
+	const struct etnaviv_gem_ops *ops;
+	struct mutex lock;
+
+	u32 flags;
+
+	struct list_head gem_node;
+	struct etnaviv_gpu *gpu;     /* non-null if active */
+	atomic_t gpu_active;
+	u32 access;
+
+	struct page **pages;
+	struct sg_table *sgt;
+	void *vaddr;
+
+	/* normally (resv == &_resv) except for imported bo's */
+	struct reservation_object *resv;
+	struct reservation_object _resv;
+
+	struct list_head vram_list;
+
+	/* cache maintenance */
+	u32 last_cpu_prep_op;
+
+	struct etnaviv_gem_userptr userptr;
+};
+
+static inline
+struct etnaviv_gem_object *to_etnaviv_bo(struct drm_gem_object *obj)
+{
+	return container_of(obj, struct etnaviv_gem_object, base);
+}
+
+struct etnaviv_gem_ops {
+	int (*get_pages)(struct etnaviv_gem_object *);
+	void (*release)(struct etnaviv_gem_object *);
+};
+
+static inline bool is_active(struct etnaviv_gem_object *etnaviv_obj)
+{
+	return atomic_read(&etnaviv_obj->gpu_active) != 0;
+}
+
+#define MAX_CMDS 4
+
+/* Created per submit-ioctl, to track bo's and cmdstream bufs, etc,
+ * associated with the cmdstream submission for synchronization (and
+ * make it easier to unwind when things go wrong, etc).  This only
+ * lasts for the duration of the submit-ioctl.
+ */
+struct etnaviv_gem_submit {
+	struct drm_device *dev;
+	struct etnaviv_gpu *gpu;
+	struct ww_acquire_ctx ticket;
+	u32 fence;
+	unsigned int nr_bos;
+	struct {
+		u32 flags;
+		struct etnaviv_gem_object *obj;
+		u32 iova;
+	} bos[0];
+};
+
+int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj,
+	struct timespec *timeout);
+int etnaviv_gem_new_private(struct drm_device *dev, size_t size, u32 flags,
+	struct reservation_object *robj, const struct etnaviv_gem_ops *ops,
+	struct etnaviv_gem_object **res);
+int etnaviv_gem_obj_add(struct drm_device *dev, struct drm_gem_object *obj);
+struct page **etnaviv_gem_get_pages(struct etnaviv_gem_object *obj);
+void etnaviv_gem_put_pages(struct etnaviv_gem_object *obj);
+
+#endif /* __ETNAVIV_GEM_H__ */
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
new file mode 100644
index 000000000000..e94db4f95770
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/dma-buf.h>
+#include "etnaviv_drv.h"
+#include "etnaviv_gem.h"
+
+
+struct sg_table *etnaviv_gem_prime_get_sg_table(struct drm_gem_object *obj)
+{
+	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
+
+	BUG_ON(!etnaviv_obj->sgt);  /* should have already pinned! */
+
+	return etnaviv_obj->sgt;
+}
+
+void *etnaviv_gem_prime_vmap(struct drm_gem_object *obj)
+{
+	return etnaviv_gem_vaddr(obj);
+}
+
+void etnaviv_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
+{
+	/* TODO msm_gem_vunmap() */
+}
+
+int etnaviv_gem_prime_pin(struct drm_gem_object *obj)
+{
+	if (!obj->import_attach) {
+		struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
+
+		mutex_lock(&etnaviv_obj->lock);
+		etnaviv_gem_get_pages(etnaviv_obj);
+		mutex_unlock(&etnaviv_obj->lock);
+	}
+	return 0;
+}
+
+void etnaviv_gem_prime_unpin(struct drm_gem_object *obj)
+{
+	if (!obj->import_attach) {
+		struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
+
+		mutex_lock(&etnaviv_obj->lock);
+		etnaviv_gem_put_pages(to_etnaviv_bo(obj));
+		mutex_unlock(&etnaviv_obj->lock);
+	}
+}
+
+static void etnaviv_gem_prime_release(struct etnaviv_gem_object *etnaviv_obj)
+{
+	if (etnaviv_obj->vaddr)
+		dma_buf_vunmap(etnaviv_obj->base.import_attach->dmabuf,
+			       etnaviv_obj->vaddr);
+
+	/* Don't drop the pages for imported dmabuf, as they are not
+	 * ours, just free the array we allocated:
+	 */
+	if (etnaviv_obj->pages)
+		drm_free_large(etnaviv_obj->pages);
+
+	drm_prime_gem_destroy(&etnaviv_obj->base, etnaviv_obj->sgt);
+}
+
+static const struct etnaviv_gem_ops etnaviv_gem_prime_ops = {
+	/* .get_pages should never be called */
+	.release = etnaviv_gem_prime_release,
+};
+
+struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev,
+	struct dma_buf_attachment *attach, struct sg_table *sgt)
+{
+	struct etnaviv_gem_object *etnaviv_obj;
+	size_t size = PAGE_ALIGN(attach->dmabuf->size);
+	int ret, npages;
+
+	ret = etnaviv_gem_new_private(dev, size, ETNA_BO_WC,
+				      attach->dmabuf->resv,
+				      &etnaviv_gem_prime_ops, &etnaviv_obj);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	npages = size / PAGE_SIZE;
+
+	etnaviv_obj->sgt = sgt;
+	etnaviv_obj->pages = drm_malloc_ab(npages, sizeof(struct page *));
+	if (!etnaviv_obj->pages) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	ret = drm_prime_sg_to_page_addr_arrays(sgt, etnaviv_obj->pages,
+					       NULL, npages);
+	if (ret)
+		goto fail;
+
+	ret = etnaviv_gem_obj_add(dev, &etnaviv_obj->base);
+	if (ret)
+		goto fail;
+
+	return &etnaviv_obj->base;
+
+fail:
+	drm_gem_object_unreference_unlocked(&etnaviv_obj->base);
+
+	return ERR_PTR(ret);
+}
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
new file mode 100644
index 000000000000..1aba01a999df
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -0,0 +1,443 @@
+/*
+ * Copyright (C) 2015 Etnaviv Project
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/reservation.h>
+#include "etnaviv_drv.h"
+#include "etnaviv_gpu.h"
+#include "etnaviv_gem.h"
+
+/*
+ * Cmdstream submission:
+ */
+
+#define BO_INVALID_FLAGS ~(ETNA_SUBMIT_BO_READ | ETNA_SUBMIT_BO_WRITE)
+/* make sure these don't conflict w/ ETNAVIV_SUBMIT_BO_x */
+#define BO_LOCKED   0x4000
+#define BO_PINNED   0x2000
+
+static inline void __user *to_user_ptr(u64 address)
+{
+	return (void __user *)(uintptr_t)address;
+}
+
+static struct etnaviv_gem_submit *submit_create(struct drm_device *dev,
+		struct etnaviv_gpu *gpu, size_t nr)
+{
+	struct etnaviv_gem_submit *submit;
+	size_t sz = size_vstruct(nr, sizeof(submit->bos[0]), sizeof(*submit));
+
+	submit = kmalloc(sz, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+	if (submit) {
+		submit->dev = dev;
+		submit->gpu = gpu;
+
+		/* initially, until copy_from_user() and bo lookup succeeds: */
+		submit->nr_bos = 0;
+
+		ww_acquire_init(&submit->ticket, &reservation_ww_class);
+	}
+
+	return submit;
+}
+
+static int submit_lookup_objects(struct etnaviv_gem_submit *submit,
+	struct drm_file *file, struct drm_etnaviv_gem_submit_bo *submit_bos,
+	unsigned nr_bos)
+{
+	struct drm_etnaviv_gem_submit_bo *bo;
+	unsigned i;
+	int ret = 0;
+
+	spin_lock(&file->table_lock);
+
+	for (i = 0, bo = submit_bos; i < nr_bos; i++, bo++) {
+		struct drm_gem_object *obj;
+
+		if (bo->flags & BO_INVALID_FLAGS) {
+			DRM_ERROR("invalid flags: %x\n", bo->flags);
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+
+		submit->bos[i].flags = bo->flags;
+
+		/* normally use drm_gem_object_lookup(), but for bulk lookup
+		 * all under single table_lock just hit object_idr directly:
+		 */
+		obj = idr_find(&file->object_idr, bo->handle);
+		if (!obj) {
+			DRM_ERROR("invalid handle %u at index %u\n",
+				  bo->handle, i);
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+
+		/*
+		 * Take a refcount on the object. The file table lock
+		 * prevents the object_idr's refcount on this being dropped.
+		 */
+		drm_gem_object_reference(obj);
+
+		submit->bos[i].obj = to_etnaviv_bo(obj);
+	}
+
+out_unlock:
+	submit->nr_bos = i;
+	spin_unlock(&file->table_lock);
+
+	return ret;
+}
+
+static void submit_unlock_object(struct etnaviv_gem_submit *submit, int i)
+{
+	if (submit->bos[i].flags & BO_LOCKED) {
+		struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
+
+		ww_mutex_unlock(&etnaviv_obj->resv->lock);
+		submit->bos[i].flags &= ~BO_LOCKED;
+	}
+}
+
+static int submit_lock_objects(struct etnaviv_gem_submit *submit)
+{
+	int contended, slow_locked = -1, i, ret = 0;
+
+retry:
+	for (i = 0; i < submit->nr_bos; i++) {
+		struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
+
+		if (slow_locked == i)
+			slow_locked = -1;
+
+		contended = i;
+
+		if (!(submit->bos[i].flags & BO_LOCKED)) {
+			ret = ww_mutex_lock_interruptible(&etnaviv_obj->resv->lock,
+					&submit->ticket);
+			if (ret == -EALREADY)
+				DRM_ERROR("BO at index %u already on submit list\n",
+					  i);
+			if (ret)
+				goto fail;
+			submit->bos[i].flags |= BO_LOCKED;
+		}
+	}
+
+	ww_acquire_done(&submit->ticket);
+
+	return 0;
+
+fail:
+	for (; i >= 0; i--)
+		submit_unlock_object(submit, i);
+
+	if (slow_locked > 0)
+		submit_unlock_object(submit, slow_locked);
+
+	if (ret == -EDEADLK) {
+		struct etnaviv_gem_object *etnaviv_obj;
+
+		etnaviv_obj = submit->bos[contended].obj;
+
+		/* we lost out in a seqno race, lock and retry.. */
+		ret = ww_mutex_lock_slow_interruptible(&etnaviv_obj->resv->lock,
+				&submit->ticket);
+		if (!ret) {
+			submit->bos[contended].flags |= BO_LOCKED;
+			slow_locked = contended;
+			goto retry;
+		}
+	}
+
+	return ret;
+}
+
+static int submit_fence_sync(const struct etnaviv_gem_submit *submit)
+{
+	unsigned int context = submit->gpu->fence_context;
+	int i, ret = 0;
+
+	for (i = 0; i < submit->nr_bos; i++) {
+		struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
+		bool write = submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE;
+
+		ret = etnaviv_gpu_fence_sync_obj(etnaviv_obj, context, write);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+static void submit_unpin_objects(struct etnaviv_gem_submit *submit)
+{
+	int i;
+
+	for (i = 0; i < submit->nr_bos; i++) {
+		struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
+
+		if (submit->bos[i].flags & BO_PINNED)
+			etnaviv_gem_put_iova(submit->gpu, &etnaviv_obj->base);
+
+		submit->bos[i].iova = 0;
+		submit->bos[i].flags &= ~BO_PINNED;
+	}
+}
+
+static int submit_pin_objects(struct etnaviv_gem_submit *submit)
+{
+	int i, ret = 0;
+
+	for (i = 0; i < submit->nr_bos; i++) {
+		struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
+		u32 iova;
+
+		ret = etnaviv_gem_get_iova(submit->gpu, &etnaviv_obj->base,
+					   &iova);
+		if (ret)
+			break;
+
+		submit->bos[i].flags |= BO_PINNED;
+		submit->bos[i].iova = iova;
+	}
+
+	return ret;
+}
+
+static int submit_bo(struct etnaviv_gem_submit *submit, u32 idx,
+		struct etnaviv_gem_object **obj, u32 *iova)
+{
+	if (idx >= submit->nr_bos) {
+		DRM_ERROR("invalid buffer index: %u (out of %u)\n",
+				idx, submit->nr_bos);
+		return -EINVAL;
+	}
+
+	if (obj)
+		*obj = submit->bos[idx].obj;
+	if (iova)
+		*iova = submit->bos[idx].iova;
+
+	return 0;
+}
+
+/* process the reloc's and patch up the cmdstream as needed: */
+static int submit_reloc(struct etnaviv_gem_submit *submit, void *stream,
+		u32 size, const struct drm_etnaviv_gem_submit_reloc *relocs,
+		u32 nr_relocs)
+{
+	u32 i, last_offset = 0;
+	u32 *ptr = stream;
+	int ret;
+
+	for (i = 0; i < nr_relocs; i++) {
+		const struct drm_etnaviv_gem_submit_reloc *r = relocs + i;
+		struct etnaviv_gem_object *bobj;
+		u32 iova, off;
+
+		if (unlikely(r->flags)) {
+			DRM_ERROR("invalid reloc flags\n");
+			return -EINVAL;
+		}
+
+		if (r->submit_offset % 4) {
+			DRM_ERROR("non-aligned reloc offset: %u\n",
+				  r->submit_offset);
+			return -EINVAL;
+		}
+
+		/* offset in dwords: */
+		off = r->submit_offset / 4;
+
+		if ((off >= size ) ||
+				(off < last_offset)) {
+			DRM_ERROR("invalid offset %u at reloc %u\n", off, i);
+			return -EINVAL;
+		}
+
+		ret = submit_bo(submit, r->reloc_idx, &bobj, &iova);
+		if (ret)
+			return ret;
+
+		if (r->reloc_offset >=
+		    bobj->base.size - sizeof(*ptr)) {
+			DRM_ERROR("relocation %u outside object", i);
+			return -EINVAL;
+		}
+
+		ptr[off] = iova + r->reloc_offset;
+
+		last_offset = off;
+	}
+
+	return 0;
+}
+
+static void submit_cleanup(struct etnaviv_gem_submit *submit)
+{
+	unsigned i;
+
+	for (i = 0; i < submit->nr_bos; i++) {
+		struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
+
+		submit_unlock_object(submit, i);
+		drm_gem_object_unreference_unlocked(&etnaviv_obj->base);
+	}
+
+	ww_acquire_fini(&submit->ticket);
+	kfree(submit);
+}
+
+int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
+		struct drm_file *file)
+{
+	struct etnaviv_drm_private *priv = dev->dev_private;
+	struct drm_etnaviv_gem_submit *args = data;
+	struct drm_etnaviv_gem_submit_reloc *relocs;
+	struct drm_etnaviv_gem_submit_bo *bos;
+	struct etnaviv_gem_submit *submit;
+	struct etnaviv_cmdbuf *cmdbuf;
+	struct etnaviv_gpu *gpu;
+	void *stream;
+	int ret;
+
+	if (args->pipe >= ETNA_MAX_PIPES)
+		return -EINVAL;
+
+	gpu = priv->gpu[args->pipe];
+	if (!gpu)
+		return -ENXIO;
+
+	if (args->stream_size % 4) {
+		DRM_ERROR("non-aligned cmdstream buffer size: %u\n",
+			  args->stream_size);
+		return -EINVAL;
+	}
+
+	if (args->exec_state != ETNA_PIPE_3D &&
+	    args->exec_state != ETNA_PIPE_2D &&
+	    args->exec_state != ETNA_PIPE_VG) {
+		DRM_ERROR("invalid exec_state: 0x%x\n", args->exec_state);
+		return -EINVAL;
+	}
+
+	/*
+	 * Copy the command submission and bo array to kernel space in
+	 * one go, and do this outside of any locks.
+	 */
+	bos = drm_malloc_ab(args->nr_bos, sizeof(*bos));
+	relocs = drm_malloc_ab(args->nr_relocs, sizeof(*relocs));
+	stream = drm_malloc_ab(1, args->stream_size);
+	cmdbuf = etnaviv_gpu_cmdbuf_new(gpu, ALIGN(args->stream_size, 8) + 8,
+					args->nr_bos);
+	if (!bos || !relocs || !stream || !cmdbuf) {
+		ret = -ENOMEM;
+		goto err_submit_cmds;
+	}
+
+	cmdbuf->exec_state = args->exec_state;
+	cmdbuf->ctx = file->driver_priv;
+
+	ret = copy_from_user(bos, to_user_ptr(args->bos),
+			     args->nr_bos * sizeof(*bos));
+	if (ret) {
+		ret = -EFAULT;
+		goto err_submit_cmds;
+	}
+
+	ret = copy_from_user(relocs, to_user_ptr(args->relocs),
+			     args->nr_relocs * sizeof(*relocs));
+	if (ret) {
+		ret = -EFAULT;
+		goto err_submit_cmds;
+	}
+
+	ret = copy_from_user(stream, to_user_ptr(args->stream),
+			     args->stream_size);
+	if (ret) {
+		ret = -EFAULT;
+		goto err_submit_cmds;
+	}
+
+	submit = submit_create(dev, gpu, args->nr_bos);
+	if (!submit) {
+		ret = -ENOMEM;
+		goto err_submit_cmds;
+	}
+
+	ret = submit_lookup_objects(submit, file, bos, args->nr_bos);
+	if (ret)
+		goto err_submit_objects;
+
+	ret = submit_lock_objects(submit);
+	if (ret)
+		goto err_submit_objects;
+
+	if (!etnaviv_cmd_validate_one(gpu, stream, args->stream_size / 4,
+				      relocs, args->nr_relocs)) {
+		ret = -EINVAL;
+		goto err_submit_objects;
+	}
+
+	ret = submit_fence_sync(submit);
+	if (ret)
+		goto err_submit_objects;
+
+	ret = submit_pin_objects(submit);
+	if (ret)
+		goto out;
+
+	ret = submit_reloc(submit, stream, args->stream_size / 4,
+			   relocs, args->nr_relocs);
+	if (ret)
+		goto out;
+
+	memcpy(cmdbuf->vaddr, stream, args->stream_size);
+	cmdbuf->user_size = ALIGN(args->stream_size, 8);
+
+	ret = etnaviv_gpu_submit(gpu, submit, cmdbuf);
+	if (ret == 0)
+		cmdbuf = NULL;
+
+	args->fence = submit->fence;
+
+out:
+	submit_unpin_objects(submit);
+
+	/*
+	 * If we're returning -EAGAIN, it may be due to the userptr code
+	 * wanting to run its workqueue outside of any locks. Flush our
+	 * workqueue to ensure that it is run in a timely manner.
+	 */
+	if (ret == -EAGAIN)
+		flush_workqueue(priv->wq);
+
+err_submit_objects:
+	submit_cleanup(submit);
+
+err_submit_cmds:
+	/* if we still own the cmdbuf */
+	if (cmdbuf)
+		etnaviv_gpu_cmdbuf_free(cmdbuf);
+	if (stream)
+		drm_free_large(stream);
+	if (bos)
+		drm_free_large(bos);
+	if (relocs)
+		drm_free_large(relocs);
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
new file mode 100644
index 000000000000..d39093dc37e6
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -0,0 +1,1644 @@
+/*
+ * Copyright (C) 2015 Etnaviv Project
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/component.h>
+#include <linux/fence.h>
+#include <linux/moduleparam.h>
+#include <linux/of_device.h>
+#include "etnaviv_dump.h"
+#include "etnaviv_gpu.h"
+#include "etnaviv_gem.h"
+#include "etnaviv_mmu.h"
+#include "etnaviv_iommu.h"
+#include "etnaviv_iommu_v2.h"
+#include "common.xml.h"
+#include "state.xml.h"
+#include "state_hi.xml.h"
+#include "cmdstream.xml.h"
+
+static const struct platform_device_id gpu_ids[] = {
+	{ .name = "etnaviv-gpu,2d" },
+	{ },
+};
+
+static bool etnaviv_dump_core = true;
+module_param_named(dump_core, etnaviv_dump_core, bool, 0600);
+
+/*
+ * Driver functions:
+ */
+
+int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value)
+{
+	switch (param) {
+	case ETNAVIV_PARAM_GPU_MODEL:
+		*value = gpu->identity.model;
+		break;
+
+	case ETNAVIV_PARAM_GPU_REVISION:
+		*value = gpu->identity.revision;
+		break;
+
+	case ETNAVIV_PARAM_GPU_FEATURES_0:
+		*value = gpu->identity.features;
+		break;
+
+	case ETNAVIV_PARAM_GPU_FEATURES_1:
+		*value = gpu->identity.minor_features0;
+		break;
+
+	case ETNAVIV_PARAM_GPU_FEATURES_2:
+		*value = gpu->identity.minor_features1;
+		break;
+
+	case ETNAVIV_PARAM_GPU_FEATURES_3:
+		*value = gpu->identity.minor_features2;
+		break;
+
+	case ETNAVIV_PARAM_GPU_FEATURES_4:
+		*value = gpu->identity.minor_features3;
+		break;
+
+	case ETNAVIV_PARAM_GPU_STREAM_COUNT:
+		*value = gpu->identity.stream_count;
+		break;
+
+	case ETNAVIV_PARAM_GPU_REGISTER_MAX:
+		*value = gpu->identity.register_max;
+		break;
+
+	case ETNAVIV_PARAM_GPU_THREAD_COUNT:
+		*value = gpu->identity.thread_count;
+		break;
+
+	case ETNAVIV_PARAM_GPU_VERTEX_CACHE_SIZE:
+		*value = gpu->identity.vertex_cache_size;
+		break;
+
+	case ETNAVIV_PARAM_GPU_SHADER_CORE_COUNT:
+		*value = gpu->identity.shader_core_count;
+		break;
+
+	case ETNAVIV_PARAM_GPU_PIXEL_PIPES:
+		*value = gpu->identity.pixel_pipes;
+		break;
+
+	case ETNAVIV_PARAM_GPU_VERTEX_OUTPUT_BUFFER_SIZE:
+		*value = gpu->identity.vertex_output_buffer_size;
+		break;
+
+	case ETNAVIV_PARAM_GPU_BUFFER_SIZE:
+		*value = gpu->identity.buffer_size;
+		break;
+
+	case ETNAVIV_PARAM_GPU_INSTRUCTION_COUNT:
+		*value = gpu->identity.instruction_count;
+		break;
+
+	case ETNAVIV_PARAM_GPU_NUM_CONSTANTS:
+		*value = gpu->identity.num_constants;
+		break;
+
+	default:
+		DBG("%s: invalid param: %u", dev_name(gpu->dev), param);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
+{
+	if (gpu->identity.minor_features0 &
+	    chipMinorFeatures0_MORE_MINOR_FEATURES) {
+		u32 specs[2];
+
+		specs[0] = gpu_read(gpu, VIVS_HI_CHIP_SPECS);
+		specs[1] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_2);
+
+		gpu->identity.stream_count =
+			(specs[0] & VIVS_HI_CHIP_SPECS_STREAM_COUNT__MASK)
+				>> VIVS_HI_CHIP_SPECS_STREAM_COUNT__SHIFT;
+		gpu->identity.register_max =
+			(specs[0] & VIVS_HI_CHIP_SPECS_REGISTER_MAX__MASK)
+				>> VIVS_HI_CHIP_SPECS_REGISTER_MAX__SHIFT;
+		gpu->identity.thread_count =
+			(specs[0] & VIVS_HI_CHIP_SPECS_THREAD_COUNT__MASK)
+				>> VIVS_HI_CHIP_SPECS_THREAD_COUNT__SHIFT;
+		gpu->identity.vertex_cache_size =
+			(specs[0] & VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__MASK)
+				>> VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__SHIFT;
+		gpu->identity.shader_core_count =
+			(specs[0] & VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__MASK)
+				>> VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__SHIFT;
+		gpu->identity.pixel_pipes =
+			(specs[0] & VIVS_HI_CHIP_SPECS_PIXEL_PIPES__MASK)
+				>> VIVS_HI_CHIP_SPECS_PIXEL_PIPES__SHIFT;
+		gpu->identity.vertex_output_buffer_size =
+			(specs[0] & VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__MASK)
+				>> VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__SHIFT;
+
+		gpu->identity.buffer_size =
+			(specs[1] & VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__MASK)
+				>> VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__SHIFT;
+		gpu->identity.instruction_count =
+			(specs[1] & VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__MASK)
+				>> VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__SHIFT;
+		gpu->identity.num_constants =
+			(specs[1] & VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__MASK)
+				>> VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__SHIFT;
+	}
+
+	/* Fill in the stream count if not specified */
+	if (gpu->identity.stream_count == 0) {
+		if (gpu->identity.model >= 0x1000)
+			gpu->identity.stream_count = 4;
+		else
+			gpu->identity.stream_count = 1;
+	}
+
+	/* Convert the register max value */
+	if (gpu->identity.register_max)
+		gpu->identity.register_max = 1 << gpu->identity.register_max;
+	else if (gpu->identity.model == 0x0400)
+		gpu->identity.register_max = 32;
+	else
+		gpu->identity.register_max = 64;
+
+	/* Convert thread count */
+	if (gpu->identity.thread_count)
+		gpu->identity.thread_count = 1 << gpu->identity.thread_count;
+	else if (gpu->identity.model == 0x0400)
+		gpu->identity.thread_count = 64;
+	else if (gpu->identity.model == 0x0500 ||
+		 gpu->identity.model == 0x0530)
+		gpu->identity.thread_count = 128;
+	else
+		gpu->identity.thread_count = 256;
+
+	if (gpu->identity.vertex_cache_size == 0)
+		gpu->identity.vertex_cache_size = 8;
+
+	if (gpu->identity.shader_core_count == 0) {
+		if (gpu->identity.model >= 0x1000)
+			gpu->identity.shader_core_count = 2;
+		else
+			gpu->identity.shader_core_count = 1;
+	}
+
+	if (gpu->identity.pixel_pipes == 0)
+		gpu->identity.pixel_pipes = 1;
+
+	/* Convert virtex buffer size */
+	if (gpu->identity.vertex_output_buffer_size) {
+		gpu->identity.vertex_output_buffer_size =
+			1 << gpu->identity.vertex_output_buffer_size;
+	} else if (gpu->identity.model == 0x0400) {
+		if (gpu->identity.revision < 0x4000)
+			gpu->identity.vertex_output_buffer_size = 512;
+		else if (gpu->identity.revision < 0x4200)
+			gpu->identity.vertex_output_buffer_size = 256;
+		else
+			gpu->identity.vertex_output_buffer_size = 128;
+	} else {
+		gpu->identity.vertex_output_buffer_size = 512;
+	}
+
+	switch (gpu->identity.instruction_count) {
+	case 0:
+		if ((gpu->identity.model == 0x2000 &&
+		     gpu->identity.revision == 0x5108) ||
+		    gpu->identity.model == 0x880)
+			gpu->identity.instruction_count = 512;
+		else
+			gpu->identity.instruction_count = 256;
+		break;
+
+	case 1:
+		gpu->identity.instruction_count = 1024;
+		break;
+
+	case 2:
+		gpu->identity.instruction_count = 2048;
+		break;
+
+	default:
+		gpu->identity.instruction_count = 256;
+		break;
+	}
+
+	if (gpu->identity.num_constants == 0)
+		gpu->identity.num_constants = 168;
+}
+
+static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
+{
+	u32 chipIdentity;
+
+	chipIdentity = gpu_read(gpu, VIVS_HI_CHIP_IDENTITY);
+
+	/* Special case for older graphic cores. */
+	if (VIVS_HI_CHIP_IDENTITY_FAMILY(chipIdentity) ==  0x01) {
+		gpu->identity.model    = 0x500; /* gc500 */
+		gpu->identity.revision = VIVS_HI_CHIP_IDENTITY_REVISION(chipIdentity);
+	} else {
+
+		gpu->identity.model = gpu_read(gpu, VIVS_HI_CHIP_MODEL);
+		gpu->identity.revision = gpu_read(gpu, VIVS_HI_CHIP_REV);
+
+		/*
+		 * !!!! HACK ALERT !!!!
+		 * Because people change device IDs without letting software
+		 * know about it - here is the hack to make it all look the
+		 * same.  Only for GC400 family.
+		 */
+		if ((gpu->identity.model & 0xff00) == 0x0400 &&
+		    gpu->identity.model != 0x0420) {
+			gpu->identity.model = gpu->identity.model & 0x0400;
+		}
+
+		/* Another special case */
+		if (gpu->identity.model == 0x300 &&
+		    gpu->identity.revision == 0x2201) {
+			u32 chipDate = gpu_read(gpu, VIVS_HI_CHIP_DATE);
+			u32 chipTime = gpu_read(gpu, VIVS_HI_CHIP_TIME);
+
+			if (chipDate == 0x20080814 && chipTime == 0x12051100) {
+				/*
+				 * This IP has an ECO; put the correct
+				 * revision in it.
+				 */
+				gpu->identity.revision = 0x1051;
+			}
+		}
+	}
+
+	dev_info(gpu->dev, "model: GC%x, revision: %x\n",
+		 gpu->identity.model, gpu->identity.revision);
+
+	gpu->identity.features = gpu_read(gpu, VIVS_HI_CHIP_FEATURE);
+
+	/* Disable fast clear on GC700. */
+	if (gpu->identity.model == 0x700)
+		gpu->identity.features &= ~chipFeatures_FAST_CLEAR;
+
+	if ((gpu->identity.model == 0x500 && gpu->identity.revision < 2) ||
+	    (gpu->identity.model == 0x300 && gpu->identity.revision < 0x2000)) {
+
+		/*
+		 * GC500 rev 1.x and GC300 rev < 2.0 doesn't have these
+		 * registers.
+		 */
+		gpu->identity.minor_features0 = 0;
+		gpu->identity.minor_features1 = 0;
+		gpu->identity.minor_features2 = 0;
+		gpu->identity.minor_features3 = 0;
+	} else
+		gpu->identity.minor_features0 =
+				gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_0);
+
+	if (gpu->identity.minor_features0 &
+	    chipMinorFeatures0_MORE_MINOR_FEATURES) {
+		gpu->identity.minor_features1 =
+				gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_1);
+		gpu->identity.minor_features2 =
+				gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_2);
+		gpu->identity.minor_features3 =
+				gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_3);
+	}
+
+	/* GC600 idle register reports zero bits where modules aren't present */
+	if (gpu->identity.model == chipModel_GC600) {
+		gpu->idle_mask = VIVS_HI_IDLE_STATE_TX |
+				 VIVS_HI_IDLE_STATE_RA |
+				 VIVS_HI_IDLE_STATE_SE |
+				 VIVS_HI_IDLE_STATE_PA |
+				 VIVS_HI_IDLE_STATE_SH |
+				 VIVS_HI_IDLE_STATE_PE |
+				 VIVS_HI_IDLE_STATE_DE |
+				 VIVS_HI_IDLE_STATE_FE;
+	} else {
+		gpu->idle_mask = ~VIVS_HI_IDLE_STATE_AXI_LP;
+	}
+
+	etnaviv_hw_specs(gpu);
+}
+
+static void etnaviv_gpu_load_clock(struct etnaviv_gpu *gpu, u32 clock)
+{
+	gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, clock |
+		  VIVS_HI_CLOCK_CONTROL_FSCALE_CMD_LOAD);
+	gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, clock);
+}
+
+static int etnaviv_hw_reset(struct etnaviv_gpu *gpu)
+{
+	u32 control, idle;
+	unsigned long timeout;
+	bool failed = true;
+
+	/* TODO
+	 *
+	 * - clock gating
+	 * - puls eater
+	 * - what about VG?
+	 */
+
+	/* We hope that the GPU resets in under one second */
+	timeout = jiffies + msecs_to_jiffies(1000);
+
+	while (time_is_after_jiffies(timeout)) {
+		control = VIVS_HI_CLOCK_CONTROL_DISABLE_DEBUG_REGISTERS |
+			  VIVS_HI_CLOCK_CONTROL_FSCALE_VAL(0x40);
+
+		/* enable clock */
+		etnaviv_gpu_load_clock(gpu, control);
+
+		/* Wait for stable clock.  Vivante's code waited for 1ms */
+		usleep_range(1000, 10000);
+
+		/* isolate the GPU. */
+		control |= VIVS_HI_CLOCK_CONTROL_ISOLATE_GPU;
+		gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, control);
+
+		/* set soft reset. */
+		control |= VIVS_HI_CLOCK_CONTROL_SOFT_RESET;
+		gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, control);
+
+		/* wait for reset. */
+		msleep(1);
+
+		/* reset soft reset bit. */
+		control &= ~VIVS_HI_CLOCK_CONTROL_SOFT_RESET;
+		gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, control);
+
+		/* reset GPU isolation. */
+		control &= ~VIVS_HI_CLOCK_CONTROL_ISOLATE_GPU;
+		gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, control);
+
+		/* read idle register. */
+		idle = gpu_read(gpu, VIVS_HI_IDLE_STATE);
+
+		/* try reseting again if FE it not idle */
+		if ((idle & VIVS_HI_IDLE_STATE_FE) == 0) {
+			dev_dbg(gpu->dev, "FE is not idle\n");
+			continue;
+		}
+
+		/* read reset register. */
+		control = gpu_read(gpu, VIVS_HI_CLOCK_CONTROL);
+
+		/* is the GPU idle? */
+		if (((control & VIVS_HI_CLOCK_CONTROL_IDLE_3D) == 0) ||
+		    ((control & VIVS_HI_CLOCK_CONTROL_IDLE_2D) == 0)) {
+			dev_dbg(gpu->dev, "GPU is not idle\n");
+			continue;
+		}
+
+		failed = false;
+		break;
+	}
+
+	if (failed) {
+		idle = gpu_read(gpu, VIVS_HI_IDLE_STATE);
+		control = gpu_read(gpu, VIVS_HI_CLOCK_CONTROL);
+
+		dev_err(gpu->dev, "GPU failed to reset: FE %sidle, 3D %sidle, 2D %sidle\n",
+			idle & VIVS_HI_IDLE_STATE_FE ? "" : "not ",
+			control & VIVS_HI_CLOCK_CONTROL_IDLE_3D ? "" : "not ",
+			control & VIVS_HI_CLOCK_CONTROL_IDLE_2D ? "" : "not ");
+
+		return -EBUSY;
+	}
+
+	/* We rely on the GPU running, so program the clock */
+	control = VIVS_HI_CLOCK_CONTROL_DISABLE_DEBUG_REGISTERS |
+		  VIVS_HI_CLOCK_CONTROL_FSCALE_VAL(0x40);
+
+	/* enable clock */
+	etnaviv_gpu_load_clock(gpu, control);
+
+	return 0;
+}
+
+static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
+{
+	u16 prefetch;
+
+	if (gpu->identity.model == chipModel_GC320 &&
+	    gpu_read(gpu, VIVS_HI_CHIP_TIME) != 0x2062400 &&
+	    (gpu->identity.revision == 0x5007 ||
+	     gpu->identity.revision == 0x5220)) {
+		u32 mc_memory_debug;
+
+		mc_memory_debug = gpu_read(gpu, VIVS_MC_DEBUG_MEMORY) & ~0xff;
+
+		if (gpu->identity.revision == 0x5007)
+			mc_memory_debug |= 0x0c;
+		else
+			mc_memory_debug |= 0x08;
+
+		gpu_write(gpu, VIVS_MC_DEBUG_MEMORY, mc_memory_debug);
+	}
+
+	/*
+	 * Update GPU AXI cache atttribute to "cacheable, no allocate".
+	 * This is necessary to prevent the iMX6 SoC locking up.
+	 */
+	gpu_write(gpu, VIVS_HI_AXI_CONFIG,
+		  VIVS_HI_AXI_CONFIG_AWCACHE(2) |
+		  VIVS_HI_AXI_CONFIG_ARCACHE(2));
+
+	/* GC2000 rev 5108 needs a special bus config */
+	if (gpu->identity.model == 0x2000 && gpu->identity.revision == 0x5108) {
+		u32 bus_config = gpu_read(gpu, VIVS_MC_BUS_CONFIG);
+		bus_config &= ~(VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG__MASK |
+				VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG__MASK);
+		bus_config |= VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG(1) |
+			      VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG(0);
+		gpu_write(gpu, VIVS_MC_BUS_CONFIG, bus_config);
+	}
+
+	/* set base addresses */
+	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_RA, gpu->memory_base);
+	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_FE, gpu->memory_base);
+	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_TX, gpu->memory_base);
+	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_PEZ, gpu->memory_base);
+	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_PE, gpu->memory_base);
+
+	/* setup the MMU page table pointers */
+	etnaviv_iommu_domain_restore(gpu, gpu->mmu->domain);
+
+	/* Start command processor */
+	prefetch = etnaviv_buffer_init(gpu);
+
+	gpu_write(gpu, VIVS_HI_INTR_ENBL, ~0U);
+	gpu_write(gpu, VIVS_FE_COMMAND_ADDRESS,
+		  gpu->buffer->paddr - gpu->memory_base);
+	gpu_write(gpu, VIVS_FE_COMMAND_CONTROL,
+		  VIVS_FE_COMMAND_CONTROL_ENABLE |
+		  VIVS_FE_COMMAND_CONTROL_PREFETCH(prefetch));
+}
+
+int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
+{
+	int ret, i;
+	struct iommu_domain *iommu;
+	enum etnaviv_iommu_version version;
+	bool mmuv2;
+
+	ret = pm_runtime_get_sync(gpu->dev);
+	if (ret < 0)
+		return ret;
+
+	etnaviv_hw_identify(gpu);
+
+	if (gpu->identity.model == 0) {
+		dev_err(gpu->dev, "Unknown GPU model\n");
+		pm_runtime_put_autosuspend(gpu->dev);
+		return -ENXIO;
+	}
+
+	ret = etnaviv_hw_reset(gpu);
+	if (ret)
+		goto fail;
+
+	/* Setup IOMMU.. eventually we will (I think) do this once per context
+	 * and have separate page tables per context.  For now, to keep things
+	 * simple and to get something working, just use a single address space:
+	 */
+	mmuv2 = gpu->identity.minor_features1 & chipMinorFeatures1_MMU_VERSION;
+	dev_dbg(gpu->dev, "mmuv2: %d\n", mmuv2);
+
+	if (!mmuv2) {
+		iommu = etnaviv_iommu_domain_alloc(gpu);
+		version = ETNAVIV_IOMMU_V1;
+	} else {
+		iommu = etnaviv_iommu_v2_domain_alloc(gpu);
+		version = ETNAVIV_IOMMU_V2;
+	}
+
+	if (!iommu) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	/* TODO: we will leak here memory - fix it! */
+
+	gpu->mmu = etnaviv_iommu_new(gpu, iommu, version);
+	if (!gpu->mmu) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	/* Create buffer: */
+	gpu->buffer = etnaviv_gpu_cmdbuf_new(gpu, PAGE_SIZE, 0);
+	if (!gpu->buffer) {
+		ret = -ENOMEM;
+		dev_err(gpu->dev, "could not create command buffer\n");
+		goto fail;
+	}
+	if (gpu->buffer->paddr - gpu->memory_base > 0x80000000) {
+		ret = -EINVAL;
+		dev_err(gpu->dev,
+			"command buffer outside valid memory window\n");
+		goto free_buffer;
+	}
+
+	/* Setup event management */
+	spin_lock_init(&gpu->event_spinlock);
+	init_completion(&gpu->event_free);
+	for (i = 0; i < ARRAY_SIZE(gpu->event); i++) {
+		gpu->event[i].used = false;
+		complete(&gpu->event_free);
+	}
+
+	/* Now program the hardware */
+	mutex_lock(&gpu->lock);
+	etnaviv_gpu_hw_init(gpu);
+	mutex_unlock(&gpu->lock);
+
+	pm_runtime_mark_last_busy(gpu->dev);
+	pm_runtime_put_autosuspend(gpu->dev);
+
+	return 0;
+
+free_buffer:
+	etnaviv_gpu_cmdbuf_free(gpu->buffer);
+	gpu->buffer = NULL;
+fail:
+	pm_runtime_mark_last_busy(gpu->dev);
+	pm_runtime_put_autosuspend(gpu->dev);
+
+	return ret;
+}
+
+#ifdef CONFIG_DEBUG_FS
+struct dma_debug {
+	u32 address[2];
+	u32 state[2];
+};
+
+static void verify_dma(struct etnaviv_gpu *gpu, struct dma_debug *debug)
+{
+	u32 i;
+
+	debug->address[0] = gpu_read(gpu, VIVS_FE_DMA_ADDRESS);
+	debug->state[0]   = gpu_read(gpu, VIVS_FE_DMA_DEBUG_STATE);
+
+	for (i = 0; i < 500; i++) {
+		debug->address[1] = gpu_read(gpu, VIVS_FE_DMA_ADDRESS);
+		debug->state[1]   = gpu_read(gpu, VIVS_FE_DMA_DEBUG_STATE);
+
+		if (debug->address[0] != debug->address[1])
+			break;
+
+		if (debug->state[0] != debug->state[1])
+			break;
+	}
+}
+
+int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
+{
+	struct dma_debug debug;
+	u32 dma_lo, dma_hi, axi, idle;
+	int ret;
+
+	seq_printf(m, "%s Status:\n", dev_name(gpu->dev));
+
+	ret = pm_runtime_get_sync(gpu->dev);
+	if (ret < 0)
+		return ret;
+
+	dma_lo = gpu_read(gpu, VIVS_FE_DMA_LOW);
+	dma_hi = gpu_read(gpu, VIVS_FE_DMA_HIGH);
+	axi = gpu_read(gpu, VIVS_HI_AXI_STATUS);
+	idle = gpu_read(gpu, VIVS_HI_IDLE_STATE);
+
+	verify_dma(gpu, &debug);
+
+	seq_puts(m, "\tfeatures\n");
+	seq_printf(m, "\t minor_features0: 0x%08x\n",
+		   gpu->identity.minor_features0);
+	seq_printf(m, "\t minor_features1: 0x%08x\n",
+		   gpu->identity.minor_features1);
+	seq_printf(m, "\t minor_features2: 0x%08x\n",
+		   gpu->identity.minor_features2);
+	seq_printf(m, "\t minor_features3: 0x%08x\n",
+		   gpu->identity.minor_features3);
+
+	seq_puts(m, "\tspecs\n");
+	seq_printf(m, "\t stream_count:  %d\n",
+			gpu->identity.stream_count);
+	seq_printf(m, "\t register_max: %d\n",
+			gpu->identity.register_max);
+	seq_printf(m, "\t thread_count: %d\n",
+			gpu->identity.thread_count);
+	seq_printf(m, "\t vertex_cache_size: %d\n",
+			gpu->identity.vertex_cache_size);
+	seq_printf(m, "\t shader_core_count: %d\n",
+			gpu->identity.shader_core_count);
+	seq_printf(m, "\t pixel_pipes: %d\n",
+			gpu->identity.pixel_pipes);
+	seq_printf(m, "\t vertex_output_buffer_size: %d\n",
+			gpu->identity.vertex_output_buffer_size);
+	seq_printf(m, "\t buffer_size: %d\n",
+			gpu->identity.buffer_size);
+	seq_printf(m, "\t instruction_count: %d\n",
+			gpu->identity.instruction_count);
+	seq_printf(m, "\t num_constants: %d\n",
+			gpu->identity.num_constants);
+
+	seq_printf(m, "\taxi: 0x%08x\n", axi);
+	seq_printf(m, "\tidle: 0x%08x\n", idle);
+	idle |= ~gpu->idle_mask & ~VIVS_HI_IDLE_STATE_AXI_LP;
+	if ((idle & VIVS_HI_IDLE_STATE_FE) == 0)
+		seq_puts(m, "\t FE is not idle\n");
+	if ((idle & VIVS_HI_IDLE_STATE_DE) == 0)
+		seq_puts(m, "\t DE is not idle\n");
+	if ((idle & VIVS_HI_IDLE_STATE_PE) == 0)
+		seq_puts(m, "\t PE is not idle\n");
+	if ((idle & VIVS_HI_IDLE_STATE_SH) == 0)
+		seq_puts(m, "\t SH is not idle\n");
+	if ((idle & VIVS_HI_IDLE_STATE_PA) == 0)
+		seq_puts(m, "\t PA is not idle\n");
+	if ((idle & VIVS_HI_IDLE_STATE_SE) == 0)
+		seq_puts(m, "\t SE is not idle\n");
+	if ((idle & VIVS_HI_IDLE_STATE_RA) == 0)
+		seq_puts(m, "\t RA is not idle\n");
+	if ((idle & VIVS_HI_IDLE_STATE_TX) == 0)
+		seq_puts(m, "\t TX is not idle\n");
+	if ((idle & VIVS_HI_IDLE_STATE_VG) == 0)
+		seq_puts(m, "\t VG is not idle\n");
+	if ((idle & VIVS_HI_IDLE_STATE_IM) == 0)
+		seq_puts(m, "\t IM is not idle\n");
+	if ((idle & VIVS_HI_IDLE_STATE_FP) == 0)
+		seq_puts(m, "\t FP is not idle\n");
+	if ((idle & VIVS_HI_IDLE_STATE_TS) == 0)
+		seq_puts(m, "\t TS is not idle\n");
+	if (idle & VIVS_HI_IDLE_STATE_AXI_LP)
+		seq_puts(m, "\t AXI low power mode\n");
+
+	if (gpu->identity.features & chipFeatures_DEBUG_MODE) {
+		u32 read0 = gpu_read(gpu, VIVS_MC_DEBUG_READ0);
+		u32 read1 = gpu_read(gpu, VIVS_MC_DEBUG_READ1);
+		u32 write = gpu_read(gpu, VIVS_MC_DEBUG_WRITE);
+
+		seq_puts(m, "\tMC\n");
+		seq_printf(m, "\t read0: 0x%08x\n", read0);
+		seq_printf(m, "\t read1: 0x%08x\n", read1);
+		seq_printf(m, "\t write: 0x%08x\n", write);
+	}
+
+	seq_puts(m, "\tDMA ");
+
+	if (debug.address[0] == debug.address[1] &&
+	    debug.state[0] == debug.state[1]) {
+		seq_puts(m, "seems to be stuck\n");
+	} else if (debug.address[0] == debug.address[1]) {
+		seq_puts(m, "adress is constant\n");
+	} else {
+		seq_puts(m, "is runing\n");
+	}
+
+	seq_printf(m, "\t address 0: 0x%08x\n", debug.address[0]);
+	seq_printf(m, "\t address 1: 0x%08x\n", debug.address[1]);
+	seq_printf(m, "\t state 0: 0x%08x\n", debug.state[0]);
+	seq_printf(m, "\t state 1: 0x%08x\n", debug.state[1]);
+	seq_printf(m, "\t last fetch 64 bit word: 0x%08x 0x%08x\n",
+		   dma_lo, dma_hi);
+
+	ret = 0;
+
+	pm_runtime_mark_last_busy(gpu->dev);
+	pm_runtime_put_autosuspend(gpu->dev);
+
+	return ret;
+}
+#endif
+
+/*
+ * Power Management:
+ */
+static int enable_clk(struct etnaviv_gpu *gpu)
+{
+	if (gpu->clk_core)
+		clk_prepare_enable(gpu->clk_core);
+	if (gpu->clk_shader)
+		clk_prepare_enable(gpu->clk_shader);
+
+	return 0;
+}
+
+static int disable_clk(struct etnaviv_gpu *gpu)
+{
+	if (gpu->clk_core)
+		clk_disable_unprepare(gpu->clk_core);
+	if (gpu->clk_shader)
+		clk_disable_unprepare(gpu->clk_shader);
+
+	return 0;
+}
+
+static int enable_axi(struct etnaviv_gpu *gpu)
+{
+	if (gpu->clk_bus)
+		clk_prepare_enable(gpu->clk_bus);
+
+	return 0;
+}
+
+static int disable_axi(struct etnaviv_gpu *gpu)
+{
+	if (gpu->clk_bus)
+		clk_disable_unprepare(gpu->clk_bus);
+
+	return 0;
+}
+
+/*
+ * Hangcheck detection for locked gpu:
+ */
+static void recover_worker(struct work_struct *work)
+{
+	struct etnaviv_gpu *gpu = container_of(work, struct etnaviv_gpu,
+					       recover_work);
+	unsigned long flags;
+	unsigned int i;
+
+	dev_err(gpu->dev, "hangcheck recover!\n");
+
+	if (pm_runtime_get_sync(gpu->dev) < 0)
+		return;
+
+	mutex_lock(&gpu->lock);
+
+	/* Only catch the first event, or when manually re-armed */
+	if (etnaviv_dump_core) {
+		etnaviv_core_dump(gpu);
+		etnaviv_dump_core = false;
+	}
+
+	etnaviv_hw_reset(gpu);
+
+	/* complete all events, the GPU won't do it after the reset */
+	spin_lock_irqsave(&gpu->event_spinlock, flags);
+	for (i = 0; i < ARRAY_SIZE(gpu->event); i++) {
+		if (!gpu->event[i].used)
+			continue;
+		fence_signal(gpu->event[i].fence);
+		gpu->event[i].fence = NULL;
+		gpu->event[i].used = false;
+		complete(&gpu->event_free);
+		/*
+		 * Decrement the PM count for each stuck event. This is safe
+		 * even in atomic context as we use ASYNC RPM here.
+		 */
+		pm_runtime_put_autosuspend(gpu->dev);
+	}
+	spin_unlock_irqrestore(&gpu->event_spinlock, flags);
+	gpu->completed_fence = gpu->active_fence;
+
+	etnaviv_gpu_hw_init(gpu);
+	gpu->switch_context = true;
+
+	mutex_unlock(&gpu->lock);
+	pm_runtime_mark_last_busy(gpu->dev);
+	pm_runtime_put_autosuspend(gpu->dev);
+
+	/* Retire the buffer objects in a work */
+	etnaviv_queue_work(gpu->drm, &gpu->retire_work);
+}
+
+static void hangcheck_timer_reset(struct etnaviv_gpu *gpu)
+{
+	DBG("%s", dev_name(gpu->dev));
+	mod_timer(&gpu->hangcheck_timer,
+		  round_jiffies_up(jiffies + DRM_ETNAVIV_HANGCHECK_JIFFIES));
+}
+
+static void hangcheck_handler(unsigned long data)
+{
+	struct etnaviv_gpu *gpu = (struct etnaviv_gpu *)data;
+	u32 fence = gpu->completed_fence;
+	bool progress = false;
+
+	if (fence != gpu->hangcheck_fence) {
+		gpu->hangcheck_fence = fence;
+		progress = true;
+	}
+
+	if (!progress) {
+		u32 dma_addr = gpu_read(gpu, VIVS_FE_DMA_ADDRESS);
+		int change = dma_addr - gpu->hangcheck_dma_addr;
+
+		if (change < 0 || change > 16) {
+			gpu->hangcheck_dma_addr = dma_addr;
+			progress = true;
+		}
+	}
+
+	if (!progress && fence_after(gpu->active_fence, fence)) {
+		dev_err(gpu->dev, "hangcheck detected gpu lockup!\n");
+		dev_err(gpu->dev, "     completed fence: %u\n", fence);
+		dev_err(gpu->dev, "     active fence: %u\n",
+			gpu->active_fence);
+		etnaviv_queue_work(gpu->drm, &gpu->recover_work);
+	}
+
+	/* if still more pending work, reset the hangcheck timer: */
+	if (fence_after(gpu->active_fence, gpu->hangcheck_fence))
+		hangcheck_timer_reset(gpu);
+}
+
+static void hangcheck_disable(struct etnaviv_gpu *gpu)
+{
+	del_timer_sync(&gpu->hangcheck_timer);
+	cancel_work_sync(&gpu->recover_work);
+}
+
+/* fence object management */
+struct etnaviv_fence {
+	struct etnaviv_gpu *gpu;
+	struct fence base;
+};
+
+static inline struct etnaviv_fence *to_etnaviv_fence(struct fence *fence)
+{
+	return container_of(fence, struct etnaviv_fence, base);
+}
+
+static const char *etnaviv_fence_get_driver_name(struct fence *fence)
+{
+	return "etnaviv";
+}
+
+static const char *etnaviv_fence_get_timeline_name(struct fence *fence)
+{
+	struct etnaviv_fence *f = to_etnaviv_fence(fence);
+
+	return dev_name(f->gpu->dev);
+}
+
+static bool etnaviv_fence_enable_signaling(struct fence *fence)
+{
+	return true;
+}
+
+static bool etnaviv_fence_signaled(struct fence *fence)
+{
+	struct etnaviv_fence *f = to_etnaviv_fence(fence);
+
+	return fence_completed(f->gpu, f->base.seqno);
+}
+
+static void etnaviv_fence_release(struct fence *fence)
+{
+	struct etnaviv_fence *f = to_etnaviv_fence(fence);
+
+	kfree_rcu(f, base.rcu);
+}
+
+static const struct fence_ops etnaviv_fence_ops = {
+	.get_driver_name = etnaviv_fence_get_driver_name,
+	.get_timeline_name = etnaviv_fence_get_timeline_name,
+	.enable_signaling = etnaviv_fence_enable_signaling,
+	.signaled = etnaviv_fence_signaled,
+	.wait = fence_default_wait,
+	.release = etnaviv_fence_release,
+};
+
+static struct fence *etnaviv_gpu_fence_alloc(struct etnaviv_gpu *gpu)
+{
+	struct etnaviv_fence *f;
+
+	f = kzalloc(sizeof(*f), GFP_KERNEL);
+	if (!f)
+		return NULL;
+
+	f->gpu = gpu;
+
+	fence_init(&f->base, &etnaviv_fence_ops, &gpu->fence_spinlock,
+		   gpu->fence_context, ++gpu->next_fence);
+
+	return &f->base;
+}
+
+int etnaviv_gpu_fence_sync_obj(struct etnaviv_gem_object *etnaviv_obj,
+	unsigned int context, bool exclusive)
+{
+	struct reservation_object *robj = etnaviv_obj->resv;
+	struct reservation_object_list *fobj;
+	struct fence *fence;
+	int i, ret;
+
+	if (!exclusive) {
+		ret = reservation_object_reserve_shared(robj);
+		if (ret)
+			return ret;
+	}
+
+	/*
+	 * If we have any shared fences, then the exclusive fence
+	 * should be ignored as it will already have been signalled.
+	 */
+	fobj = reservation_object_get_list(robj);
+	if (!fobj || fobj->shared_count == 0) {
+		/* Wait on any existing exclusive fence which isn't our own */
+		fence = reservation_object_get_excl(robj);
+		if (fence && fence->context != context) {
+			ret = fence_wait(fence, true);
+			if (ret)
+				return ret;
+		}
+	}
+
+	if (!exclusive || !fobj)
+		return 0;
+
+	for (i = 0; i < fobj->shared_count; i++) {
+		fence = rcu_dereference_protected(fobj->shared[i],
+						reservation_object_held(robj));
+		if (fence->context != context) {
+			ret = fence_wait(fence, true);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * event management:
+ */
+
+static unsigned int event_alloc(struct etnaviv_gpu *gpu)
+{
+	unsigned long ret, flags;
+	unsigned int i, event = ~0U;
+
+	ret = wait_for_completion_timeout(&gpu->event_free,
+					  msecs_to_jiffies(10 * 10000));
+	if (!ret)
+		dev_err(gpu->dev, "wait_for_completion_timeout failed");
+
+	spin_lock_irqsave(&gpu->event_spinlock, flags);
+
+	/* find first free event */
+	for (i = 0; i < ARRAY_SIZE(gpu->event); i++) {
+		if (gpu->event[i].used == false) {
+			gpu->event[i].used = true;
+			event = i;
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&gpu->event_spinlock, flags);
+
+	return event;
+}
+
+static void event_free(struct etnaviv_gpu *gpu, unsigned int event)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&gpu->event_spinlock, flags);
+
+	if (gpu->event[event].used == false) {
+		dev_warn(gpu->dev, "event %u is already marked as free",
+			 event);
+		spin_unlock_irqrestore(&gpu->event_spinlock, flags);
+	} else {
+		gpu->event[event].used = false;
+		spin_unlock_irqrestore(&gpu->event_spinlock, flags);
+
+		complete(&gpu->event_free);
+	}
+}
+
+/*
+ * Cmdstream submission/retirement:
+ */
+
+struct etnaviv_cmdbuf *etnaviv_gpu_cmdbuf_new(struct etnaviv_gpu *gpu, u32 size,
+	size_t nr_bos)
+{
+	struct etnaviv_cmdbuf *cmdbuf;
+	size_t sz = size_vstruct(nr_bos, sizeof(cmdbuf->bo[0]),
+				 sizeof(*cmdbuf));
+
+	cmdbuf = kzalloc(sz, GFP_KERNEL);
+	if (!cmdbuf)
+		return NULL;
+
+	cmdbuf->vaddr = dma_alloc_writecombine(gpu->dev, size, &cmdbuf->paddr,
+					       GFP_KERNEL);
+	if (!cmdbuf->vaddr) {
+		kfree(cmdbuf);
+		return NULL;
+	}
+
+	cmdbuf->gpu = gpu;
+	cmdbuf->size = size;
+
+	return cmdbuf;
+}
+
+void etnaviv_gpu_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf)
+{
+	dma_free_writecombine(cmdbuf->gpu->dev, cmdbuf->size,
+			      cmdbuf->vaddr, cmdbuf->paddr);
+	kfree(cmdbuf);
+}
+
+static void retire_worker(struct work_struct *work)
+{
+	struct etnaviv_gpu *gpu = container_of(work, struct etnaviv_gpu,
+					       retire_work);
+	u32 fence = gpu->completed_fence;
+	struct etnaviv_cmdbuf *cmdbuf, *tmp;
+	unsigned int i;
+
+	mutex_lock(&gpu->lock);
+	list_for_each_entry_safe(cmdbuf, tmp, &gpu->active_cmd_list, node) {
+		if (!fence_is_signaled(cmdbuf->fence))
+			break;
+
+		list_del(&cmdbuf->node);
+		fence_put(cmdbuf->fence);
+
+		for (i = 0; i < cmdbuf->nr_bos; i++) {
+			struct etnaviv_gem_object *etnaviv_obj = cmdbuf->bo[i];
+
+			atomic_dec(&etnaviv_obj->gpu_active);
+			/* drop the refcount taken in etnaviv_gpu_submit */
+			etnaviv_gem_put_iova(gpu, &etnaviv_obj->base);
+		}
+
+		etnaviv_gpu_cmdbuf_free(cmdbuf);
+	}
+
+	gpu->retired_fence = fence;
+
+	mutex_unlock(&gpu->lock);
+
+	wake_up_all(&gpu->fence_event);
+}
+
+int etnaviv_gpu_wait_fence_interruptible(struct etnaviv_gpu *gpu,
+	u32 fence, struct timespec *timeout)
+{
+	int ret;
+
+	if (fence_after(fence, gpu->next_fence)) {
+		DRM_ERROR("waiting on invalid fence: %u (of %u)\n",
+				fence, gpu->next_fence);
+		return -EINVAL;
+	}
+
+	if (!timeout) {
+		/* No timeout was requested: just test for completion */
+		ret = fence_completed(gpu, fence) ? 0 : -EBUSY;
+	} else {
+		unsigned long remaining = etnaviv_timeout_to_jiffies(timeout);
+
+		ret = wait_event_interruptible_timeout(gpu->fence_event,
+						fence_completed(gpu, fence),
+						remaining);
+		if (ret == 0) {
+			DBG("timeout waiting for fence: %u (retired: %u completed: %u)",
+				fence, gpu->retired_fence,
+				gpu->completed_fence);
+			ret = -ETIMEDOUT;
+		} else if (ret != -ERESTARTSYS) {
+			ret = 0;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Wait for an object to become inactive.  This, on it's own, is not race
+ * free: the object is moved by the retire worker off the active list, and
+ * then the iova is put.  Moreover, the object could be re-submitted just
+ * after we notice that it's become inactive.
+ *
+ * Although the retirement happens under the gpu lock, we don't want to hold
+ * that lock in this function while waiting.
+ */
+int etnaviv_gpu_wait_obj_inactive(struct etnaviv_gpu *gpu,
+	struct etnaviv_gem_object *etnaviv_obj, struct timespec *timeout)
+{
+	unsigned long remaining;
+	long ret;
+
+	if (!timeout)
+		return !is_active(etnaviv_obj) ? 0 : -EBUSY;
+
+	remaining = etnaviv_timeout_to_jiffies(timeout);
+
+	ret = wait_event_interruptible_timeout(gpu->fence_event,
+					       !is_active(etnaviv_obj),
+					       remaining);
+	if (ret > 0) {
+		struct etnaviv_drm_private *priv = gpu->drm->dev_private;
+
+		/* Synchronise with the retire worker */
+		flush_workqueue(priv->wq);
+		return 0;
+	} else if (ret == -ERESTARTSYS) {
+		return -ERESTARTSYS;
+	} else {
+		return -ETIMEDOUT;
+	}
+}
+
+int etnaviv_gpu_pm_get_sync(struct etnaviv_gpu *gpu)
+{
+	return pm_runtime_get_sync(gpu->dev);
+}
+
+void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu)
+{
+	pm_runtime_mark_last_busy(gpu->dev);
+	pm_runtime_put_autosuspend(gpu->dev);
+}
+
+/* add bo's to gpu's ring, and kick gpu: */
+int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
+	struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf)
+{
+	struct fence *fence;
+	unsigned int event, i;
+	int ret;
+
+	ret = etnaviv_gpu_pm_get_sync(gpu);
+	if (ret < 0)
+		return ret;
+
+	mutex_lock(&gpu->lock);
+
+	/*
+	 * TODO
+	 *
+	 * - flush
+	 * - data endian
+	 * - prefetch
+	 *
+	 */
+
+	event = event_alloc(gpu);
+	if (unlikely(event == ~0U)) {
+		DRM_ERROR("no free event\n");
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+
+	fence = etnaviv_gpu_fence_alloc(gpu);
+	if (!fence) {
+		event_free(gpu, event);
+		ret = -ENOMEM;
+		goto out_unlock;
+	}
+
+	gpu->event[event].fence = fence;
+	submit->fence = fence->seqno;
+	gpu->active_fence = submit->fence;
+
+	if (gpu->lastctx != cmdbuf->ctx) {
+		gpu->mmu->need_flush = true;
+		gpu->switch_context = true;
+		gpu->lastctx = cmdbuf->ctx;
+	}
+
+	etnaviv_buffer_queue(gpu, event, cmdbuf);
+
+	cmdbuf->fence = fence;
+	list_add_tail(&cmdbuf->node, &gpu->active_cmd_list);
+
+	/* We're committed to adding this command buffer, hold a PM reference */
+	pm_runtime_get_noresume(gpu->dev);
+
+	for (i = 0; i < submit->nr_bos; i++) {
+		struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
+		u32 iova;
+
+		/* Each cmdbuf takes a refcount on the iova */
+		etnaviv_gem_get_iova(gpu, &etnaviv_obj->base, &iova);
+		cmdbuf->bo[i] = etnaviv_obj;
+		atomic_inc(&etnaviv_obj->gpu_active);
+
+		if (submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE)
+			reservation_object_add_excl_fence(etnaviv_obj->resv,
+							  fence);
+		else
+			reservation_object_add_shared_fence(etnaviv_obj->resv,
+							    fence);
+	}
+	cmdbuf->nr_bos = submit->nr_bos;
+	hangcheck_timer_reset(gpu);
+	ret = 0;
+
+out_unlock:
+	mutex_unlock(&gpu->lock);
+
+	etnaviv_gpu_pm_put(gpu);
+
+	return ret;
+}
+
+/*
+ * Init/Cleanup:
+ */
+static irqreturn_t irq_handler(int irq, void *data)
+{
+	struct etnaviv_gpu *gpu = data;
+	irqreturn_t ret = IRQ_NONE;
+
+	u32 intr = gpu_read(gpu, VIVS_HI_INTR_ACKNOWLEDGE);
+
+	if (intr != 0) {
+		int event;
+
+		pm_runtime_mark_last_busy(gpu->dev);
+
+		dev_dbg(gpu->dev, "intr 0x%08x\n", intr);
+
+		if (intr & VIVS_HI_INTR_ACKNOWLEDGE_AXI_BUS_ERROR) {
+			dev_err(gpu->dev, "AXI bus error\n");
+			intr &= ~VIVS_HI_INTR_ACKNOWLEDGE_AXI_BUS_ERROR;
+		}
+
+		while ((event = ffs(intr)) != 0) {
+			struct fence *fence;
+
+			event -= 1;
+
+			intr &= ~(1 << event);
+
+			dev_dbg(gpu->dev, "event %u\n", event);
+
+			fence = gpu->event[event].fence;
+			gpu->event[event].fence = NULL;
+			fence_signal(fence);
+
+			/*
+			 * Events can be processed out of order.  Eg,
+			 * - allocate and queue event 0
+			 * - allocate event 1
+			 * - event 0 completes, we process it
+			 * - allocate and queue event 0
+			 * - event 1 and event 0 complete
+			 * we can end up processing event 0 first, then 1.
+			 */
+			if (fence_after(fence->seqno, gpu->completed_fence))
+				gpu->completed_fence = fence->seqno;
+
+			event_free(gpu, event);
+
+			/*
+			 * We need to balance the runtime PM count caused by
+			 * each submission.  Upon submission, we increment
+			 * the runtime PM counter, and allocate one event.
+			 * So here, we put the runtime PM count for each
+			 * completed event.
+			 */
+			pm_runtime_put_autosuspend(gpu->dev);
+		}
+
+		/* Retire the buffer objects in a work */
+		etnaviv_queue_work(gpu->drm, &gpu->retire_work);
+
+		ret = IRQ_HANDLED;
+	}
+
+	return ret;
+}
+
+static int etnaviv_gpu_clk_enable(struct etnaviv_gpu *gpu)
+{
+	int ret;
+
+	ret = enable_clk(gpu);
+	if (ret)
+		return ret;
+
+	ret = enable_axi(gpu);
+	if (ret) {
+		disable_clk(gpu);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int etnaviv_gpu_clk_disable(struct etnaviv_gpu *gpu)
+{
+	int ret;
+
+	ret = disable_axi(gpu);
+	if (ret)
+		return ret;
+
+	ret = disable_clk(gpu);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int etnaviv_gpu_hw_suspend(struct etnaviv_gpu *gpu)
+{
+	if (gpu->buffer) {
+		unsigned long timeout;
+
+		/* Replace the last WAIT with END */
+		etnaviv_buffer_end(gpu);
+
+		/*
+		 * We know that only the FE is busy here, this should
+		 * happen quickly (as the WAIT is only 200 cycles).  If
+		 * we fail, just warn and continue.
+		 */
+		timeout = jiffies + msecs_to_jiffies(100);
+		do {
+			u32 idle = gpu_read(gpu, VIVS_HI_IDLE_STATE);
+
+			if ((idle & gpu->idle_mask) == gpu->idle_mask)
+				break;
+
+			if (time_is_before_jiffies(timeout)) {
+				dev_warn(gpu->dev,
+					 "timed out waiting for idle: idle=0x%x\n",
+					 idle);
+				break;
+			}
+
+			udelay(5);
+		} while (1);
+	}
+
+	return etnaviv_gpu_clk_disable(gpu);
+}
+
+#ifdef CONFIG_PM
+static int etnaviv_gpu_hw_resume(struct etnaviv_gpu *gpu)
+{
+	u32 clock;
+	int ret;
+
+	ret = mutex_lock_killable(&gpu->lock);
+	if (ret)
+		return ret;
+
+	clock = VIVS_HI_CLOCK_CONTROL_DISABLE_DEBUG_REGISTERS |
+		VIVS_HI_CLOCK_CONTROL_FSCALE_VAL(0x40);
+
+	etnaviv_gpu_load_clock(gpu, clock);
+	etnaviv_gpu_hw_init(gpu);
+
+	gpu->switch_context = true;
+
+	mutex_unlock(&gpu->lock);
+
+	return 0;
+}
+#endif
+
+static int etnaviv_gpu_bind(struct device *dev, struct device *master,
+	void *data)
+{
+	struct drm_device *drm = data;
+	struct etnaviv_drm_private *priv = drm->dev_private;
+	struct etnaviv_gpu *gpu = dev_get_drvdata(dev);
+	int ret;
+
+#ifdef CONFIG_PM
+	ret = pm_runtime_get_sync(gpu->dev);
+#else
+	ret = etnaviv_gpu_clk_enable(gpu);
+#endif
+	if (ret < 0)
+		return ret;
+
+	gpu->drm = drm;
+	gpu->fence_context = fence_context_alloc(1);
+	spin_lock_init(&gpu->fence_spinlock);
+
+	INIT_LIST_HEAD(&gpu->active_cmd_list);
+	INIT_WORK(&gpu->retire_work, retire_worker);
+	INIT_WORK(&gpu->recover_work, recover_worker);
+	init_waitqueue_head(&gpu->fence_event);
+
+	setup_timer(&gpu->hangcheck_timer, hangcheck_handler,
+			(unsigned long)gpu);
+
+	priv->gpu[priv->num_gpus++] = gpu;
+
+	pm_runtime_mark_last_busy(gpu->dev);
+	pm_runtime_put_autosuspend(gpu->dev);
+
+	return 0;
+}
+
+static void etnaviv_gpu_unbind(struct device *dev, struct device *master,
+	void *data)
+{
+	struct etnaviv_gpu *gpu = dev_get_drvdata(dev);
+
+	DBG("%s", dev_name(gpu->dev));
+
+	hangcheck_disable(gpu);
+
+#ifdef CONFIG_PM
+	pm_runtime_get_sync(gpu->dev);
+	pm_runtime_put_sync_suspend(gpu->dev);
+#else
+	etnaviv_gpu_hw_suspend(gpu);
+#endif
+
+	if (gpu->buffer) {
+		etnaviv_gpu_cmdbuf_free(gpu->buffer);
+		gpu->buffer = NULL;
+	}
+
+	if (gpu->mmu) {
+		etnaviv_iommu_destroy(gpu->mmu);
+		gpu->mmu = NULL;
+	}
+
+	gpu->drm = NULL;
+}
+
+static const struct component_ops gpu_ops = {
+	.bind = etnaviv_gpu_bind,
+	.unbind = etnaviv_gpu_unbind,
+};
+
+static const struct of_device_id etnaviv_gpu_match[] = {
+	{
+		.compatible = "vivante,gc"
+	},
+	{ /* sentinel */ }
+};
+
+static int etnaviv_gpu_platform_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct etnaviv_gpu *gpu;
+	int err = 0;
+
+	gpu = devm_kzalloc(dev, sizeof(*gpu), GFP_KERNEL);
+	if (!gpu)
+		return -ENOMEM;
+
+	gpu->dev = &pdev->dev;
+	mutex_init(&gpu->lock);
+
+	/*
+	 * Set the GPU base address to the start of physical memory.  This
+	 * ensures that if we have up to 2GB, the v1 MMU can address the
+	 * highest memory.  This is important as command buffers may be
+	 * allocated outside of this limit.
+	 */
+	gpu->memory_base = PHYS_OFFSET;
+
+	/* Map registers: */
+	gpu->mmio = etnaviv_ioremap(pdev, NULL, dev_name(gpu->dev));
+	if (IS_ERR(gpu->mmio))
+		return PTR_ERR(gpu->mmio);
+
+	/* Get Interrupt: */
+	gpu->irq = platform_get_irq(pdev, 0);
+	if (gpu->irq < 0) {
+		err = gpu->irq;
+		dev_err(dev, "failed to get irq: %d\n", err);
+		goto fail;
+	}
+
+	err = devm_request_irq(&pdev->dev, gpu->irq, irq_handler, 0,
+			       dev_name(gpu->dev), gpu);
+	if (err) {
+		dev_err(dev, "failed to request IRQ%u: %d\n", gpu->irq, err);
+		goto fail;
+	}
+
+	/* Get Clocks: */
+	gpu->clk_bus = devm_clk_get(&pdev->dev, "bus");
+	DBG("clk_bus: %p", gpu->clk_bus);
+	if (IS_ERR(gpu->clk_bus))
+		gpu->clk_bus = NULL;
+
+	gpu->clk_core = devm_clk_get(&pdev->dev, "core");
+	DBG("clk_core: %p", gpu->clk_core);
+	if (IS_ERR(gpu->clk_core))
+		gpu->clk_core = NULL;
+
+	gpu->clk_shader = devm_clk_get(&pdev->dev, "shader");
+	DBG("clk_shader: %p", gpu->clk_shader);
+	if (IS_ERR(gpu->clk_shader))
+		gpu->clk_shader = NULL;
+
+	/* TODO: figure out max mapped size */
+	dev_set_drvdata(dev, gpu);
+
+	/*
+	 * We treat the device as initially suspended.  The runtime PM
+	 * autosuspend delay is rather arbitary: no measurements have
+	 * yet been performed to determine an appropriate value.
+	 */
+	pm_runtime_use_autosuspend(gpu->dev);
+	pm_runtime_set_autosuspend_delay(gpu->dev, 200);
+	pm_runtime_enable(gpu->dev);
+
+	err = component_add(&pdev->dev, &gpu_ops);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to register component: %d\n", err);
+		goto fail;
+	}
+
+	return 0;
+
+fail:
+	return err;
+}
+
+static int etnaviv_gpu_platform_remove(struct platform_device *pdev)
+{
+	component_del(&pdev->dev, &gpu_ops);
+	pm_runtime_disable(&pdev->dev);
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int etnaviv_gpu_rpm_suspend(struct device *dev)
+{
+	struct etnaviv_gpu *gpu = dev_get_drvdata(dev);
+	u32 idle, mask;
+
+	/* If we have outstanding fences, we're not idle */
+	if (gpu->completed_fence != gpu->active_fence)
+		return -EBUSY;
+
+	/* Check whether the hardware (except FE) is idle */
+	mask = gpu->idle_mask & ~VIVS_HI_IDLE_STATE_FE;
+	idle = gpu_read(gpu, VIVS_HI_IDLE_STATE) & mask;
+	if (idle != mask)
+		return -EBUSY;
+
+	return etnaviv_gpu_hw_suspend(gpu);
+}
+
+static int etnaviv_gpu_rpm_resume(struct device *dev)
+{
+	struct etnaviv_gpu *gpu = dev_get_drvdata(dev);
+	int ret;
+
+	ret = etnaviv_gpu_clk_enable(gpu);
+	if (ret)
+		return ret;
+
+	/* Re-initialise the basic hardware state */
+	if (gpu->drm && gpu->buffer) {
+		ret = etnaviv_gpu_hw_resume(gpu);
+		if (ret) {
+			etnaviv_gpu_clk_disable(gpu);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops etnaviv_gpu_pm_ops = {
+	SET_RUNTIME_PM_OPS(etnaviv_gpu_rpm_suspend, etnaviv_gpu_rpm_resume,
+			   NULL)
+};
+
+struct platform_driver etnaviv_gpu_driver = {
+	.driver = {
+		.name = "etnaviv-gpu",
+		.owner = THIS_MODULE,
+		.pm = &etnaviv_gpu_pm_ops,
+		.of_match_table = etnaviv_gpu_match,
+	},
+	.probe = etnaviv_gpu_platform_probe,
+	.remove = etnaviv_gpu_platform_remove,
+	.id_table = gpu_ids,
+};
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
new file mode 100644
index 000000000000..c75d50359ab0
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -0,0 +1,209 @@
+/*
+ * Copyright (C) 2015 Etnaviv Project
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ETNAVIV_GPU_H__
+#define __ETNAVIV_GPU_H__
+
+#include <linux/clk.h>
+#include <linux/regulator/consumer.h>
+
+#include "etnaviv_drv.h"
+
+struct etnaviv_gem_submit;
+
+struct etnaviv_chip_identity {
+	/* Chip model. */
+	u32 model;
+
+	/* Revision value.*/
+	u32 revision;
+
+	/* Supported feature fields. */
+	u32 features;
+
+	/* Supported minor feature fields. */
+	u32 minor_features0;
+
+	/* Supported minor feature 1 fields. */
+	u32 minor_features1;
+
+	/* Supported minor feature 2 fields. */
+	u32 minor_features2;
+
+	/* Supported minor feature 3 fields. */
+	u32 minor_features3;
+
+	/* Number of streams supported. */
+	u32 stream_count;
+
+	/* Total number of temporary registers per thread. */
+	u32 register_max;
+
+	/* Maximum number of threads. */
+	u32 thread_count;
+
+	/* Number of shader cores. */
+	u32 shader_core_count;
+
+	/* Size of the vertex cache. */
+	u32 vertex_cache_size;
+
+	/* Number of entries in the vertex output buffer. */
+	u32 vertex_output_buffer_size;
+
+	/* Number of pixel pipes. */
+	u32 pixel_pipes;
+
+	/* Number of instructions. */
+	u32 instruction_count;
+
+	/* Number of constants. */
+	u32 num_constants;
+
+	/* Buffer size */
+	u32 buffer_size;
+};
+
+struct etnaviv_event {
+	bool used;
+	struct fence *fence;
+};
+
+struct etnaviv_cmdbuf;
+
+struct etnaviv_gpu {
+	struct drm_device *drm;
+	struct device *dev;
+	struct mutex lock;
+	struct etnaviv_chip_identity identity;
+	struct etnaviv_file_private *lastctx;
+	bool switch_context;
+
+	/* 'ring'-buffer: */
+	struct etnaviv_cmdbuf *buffer;
+
+	/* bus base address of memory  */
+	u32 memory_base;
+
+	/* event management: */
+	struct etnaviv_event event[30];
+	struct completion event_free;
+	spinlock_t event_spinlock;
+
+	/* list of currently in-flight command buffers */
+	struct list_head active_cmd_list;
+
+	u32 idle_mask;
+
+	/* Fencing support */
+	u32 next_fence;
+	u32 active_fence;
+	u32 completed_fence;
+	u32 retired_fence;
+	wait_queue_head_t fence_event;
+	unsigned int fence_context;
+	spinlock_t fence_spinlock;
+
+	/* worker for handling active-list retiring: */
+	struct work_struct retire_work;
+
+	void __iomem *mmio;
+	int irq;
+
+	struct etnaviv_iommu *mmu;
+
+	/* Power Control: */
+	struct clk *clk_bus;
+	struct clk *clk_core;
+	struct clk *clk_shader;
+
+	/* Hang Detction: */
+#define DRM_ETNAVIV_HANGCHECK_PERIOD 500 /* in ms */
+#define DRM_ETNAVIV_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_ETNAVIV_HANGCHECK_PERIOD)
+	struct timer_list hangcheck_timer;
+	u32 hangcheck_fence;
+	u32 hangcheck_dma_addr;
+	struct work_struct recover_work;
+};
+
+struct etnaviv_cmdbuf {
+	/* device this cmdbuf is allocated for */
+	struct etnaviv_gpu *gpu;
+	/* user context key, must be unique between all active users */
+	struct etnaviv_file_private *ctx;
+	/* cmdbuf properties */
+	void *vaddr;
+	dma_addr_t paddr;
+	u32 size;
+	u32 user_size;
+	/* fence after which this buffer is to be disposed */
+	struct fence *fence;
+	/* target exec state */
+	u32 exec_state;
+	/* per GPU in-flight list */
+	struct list_head node;
+	/* BOs attached to this command buffer */
+	unsigned int nr_bos;
+	struct etnaviv_gem_object *bo[0];
+};
+
+static inline void gpu_write(struct etnaviv_gpu *gpu, u32 reg, u32 data)
+{
+	etnaviv_writel(data, gpu->mmio + reg);
+}
+
+static inline u32 gpu_read(struct etnaviv_gpu *gpu, u32 reg)
+{
+	return etnaviv_readl(gpu->mmio + reg);
+}
+
+static inline bool fence_completed(struct etnaviv_gpu *gpu, u32 fence)
+{
+	return fence_after_eq(gpu->completed_fence, fence);
+}
+
+static inline bool fence_retired(struct etnaviv_gpu *gpu, u32 fence)
+{
+	return fence_after_eq(gpu->retired_fence, fence);
+}
+
+int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value);
+
+int etnaviv_gpu_init(struct etnaviv_gpu *gpu);
+
+#ifdef CONFIG_DEBUG_FS
+int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m);
+#endif
+
+int etnaviv_gpu_fence_sync_obj(struct etnaviv_gem_object *etnaviv_obj,
+	unsigned int context, bool exclusive);
+
+void etnaviv_gpu_retire(struct etnaviv_gpu *gpu);
+int etnaviv_gpu_wait_fence_interruptible(struct etnaviv_gpu *gpu,
+	u32 fence, struct timespec *timeout);
+int etnaviv_gpu_wait_obj_inactive(struct etnaviv_gpu *gpu,
+	struct etnaviv_gem_object *etnaviv_obj, struct timespec *timeout);
+int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
+	struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf);
+struct etnaviv_cmdbuf *etnaviv_gpu_cmdbuf_new(struct etnaviv_gpu *gpu,
+					      u32 size, size_t nr_bos);
+void etnaviv_gpu_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf);
+int etnaviv_gpu_pm_get_sync(struct etnaviv_gpu *gpu);
+void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu);
+
+extern struct platform_driver etnaviv_gpu_driver;
+
+#endif /* __ETNAVIV_GPU_H__ */
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
new file mode 100644
index 000000000000..522cfd447892
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
@@ -0,0 +1,240 @@
+/*
+ * Copyright (C) 2014 Christian Gmeiner <christian.gmeiner@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/iommu.h>
+#include <linux/platform_device.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/dma-mapping.h>
+#include <linux/bitops.h>
+
+#include "etnaviv_gpu.h"
+#include "etnaviv_mmu.h"
+#include "etnaviv_iommu.h"
+#include "state_hi.xml.h"
+
+#define PT_SIZE		SZ_2M
+#define PT_ENTRIES	(PT_SIZE / sizeof(u32))
+
+#define GPU_MEM_START	0x80000000
+
+struct etnaviv_iommu_domain_pgtable {
+	u32 *pgtable;
+	dma_addr_t paddr;
+};
+
+struct etnaviv_iommu_domain {
+	struct iommu_domain domain;
+	struct device *dev;
+	void *bad_page_cpu;
+	dma_addr_t bad_page_dma;
+	struct etnaviv_iommu_domain_pgtable pgtable;
+	spinlock_t map_lock;
+};
+
+static struct etnaviv_iommu_domain *to_etnaviv_domain(struct iommu_domain *domain)
+{
+	return container_of(domain, struct etnaviv_iommu_domain, domain);
+}
+
+static int pgtable_alloc(struct etnaviv_iommu_domain_pgtable *pgtable,
+			 size_t size)
+{
+	pgtable->pgtable = dma_alloc_coherent(NULL, size, &pgtable->paddr, GFP_KERNEL);
+	if (!pgtable->pgtable)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void pgtable_free(struct etnaviv_iommu_domain_pgtable *pgtable,
+			 size_t size)
+{
+	dma_free_coherent(NULL, size, pgtable->pgtable, pgtable->paddr);
+}
+
+static u32 pgtable_read(struct etnaviv_iommu_domain_pgtable *pgtable,
+			   unsigned long iova)
+{
+	/* calcuate index into page table */
+	unsigned int index = (iova - GPU_MEM_START) / SZ_4K;
+	phys_addr_t paddr;
+
+	paddr = pgtable->pgtable[index];
+
+	return paddr;
+}
+
+static void pgtable_write(struct etnaviv_iommu_domain_pgtable *pgtable,
+			  unsigned long iova, phys_addr_t paddr)
+{
+	/* calcuate index into page table */
+	unsigned int index = (iova - GPU_MEM_START) / SZ_4K;
+
+	pgtable->pgtable[index] = paddr;
+}
+
+static int __etnaviv_iommu_init(struct etnaviv_iommu_domain *etnaviv_domain)
+{
+	u32 *p;
+	int ret, i;
+
+	etnaviv_domain->bad_page_cpu = dma_alloc_coherent(etnaviv_domain->dev,
+						  SZ_4K,
+						  &etnaviv_domain->bad_page_dma,
+						  GFP_KERNEL);
+	if (!etnaviv_domain->bad_page_cpu)
+		return -ENOMEM;
+
+	p = etnaviv_domain->bad_page_cpu;
+	for (i = 0; i < SZ_4K / 4; i++)
+		*p++ = 0xdead55aa;
+
+	ret = pgtable_alloc(&etnaviv_domain->pgtable, PT_SIZE);
+	if (ret < 0) {
+		dma_free_coherent(etnaviv_domain->dev, SZ_4K,
+				  etnaviv_domain->bad_page_cpu,
+				  etnaviv_domain->bad_page_dma);
+		return ret;
+	}
+
+	for (i = 0; i < PT_ENTRIES; i++)
+		etnaviv_domain->pgtable.pgtable[i] =
+			etnaviv_domain->bad_page_dma;
+
+	spin_lock_init(&etnaviv_domain->map_lock);
+
+	return 0;
+}
+
+static void etnaviv_domain_free(struct iommu_domain *domain)
+{
+	struct etnaviv_iommu_domain *etnaviv_domain = to_etnaviv_domain(domain);
+
+	pgtable_free(&etnaviv_domain->pgtable, PT_SIZE);
+
+	dma_free_coherent(etnaviv_domain->dev, SZ_4K,
+			  etnaviv_domain->bad_page_cpu,
+			  etnaviv_domain->bad_page_dma);
+
+	kfree(etnaviv_domain);
+}
+
+static int etnaviv_iommuv1_map(struct iommu_domain *domain, unsigned long iova,
+	   phys_addr_t paddr, size_t size, int prot)
+{
+	struct etnaviv_iommu_domain *etnaviv_domain = to_etnaviv_domain(domain);
+
+	if (size != SZ_4K)
+		return -EINVAL;
+
+	spin_lock(&etnaviv_domain->map_lock);
+	pgtable_write(&etnaviv_domain->pgtable, iova, paddr);
+	spin_unlock(&etnaviv_domain->map_lock);
+
+	return 0;
+}
+
+static size_t etnaviv_iommuv1_unmap(struct iommu_domain *domain,
+	unsigned long iova, size_t size)
+{
+	struct etnaviv_iommu_domain *etnaviv_domain = to_etnaviv_domain(domain);
+
+	if (size != SZ_4K)
+		return -EINVAL;
+
+	spin_lock(&etnaviv_domain->map_lock);
+	pgtable_write(&etnaviv_domain->pgtable, iova,
+		      etnaviv_domain->bad_page_dma);
+	spin_unlock(&etnaviv_domain->map_lock);
+
+	return SZ_4K;
+}
+
+static phys_addr_t etnaviv_iommu_iova_to_phys(struct iommu_domain *domain,
+	dma_addr_t iova)
+{
+	struct etnaviv_iommu_domain *etnaviv_domain = to_etnaviv_domain(domain);
+
+	return pgtable_read(&etnaviv_domain->pgtable, iova);
+}
+
+static size_t etnaviv_iommuv1_dump_size(struct iommu_domain *domain)
+{
+	return PT_SIZE;
+}
+
+static void etnaviv_iommuv1_dump(struct iommu_domain *domain, void *buf)
+{
+	struct etnaviv_iommu_domain *etnaviv_domain = to_etnaviv_domain(domain);
+
+	memcpy(buf, etnaviv_domain->pgtable.pgtable, PT_SIZE);
+}
+
+static struct etnaviv_iommu_ops etnaviv_iommu_ops = {
+	.ops = {
+		.domain_free = etnaviv_domain_free,
+		.map = etnaviv_iommuv1_map,
+		.unmap = etnaviv_iommuv1_unmap,
+		.iova_to_phys = etnaviv_iommu_iova_to_phys,
+		.pgsize_bitmap = SZ_4K,
+	},
+	.dump_size = etnaviv_iommuv1_dump_size,
+	.dump = etnaviv_iommuv1_dump,
+};
+
+void etnaviv_iommu_domain_restore(struct etnaviv_gpu *gpu,
+	struct iommu_domain *domain)
+{
+	struct etnaviv_iommu_domain *etnaviv_domain = to_etnaviv_domain(domain);
+	u32 pgtable;
+
+	/* set page table address in MC */
+	pgtable = (u32)etnaviv_domain->pgtable.paddr;
+
+	gpu_write(gpu, VIVS_MC_MMU_FE_PAGE_TABLE, pgtable);
+	gpu_write(gpu, VIVS_MC_MMU_TX_PAGE_TABLE, pgtable);
+	gpu_write(gpu, VIVS_MC_MMU_PE_PAGE_TABLE, pgtable);
+	gpu_write(gpu, VIVS_MC_MMU_PEZ_PAGE_TABLE, pgtable);
+	gpu_write(gpu, VIVS_MC_MMU_RA_PAGE_TABLE, pgtable);
+}
+
+struct iommu_domain *etnaviv_iommu_domain_alloc(struct etnaviv_gpu *gpu)
+{
+	struct etnaviv_iommu_domain *etnaviv_domain;
+	int ret;
+
+	etnaviv_domain = kzalloc(sizeof(*etnaviv_domain), GFP_KERNEL);
+	if (!etnaviv_domain)
+		return NULL;
+
+	etnaviv_domain->dev = gpu->dev;
+
+	etnaviv_domain->domain.type = __IOMMU_DOMAIN_PAGING;
+	etnaviv_domain->domain.ops = &etnaviv_iommu_ops.ops;
+	etnaviv_domain->domain.geometry.aperture_start = GPU_MEM_START;
+	etnaviv_domain->domain.geometry.aperture_end = GPU_MEM_START + PT_ENTRIES * SZ_4K - 1;
+
+	ret = __etnaviv_iommu_init(etnaviv_domain);
+	if (ret)
+		goto out_free;
+
+	return &etnaviv_domain->domain;
+
+out_free:
+	kfree(etnaviv_domain);
+	return NULL;
+}
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu.h b/drivers/gpu/drm/etnaviv/etnaviv_iommu.h
new file mode 100644
index 000000000000..cf45503f6b6f
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2014 Christian Gmeiner <christian.gmeiner@gmail.com>
+  *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ETNAVIV_IOMMU_H__
+#define __ETNAVIV_IOMMU_H__
+
+#include <linux/iommu.h>
+struct etnaviv_gpu;
+
+struct iommu_domain *etnaviv_iommu_domain_alloc(struct etnaviv_gpu *gpu);
+void etnaviv_iommu_domain_restore(struct etnaviv_gpu *gpu,
+	struct iommu_domain *domain);
+struct iommu_domain *etnaviv_iommu_v2_domain_alloc(struct etnaviv_gpu *gpu);
+
+#endif /* __ETNAVIV_IOMMU_H__ */
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
new file mode 100644
index 000000000000..fbb4aed3dc80
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2014 Christian Gmeiner <christian.gmeiner@gmail.com>
+  *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/iommu.h>
+#include <linux/platform_device.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/dma-mapping.h>
+#include <linux/bitops.h>
+
+#include "etnaviv_gpu.h"
+#include "etnaviv_iommu.h"
+#include "state_hi.xml.h"
+
+
+struct iommu_domain *etnaviv_iommu_v2_domain_alloc(struct etnaviv_gpu *gpu)
+{
+	/* TODO */
+	return NULL;
+}
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.h b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.h
new file mode 100644
index 000000000000..603ea41c5389
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2014 Christian Gmeiner <christian.gmeiner@gmail.com>
+  *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ETNAVIV_IOMMU_V2_H__
+#define __ETNAVIV_IOMMU_V2_H__
+
+#include <linux/iommu.h>
+struct etnaviv_gpu;
+
+struct iommu_domain *etnaviv_iommu_v2_domain_alloc(struct etnaviv_gpu *gpu);
+
+#endif /* __ETNAVIV_IOMMU_V2_H__ */
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
new file mode 100644
index 000000000000..6743bc648dc8
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
@@ -0,0 +1,299 @@
+/*
+ * Copyright (C) 2015 Etnaviv Project
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "etnaviv_drv.h"
+#include "etnaviv_gem.h"
+#include "etnaviv_gpu.h"
+#include "etnaviv_mmu.h"
+
+static int etnaviv_fault_handler(struct iommu_domain *iommu, struct device *dev,
+		unsigned long iova, int flags, void *arg)
+{
+	DBG("*** fault: iova=%08lx, flags=%d", iova, flags);
+	return 0;
+}
+
+int etnaviv_iommu_map(struct etnaviv_iommu *iommu, u32 iova,
+		struct sg_table *sgt, unsigned len, int prot)
+{
+	struct iommu_domain *domain = iommu->domain;
+	struct scatterlist *sg;
+	unsigned int da = iova;
+	unsigned int i, j;
+	int ret;
+
+	if (!domain || !sgt)
+		return -EINVAL;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+		u32 pa = sg_dma_address(sg) - sg->offset;
+		size_t bytes = sg_dma_len(sg) + sg->offset;
+
+		VERB("map[%d]: %08x %08x(%zx)", i, iova, pa, bytes);
+
+		ret = iommu_map(domain, da, pa, bytes, prot);
+		if (ret)
+			goto fail;
+
+		da += bytes;
+	}
+
+	return 0;
+
+fail:
+	da = iova;
+
+	for_each_sg(sgt->sgl, sg, i, j) {
+		size_t bytes = sg_dma_len(sg) + sg->offset;
+
+		iommu_unmap(domain, da, bytes);
+		da += bytes;
+	}
+	return ret;
+}
+
+int etnaviv_iommu_unmap(struct etnaviv_iommu *iommu, u32 iova,
+		struct sg_table *sgt, unsigned len)
+{
+	struct iommu_domain *domain = iommu->domain;
+	struct scatterlist *sg;
+	unsigned int da = iova;
+	int i;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+		size_t bytes = sg_dma_len(sg) + sg->offset;
+		size_t unmapped;
+
+		unmapped = iommu_unmap(domain, da, bytes);
+		if (unmapped < bytes)
+			return unmapped;
+
+		VERB("unmap[%d]: %08x(%zx)", i, iova, bytes);
+
+		BUG_ON(!PAGE_ALIGNED(bytes));
+
+		da += bytes;
+	}
+
+	return 0;
+}
+
+static void etnaviv_iommu_remove_mapping(struct etnaviv_iommu *mmu,
+	struct etnaviv_vram_mapping *mapping)
+{
+	struct etnaviv_gem_object *etnaviv_obj = mapping->object;
+
+	etnaviv_iommu_unmap(mmu, mapping->vram_node.start,
+			    etnaviv_obj->sgt, etnaviv_obj->base.size);
+	drm_mm_remove_node(&mapping->vram_node);
+}
+
+int etnaviv_iommu_map_gem(struct etnaviv_iommu *mmu,
+	struct etnaviv_gem_object *etnaviv_obj, u32 memory_base,
+	struct etnaviv_vram_mapping *mapping)
+{
+	struct etnaviv_vram_mapping *free = NULL;
+	struct sg_table *sgt = etnaviv_obj->sgt;
+	struct drm_mm_node *node;
+	int ret;
+
+	lockdep_assert_held(&etnaviv_obj->lock);
+
+	mutex_lock(&mmu->lock);
+
+	/* v1 MMU can optimize single entry (contiguous) scatterlists */
+	if (sgt->nents == 1 && !(etnaviv_obj->flags & ETNA_BO_FORCE_MMU)) {
+		u32 iova;
+
+		iova = sg_dma_address(sgt->sgl) - memory_base;
+		if (iova < 0x80000000 - sg_dma_len(sgt->sgl)) {
+			mapping->iova = iova;
+			list_add_tail(&mapping->mmu_node, &mmu->mappings);
+			mutex_unlock(&mmu->lock);
+			return 0;
+		}
+	}
+
+	node = &mapping->vram_node;
+	while (1) {
+		struct etnaviv_vram_mapping *m, *n;
+		struct list_head list;
+		bool found;
+
+		ret = drm_mm_insert_node_in_range(&mmu->mm, node,
+			etnaviv_obj->base.size, 0, mmu->last_iova, ~0UL,
+			DRM_MM_SEARCH_DEFAULT);
+
+		if (ret != -ENOSPC)
+			break;
+
+		/*
+		 * If we did not search from the start of the MMU region,
+		 * try again in case there are free slots.
+		 */
+		if (mmu->last_iova) {
+			mmu->last_iova = 0;
+			mmu->need_flush = true;
+			continue;
+		}
+
+		/* Try to retire some entries */
+		drm_mm_init_scan(&mmu->mm, etnaviv_obj->base.size, 0, 0);
+
+		found = 0;
+		INIT_LIST_HEAD(&list);
+		list_for_each_entry(free, &mmu->mappings, mmu_node) {
+			/* If this vram node has not been used, skip this. */
+			if (!free->vram_node.mm)
+				continue;
+
+			/*
+			 * If the iova is pinned, then it's in-use,
+			 * so we must keep its mapping.
+			 */
+			if (free->use)
+				continue;
+
+			list_add(&free->scan_node, &list);
+			if (drm_mm_scan_add_block(&free->vram_node)) {
+				found = true;
+				break;
+			}
+		}
+
+		if (!found) {
+			/* Nothing found, clean up and fail */
+			list_for_each_entry_safe(m, n, &list, scan_node)
+				BUG_ON(drm_mm_scan_remove_block(&m->vram_node));
+			break;
+		}
+
+		/*
+		 * drm_mm does not allow any other operations while
+		 * scanning, so we have to remove all blocks first.
+		 * If drm_mm_scan_remove_block() returns false, we
+		 * can leave the block pinned.
+		 */
+		list_for_each_entry_safe(m, n, &list, scan_node)
+			if (!drm_mm_scan_remove_block(&m->vram_node))
+				list_del_init(&m->scan_node);
+
+		/*
+		 * Unmap the blocks which need to be reaped from the MMU.
+		 * Clear the mmu pointer to prevent the get_iova finding
+		 * this mapping.
+		 */
+		list_for_each_entry_safe(m, n, &list, scan_node) {
+			etnaviv_iommu_remove_mapping(mmu, m);
+			m->mmu = NULL;
+			list_del_init(&m->mmu_node);
+			list_del_init(&m->scan_node);
+		}
+
+		/*
+		 * We removed enough mappings so that the new allocation will
+		 * succeed.  Ensure that the MMU will be flushed before the
+		 * associated commit requesting this mapping, and retry the
+		 * allocation one more time.
+		 */
+		mmu->need_flush = true;
+	}
+
+	if (ret < 0) {
+		mutex_unlock(&mmu->lock);
+		return ret;
+	}
+
+	mmu->last_iova = node->start + etnaviv_obj->base.size;
+	mapping->iova = node->start;
+	ret = etnaviv_iommu_map(mmu, node->start, sgt, etnaviv_obj->base.size,
+				IOMMU_READ | IOMMU_WRITE);
+
+	if (ret < 0) {
+		drm_mm_remove_node(node);
+		mutex_unlock(&mmu->lock);
+		return ret;
+	}
+
+	list_add_tail(&mapping->mmu_node, &mmu->mappings);
+	mutex_unlock(&mmu->lock);
+
+	return ret;
+}
+
+void etnaviv_iommu_unmap_gem(struct etnaviv_iommu *mmu,
+	struct etnaviv_vram_mapping *mapping)
+{
+	WARN_ON(mapping->use);
+
+	mutex_lock(&mmu->lock);
+
+	/* If the vram node is on the mm, unmap and remove the node */
+	if (mapping->vram_node.mm == &mmu->mm)
+		etnaviv_iommu_remove_mapping(mmu, mapping);
+
+	list_del(&mapping->mmu_node);
+	mutex_unlock(&mmu->lock);
+}
+
+void etnaviv_iommu_destroy(struct etnaviv_iommu *mmu)
+{
+	drm_mm_takedown(&mmu->mm);
+	iommu_domain_free(mmu->domain);
+	kfree(mmu);
+}
+
+struct etnaviv_iommu *etnaviv_iommu_new(struct etnaviv_gpu *gpu,
+	struct iommu_domain *domain, enum etnaviv_iommu_version version)
+{
+	struct etnaviv_iommu *mmu;
+
+	mmu = kzalloc(sizeof(*mmu), GFP_KERNEL);
+	if (!mmu)
+		return ERR_PTR(-ENOMEM);
+
+	mmu->domain = domain;
+	mmu->gpu = gpu;
+	mmu->version = version;
+	mutex_init(&mmu->lock);
+	INIT_LIST_HEAD(&mmu->mappings);
+
+	drm_mm_init(&mmu->mm, domain->geometry.aperture_start,
+		    domain->geometry.aperture_end -
+		      domain->geometry.aperture_start + 1);
+
+	iommu_set_fault_handler(domain, etnaviv_fault_handler, gpu->dev);
+
+	return mmu;
+}
+
+size_t etnaviv_iommu_dump_size(struct etnaviv_iommu *iommu)
+{
+	struct etnaviv_iommu_ops *ops;
+
+	ops = container_of(iommu->domain->ops, struct etnaviv_iommu_ops, ops);
+
+	return ops->dump_size(iommu->domain);
+}
+
+void etnaviv_iommu_dump(struct etnaviv_iommu *iommu, void *buf)
+{
+	struct etnaviv_iommu_ops *ops;
+
+	ops = container_of(iommu->domain->ops, struct etnaviv_iommu_ops, ops);
+
+	ops->dump(iommu->domain, buf);
+}
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h
new file mode 100644
index 000000000000..fff215a47630
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2015 Etnaviv Project
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ETNAVIV_MMU_H__
+#define __ETNAVIV_MMU_H__
+
+#include <linux/iommu.h>
+
+enum etnaviv_iommu_version {
+	ETNAVIV_IOMMU_V1 = 0,
+	ETNAVIV_IOMMU_V2,
+};
+
+struct etnaviv_gpu;
+struct etnaviv_vram_mapping;
+
+struct etnaviv_iommu_ops {
+	struct iommu_ops ops;
+	size_t (*dump_size)(struct iommu_domain *);
+	void (*dump)(struct iommu_domain *, void *);
+};
+
+struct etnaviv_iommu {
+	struct etnaviv_gpu *gpu;
+	struct iommu_domain *domain;
+
+	enum etnaviv_iommu_version version;
+
+	/* memory manager for GPU address area */
+	struct mutex lock;
+	struct list_head mappings;
+	struct drm_mm mm;
+	u32 last_iova;
+	bool need_flush;
+};
+
+struct etnaviv_gem_object;
+
+int etnaviv_iommu_attach(struct etnaviv_iommu *iommu, const char **names,
+	int cnt);
+int etnaviv_iommu_map(struct etnaviv_iommu *iommu, u32 iova,
+	struct sg_table *sgt, unsigned len, int prot);
+int etnaviv_iommu_unmap(struct etnaviv_iommu *iommu, u32 iova,
+	struct sg_table *sgt, unsigned len);
+int etnaviv_iommu_map_gem(struct etnaviv_iommu *mmu,
+	struct etnaviv_gem_object *etnaviv_obj, u32 memory_base,
+	struct etnaviv_vram_mapping *mapping);
+void etnaviv_iommu_unmap_gem(struct etnaviv_iommu *mmu,
+	struct etnaviv_vram_mapping *mapping);
+void etnaviv_iommu_destroy(struct etnaviv_iommu *iommu);
+
+size_t etnaviv_iommu_dump_size(struct etnaviv_iommu *iommu);
+void etnaviv_iommu_dump(struct etnaviv_iommu *iommu, void *buf);
+
+struct etnaviv_iommu *etnaviv_iommu_new(struct etnaviv_gpu *gpu,
+	struct iommu_domain *domain, enum etnaviv_iommu_version version);
+
+#endif /* __ETNAVIV_MMU_H__ */
diff --git a/drivers/gpu/drm/etnaviv/state.xml.h b/drivers/gpu/drm/etnaviv/state.xml.h
new file mode 100644
index 000000000000..368218304566
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/state.xml.h
@@ -0,0 +1,351 @@
+#ifndef STATE_XML
+#define STATE_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- state.xml    (  18882 bytes, from 2015-03-25 11:42:32)
+- common.xml   (  18437 bytes, from 2015-03-25 11:27:41)
+- state_hi.xml (  23420 bytes, from 2015-03-25 11:47:21)
+- state_2d.xml (  51549 bytes, from 2015-03-25 11:25:06)
+- state_3d.xml (  54600 bytes, from 2015-03-25 11:25:19)
+- state_vg.xml (   5973 bytes, from 2015-03-25 11:26:01)
+
+Copyright (C) 2015
+*/
+
+
+#define VARYING_COMPONENT_USE_UNUSED				0x00000000
+#define VARYING_COMPONENT_USE_USED				0x00000001
+#define VARYING_COMPONENT_USE_POINTCOORD_X			0x00000002
+#define VARYING_COMPONENT_USE_POINTCOORD_Y			0x00000003
+#define FE_VERTEX_STREAM_CONTROL_VERTEX_STRIDE__MASK		0x000000ff
+#define FE_VERTEX_STREAM_CONTROL_VERTEX_STRIDE__SHIFT		0
+#define FE_VERTEX_STREAM_CONTROL_VERTEX_STRIDE(x)		(((x) << FE_VERTEX_STREAM_CONTROL_VERTEX_STRIDE__SHIFT) & FE_VERTEX_STREAM_CONTROL_VERTEX_STRIDE__MASK)
+#define VIVS_FE							0x00000000
+
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG(i0)		       (0x00000600 + 0x4*(i0))
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG__ESIZE			0x00000004
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG__LEN			0x00000010
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE__MASK		0x0000000f
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE__SHIFT		0
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_BYTE			0x00000000
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_UNSIGNED_BYTE	0x00000001
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_SHORT		0x00000002
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_UNSIGNED_SHORT	0x00000003
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_INT			0x00000004
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_UNSIGNED_INT		0x00000005
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_FLOAT		0x00000008
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_HALF_FLOAT		0x00000009
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_FIXED		0x0000000b
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_INT_10_10_10_2	0x0000000c
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_UNSIGNED_INT_10_10_10_2	0x0000000d
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_ENDIAN__MASK		0x00000030
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_ENDIAN__SHIFT		4
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_ENDIAN(x)			(((x) << VIVS_FE_VERTEX_ELEMENT_CONFIG_ENDIAN__SHIFT) & VIVS_FE_VERTEX_ELEMENT_CONFIG_ENDIAN__MASK)
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_NONCONSECUTIVE		0x00000080
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_STREAM__MASK		0x00000700
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_STREAM__SHIFT		8
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_STREAM(x)			(((x) << VIVS_FE_VERTEX_ELEMENT_CONFIG_STREAM__SHIFT) & VIVS_FE_VERTEX_ELEMENT_CONFIG_STREAM__MASK)
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_NUM__MASK			0x00003000
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_NUM__SHIFT		12
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_NUM(x)			(((x) << VIVS_FE_VERTEX_ELEMENT_CONFIG_NUM__SHIFT) & VIVS_FE_VERTEX_ELEMENT_CONFIG_NUM__MASK)
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_NORMALIZE__MASK		0x0000c000
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_NORMALIZE__SHIFT		14
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_NORMALIZE_OFF		0x00000000
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_NORMALIZE_ON		0x00008000
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_START__MASK		0x00ff0000
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_START__SHIFT		16
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_START(x)			(((x) << VIVS_FE_VERTEX_ELEMENT_CONFIG_START__SHIFT) & VIVS_FE_VERTEX_ELEMENT_CONFIG_START__MASK)
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_END__MASK			0xff000000
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_END__SHIFT		24
+#define VIVS_FE_VERTEX_ELEMENT_CONFIG_END(x)			(((x) << VIVS_FE_VERTEX_ELEMENT_CONFIG_END__SHIFT) & VIVS_FE_VERTEX_ELEMENT_CONFIG_END__MASK)
+
+#define VIVS_FE_CMD_STREAM_BASE_ADDR				0x00000640
+
+#define VIVS_FE_INDEX_STREAM_BASE_ADDR				0x00000644
+
+#define VIVS_FE_INDEX_STREAM_CONTROL				0x00000648
+#define VIVS_FE_INDEX_STREAM_CONTROL_TYPE__MASK			0x00000003
+#define VIVS_FE_INDEX_STREAM_CONTROL_TYPE__SHIFT		0
+#define VIVS_FE_INDEX_STREAM_CONTROL_TYPE_UNSIGNED_CHAR		0x00000000
+#define VIVS_FE_INDEX_STREAM_CONTROL_TYPE_UNSIGNED_SHORT	0x00000001
+#define VIVS_FE_INDEX_STREAM_CONTROL_TYPE_UNSIGNED_INT		0x00000002
+
+#define VIVS_FE_VERTEX_STREAM_BASE_ADDR				0x0000064c
+
+#define VIVS_FE_VERTEX_STREAM_CONTROL				0x00000650
+
+#define VIVS_FE_COMMAND_ADDRESS					0x00000654
+
+#define VIVS_FE_COMMAND_CONTROL					0x00000658
+#define VIVS_FE_COMMAND_CONTROL_PREFETCH__MASK			0x0000ffff
+#define VIVS_FE_COMMAND_CONTROL_PREFETCH__SHIFT			0
+#define VIVS_FE_COMMAND_CONTROL_PREFETCH(x)			(((x) << VIVS_FE_COMMAND_CONTROL_PREFETCH__SHIFT) & VIVS_FE_COMMAND_CONTROL_PREFETCH__MASK)
+#define VIVS_FE_COMMAND_CONTROL_ENABLE				0x00010000
+
+#define VIVS_FE_DMA_STATUS					0x0000065c
+
+#define VIVS_FE_DMA_DEBUG_STATE					0x00000660
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE__MASK			0x0000001f
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE__SHIFT		0
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_IDLE			0x00000000
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_DEC			0x00000001
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_ADR0			0x00000002
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_LOAD0			0x00000003
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_ADR1			0x00000004
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_LOAD1			0x00000005
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_3DADR			0x00000006
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_3DCMD			0x00000007
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_3DCNTL		0x00000008
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_3DIDXCNTL		0x00000009
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_INITREQDMA		0x0000000a
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_DRAWIDX		0x0000000b
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_DRAW			0x0000000c
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_2DRECT0		0x0000000d
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_2DRECT1		0x0000000e
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_2DDATA0		0x0000000f
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_2DDATA1		0x00000010
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_WAITFIFO		0x00000011
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_WAIT			0x00000012
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_LINK			0x00000013
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_END			0x00000014
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_STALL			0x00000015
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_DMA_STATE__MASK		0x00000300
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_DMA_STATE__SHIFT		8
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_DMA_STATE_IDLE		0x00000000
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_DMA_STATE_START		0x00000100
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_DMA_STATE_REQ		0x00000200
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_DMA_STATE_END		0x00000300
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_FETCH_STATE__MASK		0x00000c00
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_FETCH_STATE__SHIFT		10
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_FETCH_STATE_IDLE		0x00000000
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_FETCH_STATE_RAMVALID	0x00000400
+#define VIVS_FE_DMA_DEBUG_STATE_CMD_FETCH_STATE_VALID		0x00000800
+#define VIVS_FE_DMA_DEBUG_STATE_REQ_DMA_STATE__MASK		0x00003000
+#define VIVS_FE_DMA_DEBUG_STATE_REQ_DMA_STATE__SHIFT		12
+#define VIVS_FE_DMA_DEBUG_STATE_REQ_DMA_STATE_IDLE		0x00000000
+#define VIVS_FE_DMA_DEBUG_STATE_REQ_DMA_STATE_WAITIDX		0x00001000
+#define VIVS_FE_DMA_DEBUG_STATE_REQ_DMA_STATE_CAL		0x00002000
+#define VIVS_FE_DMA_DEBUG_STATE_CAL_STATE__MASK			0x0000c000
+#define VIVS_FE_DMA_DEBUG_STATE_CAL_STATE__SHIFT		14
+#define VIVS_FE_DMA_DEBUG_STATE_CAL_STATE_IDLE			0x00000000
+#define VIVS_FE_DMA_DEBUG_STATE_CAL_STATE_LDADR			0x00004000
+#define VIVS_FE_DMA_DEBUG_STATE_CAL_STATE_IDXCALC		0x00008000
+#define VIVS_FE_DMA_DEBUG_STATE_VE_REQ_STATE__MASK		0x00030000
+#define VIVS_FE_DMA_DEBUG_STATE_VE_REQ_STATE__SHIFT		16
+#define VIVS_FE_DMA_DEBUG_STATE_VE_REQ_STATE_IDLE		0x00000000
+#define VIVS_FE_DMA_DEBUG_STATE_VE_REQ_STATE_CKCACHE		0x00010000
+#define VIVS_FE_DMA_DEBUG_STATE_VE_REQ_STATE_MISS		0x00020000
+
+#define VIVS_FE_DMA_ADDRESS					0x00000664
+
+#define VIVS_FE_DMA_LOW						0x00000668
+
+#define VIVS_FE_DMA_HIGH					0x0000066c
+
+#define VIVS_FE_AUTO_FLUSH					0x00000670
+
+#define VIVS_FE_UNK00678					0x00000678
+
+#define VIVS_FE_UNK0067C					0x0000067c
+
+#define VIVS_FE_VERTEX_STREAMS(i0)			       (0x00000000 + 0x4*(i0))
+#define VIVS_FE_VERTEX_STREAMS__ESIZE				0x00000004
+#define VIVS_FE_VERTEX_STREAMS__LEN				0x00000008
+
+#define VIVS_FE_VERTEX_STREAMS_BASE_ADDR(i0)		       (0x00000680 + 0x4*(i0))
+
+#define VIVS_FE_VERTEX_STREAMS_CONTROL(i0)		       (0x000006a0 + 0x4*(i0))
+
+#define VIVS_FE_UNK00700(i0)				       (0x00000700 + 0x4*(i0))
+#define VIVS_FE_UNK00700__ESIZE					0x00000004
+#define VIVS_FE_UNK00700__LEN					0x00000010
+
+#define VIVS_FE_UNK00740(i0)				       (0x00000740 + 0x4*(i0))
+#define VIVS_FE_UNK00740__ESIZE					0x00000004
+#define VIVS_FE_UNK00740__LEN					0x00000010
+
+#define VIVS_FE_UNK00780(i0)				       (0x00000780 + 0x4*(i0))
+#define VIVS_FE_UNK00780__ESIZE					0x00000004
+#define VIVS_FE_UNK00780__LEN					0x00000010
+
+#define VIVS_GL							0x00000000
+
+#define VIVS_GL_PIPE_SELECT					0x00003800
+#define VIVS_GL_PIPE_SELECT_PIPE__MASK				0x00000001
+#define VIVS_GL_PIPE_SELECT_PIPE__SHIFT				0
+#define VIVS_GL_PIPE_SELECT_PIPE(x)				(((x) << VIVS_GL_PIPE_SELECT_PIPE__SHIFT) & VIVS_GL_PIPE_SELECT_PIPE__MASK)
+
+#define VIVS_GL_EVENT						0x00003804
+#define VIVS_GL_EVENT_EVENT_ID__MASK				0x0000001f
+#define VIVS_GL_EVENT_EVENT_ID__SHIFT				0
+#define VIVS_GL_EVENT_EVENT_ID(x)				(((x) << VIVS_GL_EVENT_EVENT_ID__SHIFT) & VIVS_GL_EVENT_EVENT_ID__MASK)
+#define VIVS_GL_EVENT_FROM_FE					0x00000020
+#define VIVS_GL_EVENT_FROM_PE					0x00000040
+#define VIVS_GL_EVENT_SOURCE__MASK				0x00001f00
+#define VIVS_GL_EVENT_SOURCE__SHIFT				8
+#define VIVS_GL_EVENT_SOURCE(x)					(((x) << VIVS_GL_EVENT_SOURCE__SHIFT) & VIVS_GL_EVENT_SOURCE__MASK)
+
+#define VIVS_GL_SEMAPHORE_TOKEN					0x00003808
+#define VIVS_GL_SEMAPHORE_TOKEN_FROM__MASK			0x0000001f
+#define VIVS_GL_SEMAPHORE_TOKEN_FROM__SHIFT			0
+#define VIVS_GL_SEMAPHORE_TOKEN_FROM(x)				(((x) << VIVS_GL_SEMAPHORE_TOKEN_FROM__SHIFT) & VIVS_GL_SEMAPHORE_TOKEN_FROM__MASK)
+#define VIVS_GL_SEMAPHORE_TOKEN_TO__MASK			0x00001f00
+#define VIVS_GL_SEMAPHORE_TOKEN_TO__SHIFT			8
+#define VIVS_GL_SEMAPHORE_TOKEN_TO(x)				(((x) << VIVS_GL_SEMAPHORE_TOKEN_TO__SHIFT) & VIVS_GL_SEMAPHORE_TOKEN_TO__MASK)
+
+#define VIVS_GL_FLUSH_CACHE					0x0000380c
+#define VIVS_GL_FLUSH_CACHE_DEPTH				0x00000001
+#define VIVS_GL_FLUSH_CACHE_COLOR				0x00000002
+#define VIVS_GL_FLUSH_CACHE_TEXTURE				0x00000004
+#define VIVS_GL_FLUSH_CACHE_PE2D				0x00000008
+#define VIVS_GL_FLUSH_CACHE_TEXTUREVS				0x00000010
+#define VIVS_GL_FLUSH_CACHE_SHADER_L1				0x00000020
+#define VIVS_GL_FLUSH_CACHE_SHADER_L2				0x00000040
+
+#define VIVS_GL_FLUSH_MMU					0x00003810
+#define VIVS_GL_FLUSH_MMU_FLUSH_FEMMU				0x00000001
+#define VIVS_GL_FLUSH_MMU_FLUSH_UNK1				0x00000002
+#define VIVS_GL_FLUSH_MMU_FLUSH_UNK2				0x00000004
+#define VIVS_GL_FLUSH_MMU_FLUSH_PEMMU				0x00000008
+#define VIVS_GL_FLUSH_MMU_FLUSH_UNK4				0x00000010
+
+#define VIVS_GL_VERTEX_ELEMENT_CONFIG				0x00003814
+
+#define VIVS_GL_MULTI_SAMPLE_CONFIG				0x00003818
+#define VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES__MASK		0x00000003
+#define VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES__SHIFT		0
+#define VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES_NONE		0x00000000
+#define VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES_2X		0x00000001
+#define VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES_4X		0x00000002
+#define VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES_MASK		0x00000008
+#define VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES__MASK		0x000000f0
+#define VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES__SHIFT		4
+#define VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES(x)		(((x) << VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES__SHIFT) & VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES__MASK)
+#define VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES_MASK		0x00000100
+#define VIVS_GL_MULTI_SAMPLE_CONFIG_UNK12__MASK			0x00007000
+#define VIVS_GL_MULTI_SAMPLE_CONFIG_UNK12__SHIFT		12
+#define VIVS_GL_MULTI_SAMPLE_CONFIG_UNK12(x)			(((x) << VIVS_GL_MULTI_SAMPLE_CONFIG_UNK12__SHIFT) & VIVS_GL_MULTI_SAMPLE_CONFIG_UNK12__MASK)
+#define VIVS_GL_MULTI_SAMPLE_CONFIG_UNK12_MASK			0x00008000
+#define VIVS_GL_MULTI_SAMPLE_CONFIG_UNK16__MASK			0x00030000
+#define VIVS_GL_MULTI_SAMPLE_CONFIG_UNK16__SHIFT		16
+#define VIVS_GL_MULTI_SAMPLE_CONFIG_UNK16(x)			(((x) << VIVS_GL_MULTI_SAMPLE_CONFIG_UNK16__SHIFT) & VIVS_GL_MULTI_SAMPLE_CONFIG_UNK16__MASK)
+#define VIVS_GL_MULTI_SAMPLE_CONFIG_UNK16_MASK			0x00080000
+
+#define VIVS_GL_VARYING_TOTAL_COMPONENTS			0x0000381c
+#define VIVS_GL_VARYING_TOTAL_COMPONENTS_NUM__MASK		0x000000ff
+#define VIVS_GL_VARYING_TOTAL_COMPONENTS_NUM__SHIFT		0
+#define VIVS_GL_VARYING_TOTAL_COMPONENTS_NUM(x)			(((x) << VIVS_GL_VARYING_TOTAL_COMPONENTS_NUM__SHIFT) & VIVS_GL_VARYING_TOTAL_COMPONENTS_NUM__MASK)
+
+#define VIVS_GL_VARYING_NUM_COMPONENTS				0x00003820
+#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR0__MASK		0x00000007
+#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR0__SHIFT		0
+#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR0(x)			(((x) << VIVS_GL_VARYING_NUM_COMPONENTS_VAR0__SHIFT) & VIVS_GL_VARYING_NUM_COMPONENTS_VAR0__MASK)
+#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR1__MASK		0x00000070
+#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR1__SHIFT		4
+#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR1(x)			(((x) << VIVS_GL_VARYING_NUM_COMPONENTS_VAR1__SHIFT) & VIVS_GL_VARYING_NUM_COMPONENTS_VAR1__MASK)
+#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR2__MASK		0x00000700
+#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR2__SHIFT		8
+#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR2(x)			(((x) << VIVS_GL_VARYING_NUM_COMPONENTS_VAR2__SHIFT) & VIVS_GL_VARYING_NUM_COMPONENTS_VAR2__MASK)
+#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR3__MASK		0x00007000
+#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR3__SHIFT		12
+#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR3(x)			(((x) << VIVS_GL_VARYING_NUM_COMPONENTS_VAR3__SHIFT) & VIVS_GL_VARYING_NUM_COMPONENTS_VAR3__MASK)
+#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR4__MASK		0x00070000
+#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR4__SHIFT		16
+#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR4(x)			(((x) << VIVS_GL_VARYING_NUM_COMPONENTS_VAR4__SHIFT) & VIVS_GL_VARYING_NUM_COMPONENTS_VAR4__MASK)
+#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR5__MASK		0x00700000
+#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR5__SHIFT		20
+#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR5(x)			(((x) << VIVS_GL_VARYING_NUM_COMPONENTS_VAR5__SHIFT) & VIVS_GL_VARYING_NUM_COMPONENTS_VAR5__MASK)
+#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR6__MASK		0x07000000
+#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR6__SHIFT		24
+#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR6(x)			(((x) << VIVS_GL_VARYING_NUM_COMPONENTS_VAR6__SHIFT) & VIVS_GL_VARYING_NUM_COMPONENTS_VAR6__MASK)
+#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR7__MASK		0x70000000
+#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR7__SHIFT		28
+#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR7(x)			(((x) << VIVS_GL_VARYING_NUM_COMPONENTS_VAR7__SHIFT) & VIVS_GL_VARYING_NUM_COMPONENTS_VAR7__MASK)
+
+#define VIVS_GL_VARYING_COMPONENT_USE(i0)		       (0x00003828 + 0x4*(i0))
+#define VIVS_GL_VARYING_COMPONENT_USE__ESIZE			0x00000004
+#define VIVS_GL_VARYING_COMPONENT_USE__LEN			0x00000002
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP0__MASK		0x00000003
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP0__SHIFT		0
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP0(x)			(((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP0__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP0__MASK)
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP1__MASK		0x0000000c
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP1__SHIFT		2
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP1(x)			(((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP1__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP1__MASK)
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP2__MASK		0x00000030
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP2__SHIFT		4
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP2(x)			(((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP2__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP2__MASK)
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP3__MASK		0x000000c0
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP3__SHIFT		6
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP3(x)			(((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP3__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP3__MASK)
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP4__MASK		0x00000300
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP4__SHIFT		8
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP4(x)			(((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP4__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP4__MASK)
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP5__MASK		0x00000c00
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP5__SHIFT		10
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP5(x)			(((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP5__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP5__MASK)
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP6__MASK		0x00003000
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP6__SHIFT		12
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP6(x)			(((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP6__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP6__MASK)
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP7__MASK		0x0000c000
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP7__SHIFT		14
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP7(x)			(((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP7__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP7__MASK)
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP8__MASK		0x00030000
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP8__SHIFT		16
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP8(x)			(((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP8__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP8__MASK)
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP9__MASK		0x000c0000
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP9__SHIFT		18
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP9(x)			(((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP9__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP9__MASK)
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP10__MASK		0x00300000
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP10__SHIFT		20
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP10(x)			(((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP10__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP10__MASK)
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP11__MASK		0x00c00000
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP11__SHIFT		22
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP11(x)			(((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP11__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP11__MASK)
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP12__MASK		0x03000000
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP12__SHIFT		24
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP12(x)			(((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP12__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP12__MASK)
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP13__MASK		0x0c000000
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP13__SHIFT		26
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP13(x)			(((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP13__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP13__MASK)
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP14__MASK		0x30000000
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP14__SHIFT		28
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP14(x)			(((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP14__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP14__MASK)
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP15__MASK		0xc0000000
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP15__SHIFT		30
+#define VIVS_GL_VARYING_COMPONENT_USE_COMP15(x)			(((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP15__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP15__MASK)
+
+#define VIVS_GL_UNK03834					0x00003834
+
+#define VIVS_GL_UNK03838					0x00003838
+
+#define VIVS_GL_API_MODE					0x0000384c
+#define VIVS_GL_API_MODE_OPENGL					0x00000000
+#define VIVS_GL_API_MODE_OPENVG					0x00000001
+#define VIVS_GL_API_MODE_OPENCL					0x00000002
+
+#define VIVS_GL_CONTEXT_POINTER					0x00003850
+
+#define VIVS_GL_UNK03A00					0x00003a00
+
+#define VIVS_GL_STALL_TOKEN					0x00003c00
+#define VIVS_GL_STALL_TOKEN_FROM__MASK				0x0000001f
+#define VIVS_GL_STALL_TOKEN_FROM__SHIFT				0
+#define VIVS_GL_STALL_TOKEN_FROM(x)				(((x) << VIVS_GL_STALL_TOKEN_FROM__SHIFT) & VIVS_GL_STALL_TOKEN_FROM__MASK)
+#define VIVS_GL_STALL_TOKEN_TO__MASK				0x00001f00
+#define VIVS_GL_STALL_TOKEN_TO__SHIFT				8
+#define VIVS_GL_STALL_TOKEN_TO(x)				(((x) << VIVS_GL_STALL_TOKEN_TO__SHIFT) & VIVS_GL_STALL_TOKEN_TO__MASK)
+#define VIVS_GL_STALL_TOKEN_FLIP0				0x40000000
+#define VIVS_GL_STALL_TOKEN_FLIP1				0x80000000
+
+#define VIVS_DUMMY						0x00000000
+
+#define VIVS_DUMMY_DUMMY					0x0003fffc
+
+
+#endif /* STATE_XML */
diff --git a/drivers/gpu/drm/etnaviv/state_hi.xml.h b/drivers/gpu/drm/etnaviv/state_hi.xml.h
new file mode 100644
index 000000000000..0064f2640396
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/state_hi.xml.h
@@ -0,0 +1,407 @@
+#ifndef STATE_HI_XML
+#define STATE_HI_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- state_hi.xml (  23420 bytes, from 2015-03-25 11:47:21)
+- common.xml   (  18437 bytes, from 2015-03-25 11:27:41)
+
+Copyright (C) 2015
+*/
+
+
+#define MMU_EXCEPTION_SLAVE_NOT_PRESENT				0x00000001
+#define MMU_EXCEPTION_PAGE_NOT_PRESENT				0x00000002
+#define MMU_EXCEPTION_WRITE_VIOLATION				0x00000003
+#define VIVS_HI							0x00000000
+
+#define VIVS_HI_CLOCK_CONTROL					0x00000000
+#define VIVS_HI_CLOCK_CONTROL_CLK3D_DIS				0x00000001
+#define VIVS_HI_CLOCK_CONTROL_CLK2D_DIS				0x00000002
+#define VIVS_HI_CLOCK_CONTROL_FSCALE_VAL__MASK			0x000001fc
+#define VIVS_HI_CLOCK_CONTROL_FSCALE_VAL__SHIFT			2
+#define VIVS_HI_CLOCK_CONTROL_FSCALE_VAL(x)			(((x) << VIVS_HI_CLOCK_CONTROL_FSCALE_VAL__SHIFT) & VIVS_HI_CLOCK_CONTROL_FSCALE_VAL__MASK)
+#define VIVS_HI_CLOCK_CONTROL_FSCALE_CMD_LOAD			0x00000200
+#define VIVS_HI_CLOCK_CONTROL_DISABLE_RAM_CLK_GATING		0x00000400
+#define VIVS_HI_CLOCK_CONTROL_DISABLE_DEBUG_REGISTERS		0x00000800
+#define VIVS_HI_CLOCK_CONTROL_SOFT_RESET			0x00001000
+#define VIVS_HI_CLOCK_CONTROL_IDLE_3D				0x00010000
+#define VIVS_HI_CLOCK_CONTROL_IDLE_2D				0x00020000
+#define VIVS_HI_CLOCK_CONTROL_IDLE_VG				0x00040000
+#define VIVS_HI_CLOCK_CONTROL_ISOLATE_GPU			0x00080000
+#define VIVS_HI_CLOCK_CONTROL_DEBUG_PIXEL_PIPE__MASK		0x00f00000
+#define VIVS_HI_CLOCK_CONTROL_DEBUG_PIXEL_PIPE__SHIFT		20
+#define VIVS_HI_CLOCK_CONTROL_DEBUG_PIXEL_PIPE(x)		(((x) << VIVS_HI_CLOCK_CONTROL_DEBUG_PIXEL_PIPE__SHIFT) & VIVS_HI_CLOCK_CONTROL_DEBUG_PIXEL_PIPE__MASK)
+
+#define VIVS_HI_IDLE_STATE					0x00000004
+#define VIVS_HI_IDLE_STATE_FE					0x00000001
+#define VIVS_HI_IDLE_STATE_DE					0x00000002
+#define VIVS_HI_IDLE_STATE_PE					0x00000004
+#define VIVS_HI_IDLE_STATE_SH					0x00000008
+#define VIVS_HI_IDLE_STATE_PA					0x00000010
+#define VIVS_HI_IDLE_STATE_SE					0x00000020
+#define VIVS_HI_IDLE_STATE_RA					0x00000040
+#define VIVS_HI_IDLE_STATE_TX					0x00000080
+#define VIVS_HI_IDLE_STATE_VG					0x00000100
+#define VIVS_HI_IDLE_STATE_IM					0x00000200
+#define VIVS_HI_IDLE_STATE_FP					0x00000400
+#define VIVS_HI_IDLE_STATE_TS					0x00000800
+#define VIVS_HI_IDLE_STATE_AXI_LP				0x80000000
+
+#define VIVS_HI_AXI_CONFIG					0x00000008
+#define VIVS_HI_AXI_CONFIG_AWID__MASK				0x0000000f
+#define VIVS_HI_AXI_CONFIG_AWID__SHIFT				0
+#define VIVS_HI_AXI_CONFIG_AWID(x)				(((x) << VIVS_HI_AXI_CONFIG_AWID__SHIFT) & VIVS_HI_AXI_CONFIG_AWID__MASK)
+#define VIVS_HI_AXI_CONFIG_ARID__MASK				0x000000f0
+#define VIVS_HI_AXI_CONFIG_ARID__SHIFT				4
+#define VIVS_HI_AXI_CONFIG_ARID(x)				(((x) << VIVS_HI_AXI_CONFIG_ARID__SHIFT) & VIVS_HI_AXI_CONFIG_ARID__MASK)
+#define VIVS_HI_AXI_CONFIG_AWCACHE__MASK			0x00000f00
+#define VIVS_HI_AXI_CONFIG_AWCACHE__SHIFT			8
+#define VIVS_HI_AXI_CONFIG_AWCACHE(x)				(((x) << VIVS_HI_AXI_CONFIG_AWCACHE__SHIFT) & VIVS_HI_AXI_CONFIG_AWCACHE__MASK)
+#define VIVS_HI_AXI_CONFIG_ARCACHE__MASK			0x0000f000
+#define VIVS_HI_AXI_CONFIG_ARCACHE__SHIFT			12
+#define VIVS_HI_AXI_CONFIG_ARCACHE(x)				(((x) << VIVS_HI_AXI_CONFIG_ARCACHE__SHIFT) & VIVS_HI_AXI_CONFIG_ARCACHE__MASK)
+
+#define VIVS_HI_AXI_STATUS					0x0000000c
+#define VIVS_HI_AXI_STATUS_WR_ERR_ID__MASK			0x0000000f
+#define VIVS_HI_AXI_STATUS_WR_ERR_ID__SHIFT			0
+#define VIVS_HI_AXI_STATUS_WR_ERR_ID(x)				(((x) << VIVS_HI_AXI_STATUS_WR_ERR_ID__SHIFT) & VIVS_HI_AXI_STATUS_WR_ERR_ID__MASK)
+#define VIVS_HI_AXI_STATUS_RD_ERR_ID__MASK			0x000000f0
+#define VIVS_HI_AXI_STATUS_RD_ERR_ID__SHIFT			4
+#define VIVS_HI_AXI_STATUS_RD_ERR_ID(x)				(((x) << VIVS_HI_AXI_STATUS_RD_ERR_ID__SHIFT) & VIVS_HI_AXI_STATUS_RD_ERR_ID__MASK)
+#define VIVS_HI_AXI_STATUS_DET_WR_ERR				0x00000100
+#define VIVS_HI_AXI_STATUS_DET_RD_ERR				0x00000200
+
+#define VIVS_HI_INTR_ACKNOWLEDGE				0x00000010
+#define VIVS_HI_INTR_ACKNOWLEDGE_INTR_VEC__MASK			0x7fffffff
+#define VIVS_HI_INTR_ACKNOWLEDGE_INTR_VEC__SHIFT		0
+#define VIVS_HI_INTR_ACKNOWLEDGE_INTR_VEC(x)			(((x) << VIVS_HI_INTR_ACKNOWLEDGE_INTR_VEC__SHIFT) & VIVS_HI_INTR_ACKNOWLEDGE_INTR_VEC__MASK)
+#define VIVS_HI_INTR_ACKNOWLEDGE_AXI_BUS_ERROR			0x80000000
+
+#define VIVS_HI_INTR_ENBL					0x00000014
+#define VIVS_HI_INTR_ENBL_INTR_ENBL_VEC__MASK			0xffffffff
+#define VIVS_HI_INTR_ENBL_INTR_ENBL_VEC__SHIFT			0
+#define VIVS_HI_INTR_ENBL_INTR_ENBL_VEC(x)			(((x) << VIVS_HI_INTR_ENBL_INTR_ENBL_VEC__SHIFT) & VIVS_HI_INTR_ENBL_INTR_ENBL_VEC__MASK)
+
+#define VIVS_HI_CHIP_IDENTITY					0x00000018
+#define VIVS_HI_CHIP_IDENTITY_FAMILY__MASK			0xff000000
+#define VIVS_HI_CHIP_IDENTITY_FAMILY__SHIFT			24
+#define VIVS_HI_CHIP_IDENTITY_FAMILY(x)				(((x) << VIVS_HI_CHIP_IDENTITY_FAMILY__SHIFT) & VIVS_HI_CHIP_IDENTITY_FAMILY__MASK)
+#define VIVS_HI_CHIP_IDENTITY_PRODUCT__MASK			0x00ff0000
+#define VIVS_HI_CHIP_IDENTITY_PRODUCT__SHIFT			16
+#define VIVS_HI_CHIP_IDENTITY_PRODUCT(x)			(((x) << VIVS_HI_CHIP_IDENTITY_PRODUCT__SHIFT) & VIVS_HI_CHIP_IDENTITY_PRODUCT__MASK)
+#define VIVS_HI_CHIP_IDENTITY_REVISION__MASK			0x0000f000
+#define VIVS_HI_CHIP_IDENTITY_REVISION__SHIFT			12
+#define VIVS_HI_CHIP_IDENTITY_REVISION(x)			(((x) << VIVS_HI_CHIP_IDENTITY_REVISION__SHIFT) & VIVS_HI_CHIP_IDENTITY_REVISION__MASK)
+
+#define VIVS_HI_CHIP_FEATURE					0x0000001c
+
+#define VIVS_HI_CHIP_MODEL					0x00000020
+
+#define VIVS_HI_CHIP_REV					0x00000024
+
+#define VIVS_HI_CHIP_DATE					0x00000028
+
+#define VIVS_HI_CHIP_TIME					0x0000002c
+
+#define VIVS_HI_CHIP_MINOR_FEATURE_0				0x00000034
+
+#define VIVS_HI_CACHE_CONTROL					0x00000038
+
+#define VIVS_HI_MEMORY_COUNTER_RESET				0x0000003c
+
+#define VIVS_HI_PROFILE_READ_BYTES8				0x00000040
+
+#define VIVS_HI_PROFILE_WRITE_BYTES8				0x00000044
+
+#define VIVS_HI_CHIP_SPECS					0x00000048
+#define VIVS_HI_CHIP_SPECS_STREAM_COUNT__MASK			0x0000000f
+#define VIVS_HI_CHIP_SPECS_STREAM_COUNT__SHIFT			0
+#define VIVS_HI_CHIP_SPECS_STREAM_COUNT(x)			(((x) << VIVS_HI_CHIP_SPECS_STREAM_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_STREAM_COUNT__MASK)
+#define VIVS_HI_CHIP_SPECS_REGISTER_MAX__MASK			0x000000f0
+#define VIVS_HI_CHIP_SPECS_REGISTER_MAX__SHIFT			4
+#define VIVS_HI_CHIP_SPECS_REGISTER_MAX(x)			(((x) << VIVS_HI_CHIP_SPECS_REGISTER_MAX__SHIFT) & VIVS_HI_CHIP_SPECS_REGISTER_MAX__MASK)
+#define VIVS_HI_CHIP_SPECS_THREAD_COUNT__MASK			0x00000f00
+#define VIVS_HI_CHIP_SPECS_THREAD_COUNT__SHIFT			8
+#define VIVS_HI_CHIP_SPECS_THREAD_COUNT(x)			(((x) << VIVS_HI_CHIP_SPECS_THREAD_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_THREAD_COUNT__MASK)
+#define VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__MASK		0x0001f000
+#define VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__SHIFT		12
+#define VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE(x)			(((x) << VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__SHIFT) & VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__MASK)
+#define VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__MASK		0x01f00000
+#define VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__SHIFT		20
+#define VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT(x)			(((x) << VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__MASK)
+#define VIVS_HI_CHIP_SPECS_PIXEL_PIPES__MASK			0x0e000000
+#define VIVS_HI_CHIP_SPECS_PIXEL_PIPES__SHIFT			25
+#define VIVS_HI_CHIP_SPECS_PIXEL_PIPES(x)			(((x) << VIVS_HI_CHIP_SPECS_PIXEL_PIPES__SHIFT) & VIVS_HI_CHIP_SPECS_PIXEL_PIPES__MASK)
+#define VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__MASK	0xf0000000
+#define VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__SHIFT	28
+#define VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE(x)		(((x) << VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__SHIFT) & VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__MASK)
+
+#define VIVS_HI_PROFILE_WRITE_BURSTS				0x0000004c
+
+#define VIVS_HI_PROFILE_WRITE_REQUESTS				0x00000050
+
+#define VIVS_HI_PROFILE_READ_BURSTS				0x00000058
+
+#define VIVS_HI_PROFILE_READ_REQUESTS				0x0000005c
+
+#define VIVS_HI_PROFILE_READ_LASTS				0x00000060
+
+#define VIVS_HI_GP_OUT0						0x00000064
+
+#define VIVS_HI_GP_OUT1						0x00000068
+
+#define VIVS_HI_GP_OUT2						0x0000006c
+
+#define VIVS_HI_AXI_CONTROL					0x00000070
+#define VIVS_HI_AXI_CONTROL_WR_FULL_BURST_MODE			0x00000001
+
+#define VIVS_HI_CHIP_MINOR_FEATURE_1				0x00000074
+
+#define VIVS_HI_PROFILE_TOTAL_CYCLES				0x00000078
+
+#define VIVS_HI_PROFILE_IDLE_CYCLES				0x0000007c
+
+#define VIVS_HI_CHIP_SPECS_2					0x00000080
+#define VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__MASK			0x000000ff
+#define VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__SHIFT			0
+#define VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE(x)			(((x) << VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__SHIFT) & VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__MASK)
+#define VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__MASK		0x0000ff00
+#define VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__SHIFT		8
+#define VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT(x)		(((x) << VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__MASK)
+#define VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__MASK		0xffff0000
+#define VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__SHIFT		16
+#define VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS(x)			(((x) << VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__SHIFT) & VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__MASK)
+
+#define VIVS_HI_CHIP_MINOR_FEATURE_2				0x00000084
+
+#define VIVS_HI_CHIP_MINOR_FEATURE_3				0x00000088
+
+#define VIVS_HI_CHIP_MINOR_FEATURE_4				0x00000094
+
+#define VIVS_PM							0x00000000
+
+#define VIVS_PM_POWER_CONTROLS					0x00000100
+#define VIVS_PM_POWER_CONTROLS_ENABLE_MODULE_CLOCK_GATING	0x00000001
+#define VIVS_PM_POWER_CONTROLS_DISABLE_STALL_MODULE_CLOCK_GATING	0x00000002
+#define VIVS_PM_POWER_CONTROLS_DISABLE_STARVE_MODULE_CLOCK_GATING	0x00000004
+#define VIVS_PM_POWER_CONTROLS_TURN_ON_COUNTER__MASK		0x000000f0
+#define VIVS_PM_POWER_CONTROLS_TURN_ON_COUNTER__SHIFT		4
+#define VIVS_PM_POWER_CONTROLS_TURN_ON_COUNTER(x)		(((x) << VIVS_PM_POWER_CONTROLS_TURN_ON_COUNTER__SHIFT) & VIVS_PM_POWER_CONTROLS_TURN_ON_COUNTER__MASK)
+#define VIVS_PM_POWER_CONTROLS_TURN_OFF_COUNTER__MASK		0xffff0000
+#define VIVS_PM_POWER_CONTROLS_TURN_OFF_COUNTER__SHIFT		16
+#define VIVS_PM_POWER_CONTROLS_TURN_OFF_COUNTER(x)		(((x) << VIVS_PM_POWER_CONTROLS_TURN_OFF_COUNTER__SHIFT) & VIVS_PM_POWER_CONTROLS_TURN_OFF_COUNTER__MASK)
+
+#define VIVS_PM_MODULE_CONTROLS					0x00000104
+#define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_FE	0x00000001
+#define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_DE	0x00000002
+#define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_PE	0x00000004
+
+#define VIVS_PM_MODULE_STATUS					0x00000108
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_FE		0x00000001
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_DE		0x00000002
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_PE		0x00000004
+
+#define VIVS_PM_PULSE_EATER					0x0000010c
+
+#define VIVS_MMUv2						0x00000000
+
+#define VIVS_MMUv2_SAFE_ADDRESS					0x00000180
+
+#define VIVS_MMUv2_CONFIGURATION				0x00000184
+#define VIVS_MMUv2_CONFIGURATION_MODE__MASK			0x00000001
+#define VIVS_MMUv2_CONFIGURATION_MODE__SHIFT			0
+#define VIVS_MMUv2_CONFIGURATION_MODE_MODE4_K			0x00000000
+#define VIVS_MMUv2_CONFIGURATION_MODE_MODE1_K			0x00000001
+#define VIVS_MMUv2_CONFIGURATION_MODE_MASK			0x00000008
+#define VIVS_MMUv2_CONFIGURATION_FLUSH__MASK			0x00000010
+#define VIVS_MMUv2_CONFIGURATION_FLUSH__SHIFT			4
+#define VIVS_MMUv2_CONFIGURATION_FLUSH_FLUSH			0x00000010
+#define VIVS_MMUv2_CONFIGURATION_FLUSH_MASK			0x00000080
+#define VIVS_MMUv2_CONFIGURATION_ADDRESS_MASK			0x00000100
+#define VIVS_MMUv2_CONFIGURATION_ADDRESS__MASK			0xfffffc00
+#define VIVS_MMUv2_CONFIGURATION_ADDRESS__SHIFT			10
+#define VIVS_MMUv2_CONFIGURATION_ADDRESS(x)			(((x) << VIVS_MMUv2_CONFIGURATION_ADDRESS__SHIFT) & VIVS_MMUv2_CONFIGURATION_ADDRESS__MASK)
+
+#define VIVS_MMUv2_STATUS					0x00000188
+#define VIVS_MMUv2_STATUS_EXCEPTION0__MASK			0x00000003
+#define VIVS_MMUv2_STATUS_EXCEPTION0__SHIFT			0
+#define VIVS_MMUv2_STATUS_EXCEPTION0(x)				(((x) << VIVS_MMUv2_STATUS_EXCEPTION0__SHIFT) & VIVS_MMUv2_STATUS_EXCEPTION0__MASK)
+#define VIVS_MMUv2_STATUS_EXCEPTION1__MASK			0x00000030
+#define VIVS_MMUv2_STATUS_EXCEPTION1__SHIFT			4
+#define VIVS_MMUv2_STATUS_EXCEPTION1(x)				(((x) << VIVS_MMUv2_STATUS_EXCEPTION1__SHIFT) & VIVS_MMUv2_STATUS_EXCEPTION1__MASK)
+#define VIVS_MMUv2_STATUS_EXCEPTION2__MASK			0x00000300
+#define VIVS_MMUv2_STATUS_EXCEPTION2__SHIFT			8
+#define VIVS_MMUv2_STATUS_EXCEPTION2(x)				(((x) << VIVS_MMUv2_STATUS_EXCEPTION2__SHIFT) & VIVS_MMUv2_STATUS_EXCEPTION2__MASK)
+#define VIVS_MMUv2_STATUS_EXCEPTION3__MASK			0x00003000
+#define VIVS_MMUv2_STATUS_EXCEPTION3__SHIFT			12
+#define VIVS_MMUv2_STATUS_EXCEPTION3(x)				(((x) << VIVS_MMUv2_STATUS_EXCEPTION3__SHIFT) & VIVS_MMUv2_STATUS_EXCEPTION3__MASK)
+
+#define VIVS_MMUv2_CONTROL					0x0000018c
+#define VIVS_MMUv2_CONTROL_ENABLE				0x00000001
+
+#define VIVS_MMUv2_EXCEPTION_ADDR(i0)			       (0x00000190 + 0x4*(i0))
+#define VIVS_MMUv2_EXCEPTION_ADDR__ESIZE			0x00000004
+#define VIVS_MMUv2_EXCEPTION_ADDR__LEN				0x00000004
+
+#define VIVS_MC							0x00000000
+
+#define VIVS_MC_MMU_FE_PAGE_TABLE				0x00000400
+
+#define VIVS_MC_MMU_TX_PAGE_TABLE				0x00000404
+
+#define VIVS_MC_MMU_PE_PAGE_TABLE				0x00000408
+
+#define VIVS_MC_MMU_PEZ_PAGE_TABLE				0x0000040c
+
+#define VIVS_MC_MMU_RA_PAGE_TABLE				0x00000410
+
+#define VIVS_MC_DEBUG_MEMORY					0x00000414
+#define VIVS_MC_DEBUG_MEMORY_SPECIAL_PATCH_GC320		0x00000008
+#define VIVS_MC_DEBUG_MEMORY_FAST_CLEAR_BYPASS			0x00100000
+#define VIVS_MC_DEBUG_MEMORY_COMPRESSION_BYPASS			0x00200000
+
+#define VIVS_MC_MEMORY_BASE_ADDR_RA				0x00000418
+
+#define VIVS_MC_MEMORY_BASE_ADDR_FE				0x0000041c
+
+#define VIVS_MC_MEMORY_BASE_ADDR_TX				0x00000420
+
+#define VIVS_MC_MEMORY_BASE_ADDR_PEZ				0x00000424
+
+#define VIVS_MC_MEMORY_BASE_ADDR_PE				0x00000428
+
+#define VIVS_MC_MEMORY_TIMING_CONTROL				0x0000042c
+
+#define VIVS_MC_MEMORY_FLUSH					0x00000430
+
+#define VIVS_MC_PROFILE_CYCLE_COUNTER				0x00000438
+
+#define VIVS_MC_DEBUG_READ0					0x0000043c
+
+#define VIVS_MC_DEBUG_READ1					0x00000440
+
+#define VIVS_MC_DEBUG_WRITE					0x00000444
+
+#define VIVS_MC_PROFILE_RA_READ					0x00000448
+
+#define VIVS_MC_PROFILE_TX_READ					0x0000044c
+
+#define VIVS_MC_PROFILE_FE_READ					0x00000450
+
+#define VIVS_MC_PROFILE_PE_READ					0x00000454
+
+#define VIVS_MC_PROFILE_DE_READ					0x00000458
+
+#define VIVS_MC_PROFILE_SH_READ					0x0000045c
+
+#define VIVS_MC_PROFILE_PA_READ					0x00000460
+
+#define VIVS_MC_PROFILE_SE_READ					0x00000464
+
+#define VIVS_MC_PROFILE_MC_READ					0x00000468
+
+#define VIVS_MC_PROFILE_HI_READ					0x0000046c
+
+#define VIVS_MC_PROFILE_CONFIG0					0x00000470
+#define VIVS_MC_PROFILE_CONFIG0_FE__MASK			0x0000000f
+#define VIVS_MC_PROFILE_CONFIG0_FE__SHIFT			0
+#define VIVS_MC_PROFILE_CONFIG0_FE_RESET			0x0000000f
+#define VIVS_MC_PROFILE_CONFIG0_DE__MASK			0x00000f00
+#define VIVS_MC_PROFILE_CONFIG0_DE__SHIFT			8
+#define VIVS_MC_PROFILE_CONFIG0_DE_RESET			0x00000f00
+#define VIVS_MC_PROFILE_CONFIG0_PE__MASK			0x000f0000
+#define VIVS_MC_PROFILE_CONFIG0_PE__SHIFT			16
+#define VIVS_MC_PROFILE_CONFIG0_PE_PIXEL_COUNT_KILLED_BY_COLOR_PIPE	0x00000000
+#define VIVS_MC_PROFILE_CONFIG0_PE_PIXEL_COUNT_KILLED_BY_DEPTH_PIPE	0x00010000
+#define VIVS_MC_PROFILE_CONFIG0_PE_PIXEL_COUNT_DRAWN_BY_COLOR_PIPE	0x00020000
+#define VIVS_MC_PROFILE_CONFIG0_PE_PIXEL_COUNT_DRAWN_BY_DEPTH_PIPE	0x00030000
+#define VIVS_MC_PROFILE_CONFIG0_PE_PIXELS_RENDERED_2D		0x000b0000
+#define VIVS_MC_PROFILE_CONFIG0_PE_RESET			0x000f0000
+#define VIVS_MC_PROFILE_CONFIG0_SH__MASK			0x0f000000
+#define VIVS_MC_PROFILE_CONFIG0_SH__SHIFT			24
+#define VIVS_MC_PROFILE_CONFIG0_SH_SHADER_CYCLES		0x04000000
+#define VIVS_MC_PROFILE_CONFIG0_SH_PS_INST_COUNTER		0x07000000
+#define VIVS_MC_PROFILE_CONFIG0_SH_RENDERED_PIXEL_COUNTER	0x08000000
+#define VIVS_MC_PROFILE_CONFIG0_SH_VS_INST_COUNTER		0x09000000
+#define VIVS_MC_PROFILE_CONFIG0_SH_RENDERED_VERTICE_COUNTER	0x0a000000
+#define VIVS_MC_PROFILE_CONFIG0_SH_VTX_BRANCH_INST_COUNTER	0x0b000000
+#define VIVS_MC_PROFILE_CONFIG0_SH_VTX_TEXLD_INST_COUNTER	0x0c000000
+#define VIVS_MC_PROFILE_CONFIG0_SH_PXL_BRANCH_INST_COUNTER	0x0d000000
+#define VIVS_MC_PROFILE_CONFIG0_SH_PXL_TEXLD_INST_COUNTER	0x0e000000
+#define VIVS_MC_PROFILE_CONFIG0_SH_RESET			0x0f000000
+
+#define VIVS_MC_PROFILE_CONFIG1					0x00000474
+#define VIVS_MC_PROFILE_CONFIG1_PA__MASK			0x0000000f
+#define VIVS_MC_PROFILE_CONFIG1_PA__SHIFT			0
+#define VIVS_MC_PROFILE_CONFIG1_PA_INPUT_VTX_COUNTER		0x00000003
+#define VIVS_MC_PROFILE_CONFIG1_PA_INPUT_PRIM_COUNTER		0x00000004
+#define VIVS_MC_PROFILE_CONFIG1_PA_OUTPUT_PRIM_COUNTER		0x00000005
+#define VIVS_MC_PROFILE_CONFIG1_PA_DEPTH_CLIPPED_COUNTER	0x00000006
+#define VIVS_MC_PROFILE_CONFIG1_PA_TRIVIAL_REJECTED_COUNTER	0x00000007
+#define VIVS_MC_PROFILE_CONFIG1_PA_CULLED_COUNTER		0x00000008
+#define VIVS_MC_PROFILE_CONFIG1_PA_RESET			0x0000000f
+#define VIVS_MC_PROFILE_CONFIG1_SE__MASK			0x00000f00
+#define VIVS_MC_PROFILE_CONFIG1_SE__SHIFT			8
+#define VIVS_MC_PROFILE_CONFIG1_SE_CULLED_TRIANGLE_COUNT	0x00000000
+#define VIVS_MC_PROFILE_CONFIG1_SE_CULLED_LINES_COUNT		0x00000100
+#define VIVS_MC_PROFILE_CONFIG1_SE_RESET			0x00000f00
+#define VIVS_MC_PROFILE_CONFIG1_RA__MASK			0x000f0000
+#define VIVS_MC_PROFILE_CONFIG1_RA__SHIFT			16
+#define VIVS_MC_PROFILE_CONFIG1_RA_VALID_PIXEL_COUNT		0x00000000
+#define VIVS_MC_PROFILE_CONFIG1_RA_TOTAL_QUAD_COUNT		0x00010000
+#define VIVS_MC_PROFILE_CONFIG1_RA_VALID_QUAD_COUNT_AFTER_EARLY_Z	0x00020000
+#define VIVS_MC_PROFILE_CONFIG1_RA_TOTAL_PRIMITIVE_COUNT	0x00030000
+#define VIVS_MC_PROFILE_CONFIG1_RA_PIPE_CACHE_MISS_COUNTER	0x00090000
+#define VIVS_MC_PROFILE_CONFIG1_RA_PREFETCH_CACHE_MISS_COUNTER	0x000a0000
+#define VIVS_MC_PROFILE_CONFIG1_RA_CULLED_QUAD_COUNT		0x000b0000
+#define VIVS_MC_PROFILE_CONFIG1_RA_RESET			0x000f0000
+#define VIVS_MC_PROFILE_CONFIG1_TX__MASK			0x0f000000
+#define VIVS_MC_PROFILE_CONFIG1_TX__SHIFT			24
+#define VIVS_MC_PROFILE_CONFIG1_TX_TOTAL_BILINEAR_REQUESTS	0x00000000
+#define VIVS_MC_PROFILE_CONFIG1_TX_TOTAL_TRILINEAR_REQUESTS	0x01000000
+#define VIVS_MC_PROFILE_CONFIG1_TX_TOTAL_DISCARDED_TEXTURE_REQUESTS	0x02000000
+#define VIVS_MC_PROFILE_CONFIG1_TX_TOTAL_TEXTURE_REQUESTS	0x03000000
+#define VIVS_MC_PROFILE_CONFIG1_TX_UNKNOWN			0x04000000
+#define VIVS_MC_PROFILE_CONFIG1_TX_MEM_READ_COUNT		0x05000000
+#define VIVS_MC_PROFILE_CONFIG1_TX_MEM_READ_IN_8B_COUNT		0x06000000
+#define VIVS_MC_PROFILE_CONFIG1_TX_CACHE_MISS_COUNT		0x07000000
+#define VIVS_MC_PROFILE_CONFIG1_TX_CACHE_HIT_TEXEL_COUNT	0x08000000
+#define VIVS_MC_PROFILE_CONFIG1_TX_CACHE_MISS_TEXEL_COUNT	0x09000000
+#define VIVS_MC_PROFILE_CONFIG1_TX_RESET			0x0f000000
+
+#define VIVS_MC_PROFILE_CONFIG2					0x00000478
+#define VIVS_MC_PROFILE_CONFIG2_MC__MASK			0x0000000f
+#define VIVS_MC_PROFILE_CONFIG2_MC__SHIFT			0
+#define VIVS_MC_PROFILE_CONFIG2_MC_TOTAL_READ_REQ_8B_FROM_PIPELINE	0x00000001
+#define VIVS_MC_PROFILE_CONFIG2_MC_TOTAL_READ_REQ_8B_FROM_IP	0x00000002
+#define VIVS_MC_PROFILE_CONFIG2_MC_TOTAL_WRITE_REQ_8B_FROM_PIPELINE	0x00000003
+#define VIVS_MC_PROFILE_CONFIG2_MC_RESET			0x0000000f
+#define VIVS_MC_PROFILE_CONFIG2_HI__MASK			0x00000f00
+#define VIVS_MC_PROFILE_CONFIG2_HI__SHIFT			8
+#define VIVS_MC_PROFILE_CONFIG2_HI_AXI_CYCLES_READ_REQUEST_STALLED	0x00000000
+#define VIVS_MC_PROFILE_CONFIG2_HI_AXI_CYCLES_WRITE_REQUEST_STALLED	0x00000100
+#define VIVS_MC_PROFILE_CONFIG2_HI_AXI_CYCLES_WRITE_DATA_STALLED	0x00000200
+#define VIVS_MC_PROFILE_CONFIG2_HI_RESET			0x00000f00
+
+#define VIVS_MC_PROFILE_CONFIG3					0x0000047c
+
+#define VIVS_MC_BUS_CONFIG					0x00000480
+#define VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG__MASK			0x0000000f
+#define VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG__SHIFT			0
+#define VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG(x)			(((x) << VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG__SHIFT) & VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG__MASK)
+#define VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG__MASK			0x000000f0
+#define VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG__SHIFT			4
+#define VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG(x)			(((x) << VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG__SHIFT) & VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG__MASK)
+
+#define VIVS_MC_START_COMPOSITION				0x00000554
+
+#define VIVS_MC_128B_MERGE					0x00000558
+
+
+#endif /* STATE_HI_XML */
diff --git a/include/uapi/drm/etnaviv_drm.h b/include/uapi/drm/etnaviv_drm.h
new file mode 100644
index 000000000000..4cc989ad6851
--- /dev/null
+++ b/include/uapi/drm/etnaviv_drm.h
@@ -0,0 +1,222 @@
+/*
+ * Copyright (C) 2015 Etnaviv Project
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ETNAVIV_DRM_H__
+#define __ETNAVIV_DRM_H__
+
+#include "drm.h"
+
+/* Please note that modifications to all structs defined here are
+ * subject to backwards-compatibility constraints:
+ *  1) Do not use pointers, use __u64 instead for 32 bit / 64 bit
+ *     user/kernel compatibility
+ *  2) Keep fields aligned to their size
+ *  3) Because of how drm_ioctl() works, we can add new fields at
+ *     the end of an ioctl if some care is taken: drm_ioctl() will
+ *     zero out the new fields at the tail of the ioctl, so a zero
+ *     value should have a backwards compatible meaning.  And for
+ *     output params, userspace won't see the newly added output
+ *     fields.. so that has to be somehow ok.
+ */
+
+/* timeouts are specified in clock-monotonic absolute times (to simplify
+ * restarting interrupted ioctls).  The following struct is logically the
+ * same as 'struct timespec' but 32/64b ABI safe.
+ */
+struct drm_etnaviv_timespec {
+	__s64 tv_sec;          /* seconds */
+	__s64 tv_nsec;         /* nanoseconds */
+};
+
+#define ETNAVIV_PARAM_GPU_MODEL                     0x01
+#define ETNAVIV_PARAM_GPU_REVISION                  0x02
+#define ETNAVIV_PARAM_GPU_FEATURES_0                0x03
+#define ETNAVIV_PARAM_GPU_FEATURES_1                0x04
+#define ETNAVIV_PARAM_GPU_FEATURES_2                0x05
+#define ETNAVIV_PARAM_GPU_FEATURES_3                0x06
+#define ETNAVIV_PARAM_GPU_FEATURES_4                0x07
+
+#define ETNAVIV_PARAM_GPU_STREAM_COUNT              0x10
+#define ETNAVIV_PARAM_GPU_REGISTER_MAX              0x11
+#define ETNAVIV_PARAM_GPU_THREAD_COUNT              0x12
+#define ETNAVIV_PARAM_GPU_VERTEX_CACHE_SIZE         0x13
+#define ETNAVIV_PARAM_GPU_SHADER_CORE_COUNT         0x14
+#define ETNAVIV_PARAM_GPU_PIXEL_PIPES               0x15
+#define ETNAVIV_PARAM_GPU_VERTEX_OUTPUT_BUFFER_SIZE 0x16
+#define ETNAVIV_PARAM_GPU_BUFFER_SIZE               0x17
+#define ETNAVIV_PARAM_GPU_INSTRUCTION_COUNT         0x18
+#define ETNAVIV_PARAM_GPU_NUM_CONSTANTS             0x19
+
+#define ETNA_MAX_PIPES 4
+
+struct drm_etnaviv_param {
+	__u32 pipe;           /* in */
+	__u32 param;          /* in, ETNAVIV_PARAM_x */
+	__u64 value;          /* out (get_param) or in (set_param) */
+};
+
+/*
+ * GEM buffers:
+ */
+
+#define ETNA_BO_CACHE_MASK   0x000f0000
+/* cache modes */
+#define ETNA_BO_CACHED       0x00010000
+#define ETNA_BO_WC           0x00020000
+#define ETNA_BO_UNCACHED     0x00040000
+/* map flags */
+#define ETNA_BO_FORCE_MMU    0x00100000
+
+struct drm_etnaviv_gem_new {
+	__u64 size;           /* in */
+	__u32 flags;          /* in, mask of ETNA_BO_x */
+	__u32 handle;         /* out */
+};
+
+struct drm_etnaviv_gem_info {
+	__u32 handle;         /* in */
+	__u32 pad;
+	__u64 offset;         /* out, offset to pass to mmap() */
+};
+
+#define ETNA_PREP_READ        0x01
+#define ETNA_PREP_WRITE       0x02
+#define ETNA_PREP_NOSYNC      0x04
+
+struct drm_etnaviv_gem_cpu_prep {
+	__u32 handle;         /* in */
+	__u32 op;             /* in, mask of ETNA_PREP_x */
+	struct drm_etnaviv_timespec timeout;   /* in */
+};
+
+struct drm_etnaviv_gem_cpu_fini {
+	__u32 handle;         /* in */
+	__u32 flags;          /* in, placeholder for now, no defined values */
+};
+
+/*
+ * Cmdstream Submission:
+ */
+
+/* The value written into the cmdstream is logically:
+ * relocbuf->gpuaddr + reloc_offset
+ *
+ * NOTE that reloc's must be sorted by order of increasing submit_offset,
+ * otherwise EINVAL.
+ */
+struct drm_etnaviv_gem_submit_reloc {
+	__u32 submit_offset;  /* in, offset from submit_bo */
+	__u32 reloc_idx;      /* in, index of reloc_bo buffer */
+	__u64 reloc_offset;   /* in, offset from start of reloc_bo */
+	__u32 flags;          /* in, placeholder for now, no defined values */
+};
+
+/* Each buffer referenced elsewhere in the cmdstream submit (ie. the
+ * cmdstream buffer(s) themselves or reloc entries) has one (and only
+ * one) entry in the submit->bos[] table.
+ *
+ * As a optimization, the current buffer (gpu virtual address) can be
+ * passed back through the 'presumed' field.  If on a subsequent reloc,
+ * userspace passes back a 'presumed' address that is still valid,
+ * then patching the cmdstream for this entry is skipped.  This can
+ * avoid kernel needing to map/access the cmdstream bo in the common
+ * case.
+ */
+#define ETNA_SUBMIT_BO_READ             0x0001
+#define ETNA_SUBMIT_BO_WRITE            0x0002
+struct drm_etnaviv_gem_submit_bo {
+	__u32 flags;          /* in, mask of ETNA_SUBMIT_BO_x */
+	__u32 handle;         /* in, GEM handle */
+	__u64 presumed;       /* in/out, presumed buffer address */
+};
+
+/* Each cmdstream submit consists of a table of buffers involved, and
+ * one or more cmdstream buffers.  This allows for conditional execution
+ * (context-restore), and IB buffers needed for per tile/bin draw cmds.
+ */
+#define ETNA_PIPE_3D      0x00
+#define ETNA_PIPE_2D      0x01
+#define ETNA_PIPE_VG      0x02
+struct drm_etnaviv_gem_submit {
+	__u32 fence;          /* out */
+	__u32 pipe;           /* in */
+	__u32 exec_state;     /* in, initial execution state (ETNA_PIPE_x) */
+	__u32 nr_bos;         /* in, number of submit_bo's */
+	__u32 nr_relocs;      /* in, number of submit_reloc's */
+	__u32 stream_size;    /* in, cmdstream size */
+	__u64 bos;            /* in, ptr to array of submit_bo's */
+	__u64 relocs;         /* in, ptr to array of submit_reloc's */
+	__u64 stream;         /* in, ptr to cmdstream */
+};
+
+/* The normal way to synchronize with the GPU is just to CPU_PREP on
+ * a buffer if you need to access it from the CPU (other cmdstream
+ * submission from same or other contexts, PAGE_FLIP ioctl, etc, all
+ * handle the required synchronization under the hood).  This ioctl
+ * mainly just exists as a way to implement the gallium pipe_fence
+ * APIs without requiring a dummy bo to synchronize on.
+ */
+#define ETNA_WAIT_NONBLOCK      0x01
+struct drm_etnaviv_wait_fence {
+	__u32 pipe;           /* in */
+	__u32 fence;          /* in */
+	__u32 flags;          /* in, mask of ETNA_WAIT_x */
+	__u32 pad;
+	struct drm_etnaviv_timespec timeout;   /* in */
+};
+
+#define ETNA_USERPTR_READ	0x01
+#define ETNA_USERPTR_WRITE	0x02
+struct drm_etnaviv_gem_userptr {
+	__u64 user_ptr;	/* in, page aligned user pointer */
+	__u64 user_size;	/* in, page aligned user size */
+	__u32 flags;		/* in, flags */
+	__u32 handle;	/* out, non-zero handle */
+};
+
+struct drm_etnaviv_gem_wait {
+	__u32 pipe;				/* in */
+	__u32 handle;				/* in, bo to be waited for */
+	__u32 flags;				/* in, mask of ETNA_WAIT_x  */
+	__u32 pad;
+	struct drm_etnaviv_timespec timeout;	/* in */
+};
+
+#define DRM_ETNAVIV_GET_PARAM          0x00
+/* placeholder:
+#define DRM_ETNAVIV_SET_PARAM          0x01
+ */
+#define DRM_ETNAVIV_GEM_NEW            0x02
+#define DRM_ETNAVIV_GEM_INFO           0x03
+#define DRM_ETNAVIV_GEM_CPU_PREP       0x04
+#define DRM_ETNAVIV_GEM_CPU_FINI       0x05
+#define DRM_ETNAVIV_GEM_SUBMIT         0x06
+#define DRM_ETNAVIV_WAIT_FENCE         0x07
+#define DRM_ETNAVIV_GEM_USERPTR        0x08
+#define DRM_ETNAVIV_GEM_WAIT           0x09
+#define DRM_ETNAVIV_NUM_IOCTLS         0x0a
+
+#define DRM_IOCTL_ETNAVIV_GET_PARAM    DRM_IOWR(DRM_COMMAND_BASE + DRM_ETNAVIV_GET_PARAM, struct drm_etnaviv_param)
+#define DRM_IOCTL_ETNAVIV_GEM_NEW      DRM_IOWR(DRM_COMMAND_BASE + DRM_ETNAVIV_GEM_NEW, struct drm_etnaviv_gem_new)
+#define DRM_IOCTL_ETNAVIV_GEM_INFO     DRM_IOWR(DRM_COMMAND_BASE + DRM_ETNAVIV_GEM_INFO, struct drm_etnaviv_gem_info)
+#define DRM_IOCTL_ETNAVIV_GEM_CPU_PREP DRM_IOW(DRM_COMMAND_BASE + DRM_ETNAVIV_GEM_CPU_PREP, struct drm_etnaviv_gem_cpu_prep)
+#define DRM_IOCTL_ETNAVIV_GEM_CPU_FINI DRM_IOW(DRM_COMMAND_BASE + DRM_ETNAVIV_GEM_CPU_FINI, struct drm_etnaviv_gem_cpu_fini)
+#define DRM_IOCTL_ETNAVIV_GEM_SUBMIT   DRM_IOWR(DRM_COMMAND_BASE + DRM_ETNAVIV_GEM_SUBMIT, struct drm_etnaviv_gem_submit)
+#define DRM_IOCTL_ETNAVIV_WAIT_FENCE   DRM_IOW(DRM_COMMAND_BASE + DRM_ETNAVIV_WAIT_FENCE, struct drm_etnaviv_wait_fence)
+#define DRM_IOCTL_ETNAVIV_GEM_USERPTR  DRM_IOWR(DRM_COMMAND_BASE + DRM_ETNAVIV_GEM_USERPTR, struct drm_etnaviv_gem_userptr)
+#define DRM_IOCTL_ETNAVIV_GEM_WAIT     DRM_IOW(DRM_COMMAND_BASE + DRM_ETNAVIV_GEM_WAIT, struct drm_etnaviv_gem_wait)
+
+#endif /* __ETNAVIV_DRM_H__ */
-- 
cgit v1.2.3-58-ga151


From b75e1513fcdbc6e8988a57bc0d9fc21f8c4d7b2c Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ti.com>
Date: Fri, 28 Aug 2015 11:23:03 +0300
Subject: drm/omap: remove unused plugin defines

Remove unused defines related to SGX plugin which are not used.

Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
---
 drivers/gpu/drm/omapdrm/omap_drv.h | 6 ------
 include/uapi/drm/omap_drm.h        | 6 ------
 2 files changed, 12 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/omapdrm/omap_drv.h b/drivers/gpu/drm/omapdrm/omap_drv.h
index 718f032aa052..9e0030731c37 100644
--- a/drivers/gpu/drm/omapdrm/omap_drv.h
+++ b/drivers/gpu/drm/omapdrm/omap_drv.h
@@ -38,12 +38,6 @@
 
 struct omap_drm_usergart;
 
-/* max # of mapper-id's that can be assigned.. todo, come up with a better
- * (but still inexpensive) way to store/access per-buffer mapper private
- * data..
- */
-#define MAX_MAPPERS 2
-
 /* parameters which describe (unrotated) coordinates of scanout within a fb: */
 struct omap_drm_window {
 	uint32_t rotation;
diff --git a/include/uapi/drm/omap_drm.h b/include/uapi/drm/omap_drm.h
index 0750c01bb480..38a3bd847e15 100644
--- a/include/uapi/drm/omap_drm.h
+++ b/include/uapi/drm/omap_drm.h
@@ -101,9 +101,6 @@ struct drm_omap_gem_info {
 
 #define DRM_OMAP_GET_PARAM		0x00
 #define DRM_OMAP_SET_PARAM		0x01
-/* placeholder for plugin-api
-#define DRM_OMAP_GET_BASE		0x02
-*/
 #define DRM_OMAP_GEM_NEW		0x03
 #define DRM_OMAP_GEM_CPU_PREP		0x04
 #define DRM_OMAP_GEM_CPU_FINI		0x05
@@ -112,9 +109,6 @@ struct drm_omap_gem_info {
 
 #define DRM_IOCTL_OMAP_GET_PARAM	DRM_IOWR(DRM_COMMAND_BASE + DRM_OMAP_GET_PARAM, struct drm_omap_param)
 #define DRM_IOCTL_OMAP_SET_PARAM	DRM_IOW (DRM_COMMAND_BASE + DRM_OMAP_SET_PARAM, struct drm_omap_param)
-/* placeholder for plugin-api
-#define DRM_IOCTL_OMAP_GET_BASE		DRM_IOWR(DRM_COMMAND_BASE + DRM_OMAP_GET_BASE, struct drm_omap_get_base)
-*/
 #define DRM_IOCTL_OMAP_GEM_NEW		DRM_IOWR(DRM_COMMAND_BASE + DRM_OMAP_GEM_NEW, struct drm_omap_gem_new)
 #define DRM_IOCTL_OMAP_GEM_CPU_PREP	DRM_IOW (DRM_COMMAND_BASE + DRM_OMAP_GEM_CPU_PREP, struct drm_omap_gem_cpu_prep)
 #define DRM_IOCTL_OMAP_GEM_CPU_FINI	DRM_IOW (DRM_COMMAND_BASE + DRM_OMAP_GEM_CPU_FINI, struct drm_omap_gem_cpu_fini)
-- 
cgit v1.2.3-58-ga151