summaryrefslogtreecommitdiff
path: root/lib/test_hmm.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/test_hmm.c')
-rw-r--r--lib/test_hmm.c347
1 files changed, 261 insertions, 86 deletions
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index cfe632047839..e3965cafd27c 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -32,11 +32,32 @@
#include "test_hmm_uapi.h"
-#define DMIRROR_NDEVICES 2
+#define DMIRROR_NDEVICES 4
#define DMIRROR_RANGE_FAULT_TIMEOUT 1000
#define DEVMEM_CHUNK_SIZE (256 * 1024 * 1024U)
#define DEVMEM_CHUNKS_RESERVE 16
+/*
+ * For device_private pages, dpage is just a dummy struct page
+ * representing a piece of device memory. dmirror_devmem_alloc_page
+ * allocates a real system memory page as backing storage to fake a
+ * real device. zone_device_data points to that backing page. But
+ * for device_coherent memory, the struct page represents real
+ * physical CPU-accessible memory that we can use directly.
+ */
+#define BACKING_PAGE(page) (is_device_private_page((page)) ? \
+ (page)->zone_device_data : (page))
+
+static unsigned long spm_addr_dev0;
+module_param(spm_addr_dev0, long, 0644);
+MODULE_PARM_DESC(spm_addr_dev0,
+ "Specify start address for SPM (special purpose memory) used for device 0. By setting this Coherent device type will be used. Make sure spm_addr_dev1 is set too. Minimum SPM size should be DEVMEM_CHUNK_SIZE.");
+
+static unsigned long spm_addr_dev1;
+module_param(spm_addr_dev1, long, 0644);
+MODULE_PARM_DESC(spm_addr_dev1,
+ "Specify start address for SPM (special purpose memory) used for device 1. By setting this Coherent device type will be used. Make sure spm_addr_dev0 is set too. Minimum SPM size should be DEVMEM_CHUNK_SIZE.");
+
static const struct dev_pagemap_ops dmirror_devmem_ops;
static const struct mmu_interval_notifier_ops dmirror_min_ops;
static dev_t dmirror_dev;
@@ -87,6 +108,7 @@ struct dmirror_chunk {
struct dmirror_device {
struct cdev cdevice;
struct hmm_devmem *devmem;
+ unsigned int zone_device_type;
unsigned int devmem_capacity;
unsigned int devmem_count;
@@ -114,6 +136,21 @@ static int dmirror_bounce_init(struct dmirror_bounce *bounce,
return 0;
}
+static bool dmirror_is_private_zone(struct dmirror_device *mdevice)
+{
+ return (mdevice->zone_device_type ==
+ HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ? true : false;
+}
+
+static enum migrate_vma_direction
+dmirror_select_device(struct dmirror *dmirror)
+{
+ return (dmirror->mdevice->zone_device_type ==
+ HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ?
+ MIGRATE_VMA_SELECT_DEVICE_PRIVATE :
+ MIGRATE_VMA_SELECT_DEVICE_COHERENT;
+}
+
static void dmirror_bounce_fini(struct dmirror_bounce *bounce)
{
vfree(bounce->ptr);
@@ -454,28 +491,44 @@ fini:
return ret;
}
-static bool dmirror_allocate_chunk(struct dmirror_device *mdevice,
+static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
struct page **ppage)
{
struct dmirror_chunk *devmem;
- struct resource *res;
+ struct resource *res = NULL;
unsigned long pfn;
unsigned long pfn_first;
unsigned long pfn_last;
void *ptr;
+ int ret = -ENOMEM;
devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);
if (!devmem)
- return false;
+ return ret;
- res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE,
- "hmm_dmirror");
- if (IS_ERR(res))
+ switch (mdevice->zone_device_type) {
+ case HMM_DMIRROR_MEMORY_DEVICE_PRIVATE:
+ res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE,
+ "hmm_dmirror");
+ if (IS_ERR_OR_NULL(res))
+ goto err_devmem;
+ devmem->pagemap.range.start = res->start;
+ devmem->pagemap.range.end = res->end;
+ devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
+ break;
+ case HMM_DMIRROR_MEMORY_DEVICE_COHERENT:
+ devmem->pagemap.range.start = (MINOR(mdevice->cdevice.dev) - 2) ?
+ spm_addr_dev0 :
+ spm_addr_dev1;
+ devmem->pagemap.range.end = devmem->pagemap.range.start +
+ DEVMEM_CHUNK_SIZE - 1;
+ devmem->pagemap.type = MEMORY_DEVICE_COHERENT;
+ break;
+ default:
+ ret = -EINVAL;
goto err_devmem;
+ }
- devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
- devmem->pagemap.range.start = res->start;
- devmem->pagemap.range.end = res->end;
devmem->pagemap.nr_range = 1;
devmem->pagemap.ops = &dmirror_devmem_ops;
devmem->pagemap.owner = mdevice;
@@ -496,10 +549,14 @@ static bool dmirror_allocate_chunk(struct dmirror_device *mdevice,
mdevice->devmem_capacity = new_capacity;
mdevice->devmem_chunks = new_chunks;
}
-
ptr = memremap_pages(&devmem->pagemap, numa_node_id());
- if (IS_ERR(ptr))
+ if (IS_ERR_OR_NULL(ptr)) {
+ if (ptr)
+ ret = PTR_ERR(ptr);
+ else
+ ret = -EFAULT;
goto err_release;
+ }
devmem->mdevice = mdevice;
pfn_first = devmem->pagemap.range.start >> PAGE_SHIFT;
@@ -528,30 +585,35 @@ static bool dmirror_allocate_chunk(struct dmirror_device *mdevice,
}
spin_unlock(&mdevice->lock);
- return true;
+ return 0;
err_release:
mutex_unlock(&mdevice->devmem_lock);
- release_mem_region(devmem->pagemap.range.start, range_len(&devmem->pagemap.range));
+ if (res && devmem->pagemap.type == MEMORY_DEVICE_PRIVATE)
+ release_mem_region(devmem->pagemap.range.start,
+ range_len(&devmem->pagemap.range));
err_devmem:
kfree(devmem);
- return false;
+ return ret;
}
static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
{
struct page *dpage = NULL;
- struct page *rpage;
+ struct page *rpage = NULL;
/*
- * This is a fake device so we alloc real system memory to store
- * our device memory.
+ * For ZONE_DEVICE private type, this is a fake device so we allocate
+ * real system memory to store our device memory.
+ * For ZONE_DEVICE coherent type we use the actual dpage to store the
+ * data and ignore rpage.
*/
- rpage = alloc_page(GFP_HIGHUSER);
- if (!rpage)
- return NULL;
-
+ if (dmirror_is_private_zone(mdevice)) {
+ rpage = alloc_page(GFP_HIGHUSER);
+ if (!rpage)
+ return NULL;
+ }
spin_lock(&mdevice->lock);
if (mdevice->free_pages) {
@@ -561,7 +623,7 @@ static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
spin_unlock(&mdevice->lock);
} else {
spin_unlock(&mdevice->lock);
- if (!dmirror_allocate_chunk(mdevice, &dpage))
+ if (dmirror_allocate_chunk(mdevice, &dpage))
goto error;
}
@@ -570,7 +632,8 @@ static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
return dpage;
error:
- __free_page(rpage);
+ if (rpage)
+ __free_page(rpage);
return NULL;
}
@@ -596,12 +659,16 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
* unallocated pte_none() or read-only zero page.
*/
spage = migrate_pfn_to_page(*src);
+ if (WARN(spage && is_zone_device_page(spage),
+ "page already in device spage pfn: 0x%lx\n",
+ page_to_pfn(spage)))
+ continue;
dpage = dmirror_devmem_alloc_page(mdevice);
if (!dpage)
continue;
- rpage = dpage->zone_device_data;
+ rpage = BACKING_PAGE(dpage);
if (spage)
copy_highpage(rpage, spage);
else
@@ -615,6 +682,8 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
*/
rpage->zone_device_data = dmirror;
+ pr_debug("migrating from sys to dev pfn src: 0x%lx pfn dst: 0x%lx\n",
+ page_to_pfn(spage), page_to_pfn(dpage));
*dst = migrate_pfn(page_to_pfn(dpage));
if ((*src & MIGRATE_PFN_WRITE) ||
(!spage && args->vma->vm_flags & VM_WRITE))
@@ -692,11 +761,7 @@ static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,
if (!dpage)
continue;
- /*
- * Store the page that holds the data so the page table
- * doesn't have to deal with ZONE_DEVICE private pages.
- */
- entry = dpage->zone_device_data;
+ entry = BACKING_PAGE(dpage);
if (*dst & MIGRATE_PFN_WRITE)
entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
@@ -732,7 +797,7 @@ static int dmirror_exclusive(struct dmirror *dmirror,
mmap_read_lock(mm);
for (addr = start; addr < end; addr = next) {
- unsigned long mapped;
+ unsigned long mapped = 0;
int i;
if (end < addr + (ARRAY_SIZE(pages) << PAGE_SHIFT))
@@ -741,7 +806,13 @@ static int dmirror_exclusive(struct dmirror *dmirror,
next = addr + (ARRAY_SIZE(pages) << PAGE_SHIFT);
ret = make_device_exclusive_range(mm, addr, next, pages, NULL);
- mapped = dmirror_atomic_map(addr, next, pages, dmirror);
+ /*
+ * Do dmirror_atomic_map() iff all pages are marked for
+ * exclusive access to avoid accessing uninitialized
+ * fields of pages.
+ */
+ if (ret == (next - addr) >> PAGE_SHIFT)
+ mapped = dmirror_atomic_map(addr, next, pages, dmirror);
for (i = 0; i < ret; i++) {
if (pages[i]) {
unlock_page(pages[i]);
@@ -776,15 +847,126 @@ static int dmirror_exclusive(struct dmirror *dmirror,
return ret;
}
-static int dmirror_migrate(struct dmirror *dmirror,
- struct hmm_dmirror_cmd *cmd)
+static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
+ struct dmirror *dmirror)
+{
+ const unsigned long *src = args->src;
+ unsigned long *dst = args->dst;
+ unsigned long start = args->start;
+ unsigned long end = args->end;
+ unsigned long addr;
+
+ for (addr = start; addr < end; addr += PAGE_SIZE,
+ src++, dst++) {
+ struct page *dpage, *spage;
+
+ spage = migrate_pfn_to_page(*src);
+ if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
+ continue;
+
+ if (WARN_ON(!is_device_private_page(spage) &&
+ !is_device_coherent_page(spage)))
+ continue;
+ spage = BACKING_PAGE(spage);
+ dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
+ if (!dpage)
+ continue;
+ pr_debug("migrating from dev to sys pfn src: 0x%lx pfn dst: 0x%lx\n",
+ page_to_pfn(spage), page_to_pfn(dpage));
+
+ lock_page(dpage);
+ xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
+ copy_highpage(dpage, spage);
+ *dst = migrate_pfn(page_to_pfn(dpage));
+ if (*src & MIGRATE_PFN_WRITE)
+ *dst |= MIGRATE_PFN_WRITE;
+ }
+ return 0;
+}
+
+static unsigned long
+dmirror_successful_migrated_pages(struct migrate_vma *migrate)
+{
+ unsigned long cpages = 0;
+ unsigned long i;
+
+ for (i = 0; i < migrate->npages; i++) {
+ if (migrate->src[i] & MIGRATE_PFN_VALID &&
+ migrate->src[i] & MIGRATE_PFN_MIGRATE)
+ cpages++;
+ }
+ return cpages;
+}
+
+static int dmirror_migrate_to_system(struct dmirror *dmirror,
+ struct hmm_dmirror_cmd *cmd)
{
unsigned long start, end, addr;
unsigned long size = cmd->npages << PAGE_SHIFT;
struct mm_struct *mm = dmirror->notifier.mm;
struct vm_area_struct *vma;
- unsigned long src_pfns[64];
- unsigned long dst_pfns[64];
+ unsigned long src_pfns[64] = { 0 };
+ unsigned long dst_pfns[64] = { 0 };
+ struct migrate_vma args;
+ unsigned long next;
+ int ret;
+
+ start = cmd->addr;
+ end = start + size;
+ if (end < start)
+ return -EINVAL;
+
+ /* Since the mm is for the mirrored process, get a reference first. */
+ if (!mmget_not_zero(mm))
+ return -EINVAL;
+
+ cmd->cpages = 0;
+ mmap_read_lock(mm);
+ for (addr = start; addr < end; addr = next) {
+ vma = vma_lookup(mm, addr);
+ if (!vma || !(vma->vm_flags & VM_READ)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
+ if (next > vma->vm_end)
+ next = vma->vm_end;
+
+ args.vma = vma;
+ args.src = src_pfns;
+ args.dst = dst_pfns;
+ args.start = addr;
+ args.end = next;
+ args.pgmap_owner = dmirror->mdevice;
+ args.flags = dmirror_select_device(dmirror);
+
+ ret = migrate_vma_setup(&args);
+ if (ret)
+ goto out;
+
+ pr_debug("Migrating from device mem to sys mem\n");
+ dmirror_devmem_fault_alloc_and_copy(&args, dmirror);
+
+ migrate_vma_pages(&args);
+ cmd->cpages += dmirror_successful_migrated_pages(&args);
+ migrate_vma_finalize(&args);
+ }
+out:
+ mmap_read_unlock(mm);
+ mmput(mm);
+
+ return ret;
+}
+
+static int dmirror_migrate_to_device(struct dmirror *dmirror,
+ struct hmm_dmirror_cmd *cmd)
+{
+ unsigned long start, end, addr;
+ unsigned long size = cmd->npages << PAGE_SHIFT;
+ struct mm_struct *mm = dmirror->notifier.mm;
+ struct vm_area_struct *vma;
+ unsigned long src_pfns[64] = { 0 };
+ unsigned long dst_pfns[64] = { 0 };
struct dmirror_bounce bounce;
struct migrate_vma args;
unsigned long next;
@@ -821,6 +1003,7 @@ static int dmirror_migrate(struct dmirror *dmirror,
if (ret)
goto out;
+ pr_debug("Migrating from sys mem to device mem\n");
dmirror_migrate_alloc_and_copy(&args, dmirror);
migrate_vma_pages(&args);
dmirror_migrate_finalize_and_map(&args, dmirror);
@@ -829,7 +1012,10 @@ static int dmirror_migrate(struct dmirror *dmirror,
mmap_read_unlock(mm);
mmput(mm);
- /* Return the migrated data for verification. */
+ /*
+ * Return the migrated data for verification.
+ * Only for pages in device zone
+ */
ret = dmirror_bounce_init(&bounce, start, size);
if (ret)
return ret;
@@ -872,6 +1058,12 @@ static void dmirror_mkentry(struct dmirror *dmirror, struct hmm_range *range,
*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL;
else
*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE;
+ } else if (is_device_coherent_page(page)) {
+ /* Is the page migrated to this device or some other? */
+ if (dmirror->mdevice == dmirror_page_to_device(page))
+ *perm = HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL;
+ else
+ *perm = HMM_DMIRROR_PROT_DEV_COHERENT_REMOTE;
} else if (is_zero_pfn(page_to_pfn(page)))
*perm = HMM_DMIRROR_PROT_ZERO;
else
@@ -1059,8 +1251,12 @@ static long dmirror_fops_unlocked_ioctl(struct file *filp,
ret = dmirror_write(dmirror, &cmd);
break;
- case HMM_DMIRROR_MIGRATE:
- ret = dmirror_migrate(dmirror, &cmd);
+ case HMM_DMIRROR_MIGRATE_TO_DEV:
+ ret = dmirror_migrate_to_device(dmirror, &cmd);
+ break;
+
+ case HMM_DMIRROR_MIGRATE_TO_SYS:
+ ret = dmirror_migrate_to_system(dmirror, &cmd);
break;
case HMM_DMIRROR_EXCLUSIVE:
@@ -1122,14 +1318,13 @@ static const struct file_operations dmirror_fops = {
static void dmirror_devmem_free(struct page *page)
{
- struct page *rpage = page->zone_device_data;
+ struct page *rpage = BACKING_PAGE(page);
struct dmirror_device *mdevice;
- if (rpage)
+ if (rpage != page)
__free_page(rpage);
mdevice = dmirror_page_to_device(page);
-
spin_lock(&mdevice->lock);
mdevice->cfree++;
page->zone_device_data = mdevice->free_pages;
@@ -1137,43 +1332,11 @@ static void dmirror_devmem_free(struct page *page)
spin_unlock(&mdevice->lock);
}
-static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
- struct dmirror *dmirror)
-{
- const unsigned long *src = args->src;
- unsigned long *dst = args->dst;
- unsigned long start = args->start;
- unsigned long end = args->end;
- unsigned long addr;
-
- for (addr = start; addr < end; addr += PAGE_SIZE,
- src++, dst++) {
- struct page *dpage, *spage;
-
- spage = migrate_pfn_to_page(*src);
- if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
- continue;
- spage = spage->zone_device_data;
-
- dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
- if (!dpage)
- continue;
-
- lock_page(dpage);
- xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
- copy_highpage(dpage, spage);
- *dst = migrate_pfn(page_to_pfn(dpage));
- if (*src & MIGRATE_PFN_WRITE)
- *dst |= MIGRATE_PFN_WRITE;
- }
- return 0;
-}
-
static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
{
struct migrate_vma args;
- unsigned long src_pfns;
- unsigned long dst_pfns;
+ unsigned long src_pfns = 0;
+ unsigned long dst_pfns = 0;
struct page *rpage;
struct dmirror *dmirror;
vm_fault_t ret;
@@ -1193,7 +1356,7 @@ static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
args.src = &src_pfns;
args.dst = &dst_pfns;
args.pgmap_owner = dmirror->mdevice;
- args.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
+ args.flags = dmirror_select_device(dmirror);
if (migrate_vma_setup(&args))
return VM_FAULT_SIGBUS;
@@ -1231,10 +1394,8 @@ static int dmirror_device_init(struct dmirror_device *mdevice, int id)
if (ret)
return ret;
- /* Build a list of free ZONE_DEVICE private struct pages */
- dmirror_allocate_chunk(mdevice, NULL);
-
- return 0;
+ /* Build a list of free ZONE_DEVICE struct pages */
+ return dmirror_allocate_chunk(mdevice, NULL);
}
static void dmirror_device_remove(struct dmirror_device *mdevice)
@@ -1247,8 +1408,9 @@ static void dmirror_device_remove(struct dmirror_device *mdevice)
mdevice->devmem_chunks[i];
memunmap_pages(&devmem->pagemap);
- release_mem_region(devmem->pagemap.range.start,
- range_len(&devmem->pagemap.range));
+ if (devmem->pagemap.type == MEMORY_DEVICE_PRIVATE)
+ release_mem_region(devmem->pagemap.range.start,
+ range_len(&devmem->pagemap.range));
kfree(devmem);
}
kfree(mdevice->devmem_chunks);
@@ -1260,14 +1422,26 @@ static void dmirror_device_remove(struct dmirror_device *mdevice)
static int __init hmm_dmirror_init(void)
{
int ret;
- int id;
+ int id = 0;
+ int ndevices = 0;
ret = alloc_chrdev_region(&dmirror_dev, 0, DMIRROR_NDEVICES,
"HMM_DMIRROR");
if (ret)
goto err_unreg;
- for (id = 0; id < DMIRROR_NDEVICES; id++) {
+ memset(dmirror_devices, 0, DMIRROR_NDEVICES * sizeof(dmirror_devices[0]));
+ dmirror_devices[ndevices++].zone_device_type =
+ HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
+ dmirror_devices[ndevices++].zone_device_type =
+ HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
+ if (spm_addr_dev0 && spm_addr_dev1) {
+ dmirror_devices[ndevices++].zone_device_type =
+ HMM_DMIRROR_MEMORY_DEVICE_COHERENT;
+ dmirror_devices[ndevices++].zone_device_type =
+ HMM_DMIRROR_MEMORY_DEVICE_COHERENT;
+ }
+ for (id = 0; id < ndevices; id++) {
ret = dmirror_device_init(dmirror_devices + id, id);
if (ret)
goto err_chrdev;
@@ -1289,7 +1463,8 @@ static void __exit hmm_dmirror_exit(void)
int id;
for (id = 0; id < DMIRROR_NDEVICES; id++)
- dmirror_device_remove(dmirror_devices + id);
+ if (dmirror_devices[id].zone_device_type)
+ dmirror_device_remove(dmirror_devices + id);
unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES);
}