diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-02-25 11:52:57 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-02-25 11:52:57 -0800 |
commit | cac85e4616b1cf4a90844b952b49b9cbc4562530 (patch) | |
tree | 1d3d5f82188310497ced1a0ade2ab3b7a4346201 /drivers/vfio/pci | |
parent | 84cc6674b76ba2cdac0df8037b4d8a22a6fc1b77 (diff) | |
parent | d649c34cb916b015fdcb487e51409fcc5caeca8d (diff) |
Merge tag 'vfio-v6.3-rc1' of https://github.com/awilliam/linux-vfio
Pull VFIO updates from Alex Williamson:
- Remove redundant resource check in vfio-platform (Angus Chen)
- Use GFP_KERNEL_ACCOUNT for persistent userspace allocations, allowing
removal of arbitrary kernel limits in favor of cgroup control (Yishai
Hadas)
- mdev tidy-ups, including removing the module-only build restriction
for sample drivers, Kconfig changes to select mdev support,
documentation movement to keep sample driver usage instructions with
sample drivers rather than with API docs, remove references to
out-of-tree drivers in docs (Christoph Hellwig)
- Fix collateral breakages from mdev Kconfig changes (Arnd Bergmann)
- Make mlx5 migration support match device support, improve source and
target flows to improve pre-copy support and reduce downtime (Yishai
Hadas)
- Convert additional mdev sysfs case to use sysfs_emit() (Bo Liu)
- Resolve copy-paste error in mdev mbochs sample driver Kconfig (Ye
Xingchen)
- Avoid propagating missing reset error in vfio-platform if reset
requirement is relaxed by module option (Tomasz Duszynski)
- Range size fixes in mlx5 variant driver for missed last byte and
stricter range calculation (Yishai Hadas)
- Fixes to suspended vaddr support and locked_vm accounting, excluding
mdev configurations from the former due to potential to indefinitely
block kernel threads, fix underflow and restore locked_vm on new mm
(Steve Sistare)
- Update outdated vfio documentation due to new IOMMUFD interfaces in
recent kernels (Yi Liu)
- Resolve deadlock between group_lock and kvm_lock, finally (Matthew
Rosato)
- Fix NULL pointer in group initialization error path with IOMMUFD (Yan
Zhao)
* tag 'vfio-v6.3-rc1' of https://github.com/awilliam/linux-vfio: (32 commits)
vfio: Fix NULL pointer dereference caused by uninitialized group->iommufd
docs: vfio: Update vfio.rst per latest interfaces
vfio: Update the kdoc for vfio_device_ops
vfio/mlx5: Fix range size calculation upon tracker creation
vfio: no need to pass kvm pointer during device open
vfio: fix deadlock between group lock and kvm lock
vfio: revert "iommu driver notify callback"
vfio/type1: revert "implement notify callback"
vfio/type1: revert "block on invalid vaddr"
vfio/type1: restore locked_vm
vfio/type1: track locked_vm per dma
vfio/type1: prevent underflow of locked_vm via exec()
vfio/type1: exclude mdevs from VFIO_UPDATE_VADDR
vfio: platform: ignore missing reset if disabled at module init
vfio/mlx5: Improve the target side flow to reduce downtime
vfio/mlx5: Improve the source side flow upon pre_copy
vfio/mlx5: Check whether VF is migratable
samples: fix the prompt about SAMPLE_VFIO_MDEV_MBOCHS
vfio/mdev: Use sysfs_emit() to instead of sprintf()
vfio-mdev: add back CONFIG_VFIO dependency
...
Diffstat (limited to 'drivers/vfio/pci')
-rw-r--r-- | drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 4 | ||||
-rw-r--r-- | drivers/vfio/pci/mlx5/cmd.c | 79 | ||||
-rw-r--r-- | drivers/vfio/pci/mlx5/cmd.h | 28 | ||||
-rw-r--r-- | drivers/vfio/pci/mlx5/main.c | 261 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_config.c | 6 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_core.c | 7 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_igd.c | 2 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_intrs.c | 10 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_rdwr.c | 2 |
9 files changed, 321 insertions, 78 deletions
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c index 0bba3b05c6c7..a117eaf21c14 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c @@ -744,7 +744,7 @@ hisi_acc_vf_pci_resume(struct hisi_acc_vf_core_device *hisi_acc_vdev) { struct hisi_acc_vf_migration_file *migf; - migf = kzalloc(sizeof(*migf), GFP_KERNEL); + migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT); if (!migf) return ERR_PTR(-ENOMEM); @@ -863,7 +863,7 @@ hisi_acc_open_saving_migf(struct hisi_acc_vf_core_device *hisi_acc_vdev) struct hisi_acc_vf_migration_file *migf; int ret; - migf = kzalloc(sizeof(*migf), GFP_KERNEL); + migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT); if (!migf) return ERR_PTR(-ENOMEM); diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index 64e68d13cb98..deed156e6165 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -7,6 +7,29 @@ enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 }; +static int mlx5vf_is_migratable(struct mlx5_core_dev *mdev, u16 func_id) +{ + int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + void *query_cap = NULL, *cap; + int ret; + + query_cap = kzalloc(query_sz, GFP_KERNEL); + if (!query_cap) + return -ENOMEM; + + ret = mlx5_vport_get_other_func_cap(mdev, func_id, query_cap, + MLX5_CAP_GENERAL_2); + if (ret) + goto out; + + cap = MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability); + if (!MLX5_GET(cmd_hca_cap_2, cap, migratable)) + ret = -EOPNOTSUPP; +out: + kfree(query_cap); + return ret; +} + static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id, u16 *vhca_id); static void @@ -195,6 +218,10 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev, if (mvdev->vf_id < 0) goto end; + ret = mlx5vf_is_migratable(mvdev->mdev, mvdev->vf_id + 1); + if (ret) + goto end; + if (mlx5vf_cmd_get_vhca_id(mvdev->mdev, mvdev->vf_id + 1, &mvdev->vhca_id)) goto end; @@ -373,7 +400,7 @@ mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, struct mlx5_vhca_data_buffer *buf; int ret; - buf = kzalloc(sizeof(*buf), GFP_KERNEL); + buf = kzalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT); if (!buf) return ERR_PTR(-ENOMEM); @@ -473,7 +500,7 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work) } static int add_buf_header(struct mlx5_vhca_data_buffer *header_buf, - size_t image_size) + size_t image_size, bool initial_pre_copy) { struct mlx5_vf_migration_file *migf = header_buf->migf; struct mlx5_vf_migration_header header = {}; @@ -481,7 +508,9 @@ static int add_buf_header(struct mlx5_vhca_data_buffer *header_buf, struct page *page; u8 *to_buff; - header.image_size = cpu_to_le64(image_size); + header.record_size = cpu_to_le64(image_size); + header.flags = cpu_to_le32(MLX5_MIGF_HEADER_FLAGS_TAG_MANDATORY); + header.tag = cpu_to_le32(MLX5_MIGF_HEADER_TAG_FW_DATA); page = mlx5vf_get_migration_page(header_buf, 0); if (!page) return -EINVAL; @@ -489,12 +518,13 @@ static int add_buf_header(struct mlx5_vhca_data_buffer *header_buf, memcpy(to_buff, &header, sizeof(header)); kunmap_local(to_buff); header_buf->length = sizeof(header); - header_buf->header_image_size = image_size; header_buf->start_pos = header_buf->migf->max_pos; migf->max_pos += header_buf->length; spin_lock_irqsave(&migf->list_lock, flags); list_add_tail(&header_buf->buf_elm, &migf->buf_list); spin_unlock_irqrestore(&migf->list_lock, flags); + if (initial_pre_copy) + migf->pre_copy_initial_bytes += sizeof(header); return 0; } @@ -508,11 +538,14 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context) if (!status) { size_t image_size; unsigned long flags; + bool initial_pre_copy = migf->state != MLX5_MIGF_STATE_PRE_COPY && + !async_data->last_chunk; image_size = MLX5_GET(save_vhca_state_out, async_data->out, actual_image_size); if (async_data->header_buf) { - status = add_buf_header(async_data->header_buf, image_size); + status = add_buf_header(async_data->header_buf, image_size, + initial_pre_copy); if (status) goto err; } @@ -522,6 +555,8 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context) spin_lock_irqsave(&migf->list_lock, flags); list_add_tail(&async_data->buf->buf_elm, &migf->buf_list); spin_unlock_irqrestore(&migf->list_lock, flags); + if (initial_pre_copy) + migf->pre_copy_initial_bytes += image_size; migf->state = async_data->last_chunk ? MLX5_MIGF_STATE_COMPLETE : MLX5_MIGF_STATE_PRE_COPY; wake_up_interruptible(&migf->poll_wait); @@ -583,11 +618,16 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, } if (MLX5VF_PRE_COPY_SUPP(mvdev)) { - header_buf = mlx5vf_get_data_buffer(migf, - sizeof(struct mlx5_vf_migration_header), DMA_NONE); - if (IS_ERR(header_buf)) { - err = PTR_ERR(header_buf); - goto err_free; + if (async_data->last_chunk && migf->buf_header) { + header_buf = migf->buf_header; + migf->buf_header = NULL; + } else { + header_buf = mlx5vf_get_data_buffer(migf, + sizeof(struct mlx5_vf_migration_header), DMA_NONE); + if (IS_ERR(header_buf)) { + err = PTR_ERR(header_buf); + goto err_free; + } } } @@ -790,7 +830,7 @@ static int mlx5vf_create_tracker(struct mlx5_core_dev *mdev, node = interval_tree_iter_first(ranges, 0, ULONG_MAX); for (i = 0; i < num_ranges; i++) { void *addr_range_i_base = range_list_ptr + record_size * i; - unsigned long length = node->last - node->start; + unsigned long length = node->last - node->start + 1; MLX5_SET64(page_track_range, addr_range_i_base, start_address, node->start); @@ -800,7 +840,7 @@ static int mlx5vf_create_tracker(struct mlx5_core_dev *mdev, } WARN_ON(node); - log_addr_space_size = ilog2(total_ranges_len); + log_addr_space_size = ilog2(roundup_pow_of_two(total_ranges_len)); if (log_addr_space_size < (MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_min_addr_space)) || log_addr_space_size > @@ -1032,18 +1072,18 @@ mlx5vf_create_rc_qp(struct mlx5_core_dev *mdev, void *in; int err; - qp = kzalloc(sizeof(*qp), GFP_KERNEL); + qp = kzalloc(sizeof(*qp), GFP_KERNEL_ACCOUNT); if (!qp) return ERR_PTR(-ENOMEM); - qp->rq.wqe_cnt = roundup_pow_of_two(max_recv_wr); - log_rq_stride = ilog2(MLX5_SEND_WQE_DS); - log_rq_sz = ilog2(qp->rq.wqe_cnt); err = mlx5_db_alloc_node(mdev, &qp->db, mdev->priv.numa_node); if (err) goto err_free; if (max_recv_wr) { + qp->rq.wqe_cnt = roundup_pow_of_two(max_recv_wr); + log_rq_stride = ilog2(MLX5_SEND_WQE_DS); + log_rq_sz = ilog2(qp->rq.wqe_cnt); err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_rq_sz, log_rq_stride), &qp->buf, mdev->priv.numa_node); @@ -1213,12 +1253,13 @@ static int alloc_recv_pages(struct mlx5_vhca_recv_buf *recv_buf, int i; recv_buf->page_list = kvcalloc(npages, sizeof(*recv_buf->page_list), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!recv_buf->page_list) return -ENOMEM; for (;;) { - filled = alloc_pages_bulk_array(GFP_KERNEL, npages - done, + filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, + npages - done, recv_buf->page_list + done); if (!filled) goto err; @@ -1248,7 +1289,7 @@ static int register_dma_recv_pages(struct mlx5_core_dev *mdev, recv_buf->dma_addrs = kvcalloc(recv_buf->npages, sizeof(*recv_buf->dma_addrs), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!recv_buf->dma_addrs) return -ENOMEM; diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h index 5483171d57ad..aec4c69dd6c1 100644 --- a/drivers/vfio/pci/mlx5/cmd.h +++ b/drivers/vfio/pci/mlx5/cmd.h @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <linux/vfio_pci_core.h> #include <linux/mlx5/driver.h> +#include <linux/mlx5/vport.h> #include <linux/mlx5/cq.h> #include <linux/mlx5/qp.h> @@ -26,15 +27,33 @@ enum mlx5_vf_migf_state { enum mlx5_vf_load_state { MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER, MLX5_VF_LOAD_STATE_READ_HEADER, + MLX5_VF_LOAD_STATE_PREP_HEADER_DATA, + MLX5_VF_LOAD_STATE_READ_HEADER_DATA, MLX5_VF_LOAD_STATE_PREP_IMAGE, MLX5_VF_LOAD_STATE_READ_IMAGE, MLX5_VF_LOAD_STATE_LOAD_IMAGE, }; +struct mlx5_vf_migration_tag_stop_copy_data { + __le64 stop_copy_size; +}; + +enum mlx5_vf_migf_header_flags { + MLX5_MIGF_HEADER_FLAGS_TAG_MANDATORY = 0, + MLX5_MIGF_HEADER_FLAGS_TAG_OPTIONAL = 1 << 0, +}; + +enum mlx5_vf_migf_header_tag { + MLX5_MIGF_HEADER_TAG_FW_DATA = 0, + MLX5_MIGF_HEADER_TAG_STOP_COPY_SIZE = 1 << 0, +}; + struct mlx5_vf_migration_header { - __le64 image_size; + __le64 record_size; /* For future use in case we may need to change the kernel protocol */ - __le64 flags; + __le32 flags; /* Use mlx5_vf_migf_header_flags */ + __le32 tag; /* Use mlx5_vf_migf_header_tag */ + __u8 data[]; /* Its size is given in the record_size */ }; struct mlx5_vhca_data_buffer { @@ -42,7 +61,6 @@ struct mlx5_vhca_data_buffer { loff_t start_pos; u64 length; u64 allocated_length; - u64 header_image_size; u32 mkey; enum dma_data_direction dma_dir; u8 dmaed:1; @@ -72,6 +90,10 @@ struct mlx5_vf_migration_file { enum mlx5_vf_load_state load_state; u32 pdn; loff_t max_pos; + u64 record_size; + u32 record_tag; + u64 stop_copy_prep_size; + u64 pre_copy_initial_bytes; struct mlx5_vhca_data_buffer *buf; struct mlx5_vhca_data_buffer *buf_header; spinlock_t list_lock; diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index 9feb89c6d939..e897537a9e8a 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -21,8 +21,8 @@ #include "cmd.h" -/* Arbitrary to prevent userspace from consuming endless memory */ -#define MAX_MIGRATION_SIZE (512*1024*1024) +/* Device specification max LOAD size */ +#define MAX_LOAD_SIZE (BIT_ULL(__mlx5_bit_sz(load_vhca_state_in, size)) - 1) static struct mlx5vf_pci_core_device *mlx5vf_drvdata(struct pci_dev *pdev) { @@ -73,12 +73,13 @@ int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, int ret; to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list)); - page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL); + page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT); if (!page_list) return -ENOMEM; do { - filled = alloc_pages_bulk_array(GFP_KERNEL, to_fill, page_list); + filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill, + page_list); if (!filled) { ret = -ENOMEM; goto err; @@ -87,7 +88,7 @@ int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, ret = sg_alloc_append_table_from_pages( &buf->table, page_list, filled, 0, filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC, - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (ret) goto err; @@ -303,6 +304,87 @@ static void mlx5vf_mark_err(struct mlx5_vf_migration_file *migf) wake_up_interruptible(&migf->poll_wait); } +static int mlx5vf_add_stop_copy_header(struct mlx5_vf_migration_file *migf) +{ + size_t size = sizeof(struct mlx5_vf_migration_header) + + sizeof(struct mlx5_vf_migration_tag_stop_copy_data); + struct mlx5_vf_migration_tag_stop_copy_data data = {}; + struct mlx5_vhca_data_buffer *header_buf = NULL; + struct mlx5_vf_migration_header header = {}; + unsigned long flags; + struct page *page; + u8 *to_buff; + int ret; + + header_buf = mlx5vf_get_data_buffer(migf, size, DMA_NONE); + if (IS_ERR(header_buf)) + return PTR_ERR(header_buf); + + header.record_size = cpu_to_le64(sizeof(data)); + header.flags = cpu_to_le32(MLX5_MIGF_HEADER_FLAGS_TAG_OPTIONAL); + header.tag = cpu_to_le32(MLX5_MIGF_HEADER_TAG_STOP_COPY_SIZE); + page = mlx5vf_get_migration_page(header_buf, 0); + if (!page) { + ret = -EINVAL; + goto err; + } + to_buff = kmap_local_page(page); + memcpy(to_buff, &header, sizeof(header)); + header_buf->length = sizeof(header); + data.stop_copy_size = cpu_to_le64(migf->buf->allocated_length); + memcpy(to_buff + sizeof(header), &data, sizeof(data)); + header_buf->length += sizeof(data); + kunmap_local(to_buff); + header_buf->start_pos = header_buf->migf->max_pos; + migf->max_pos += header_buf->length; + spin_lock_irqsave(&migf->list_lock, flags); + list_add_tail(&header_buf->buf_elm, &migf->buf_list); + spin_unlock_irqrestore(&migf->list_lock, flags); + migf->pre_copy_initial_bytes = size; + return 0; +err: + mlx5vf_put_data_buffer(header_buf); + return ret; +} + +static int mlx5vf_prep_stop_copy(struct mlx5_vf_migration_file *migf, + size_t state_size) +{ + struct mlx5_vhca_data_buffer *buf; + size_t inc_state_size; + int ret; + + /* let's be ready for stop_copy size that might grow by 10 percents */ + if (check_add_overflow(state_size, state_size / 10, &inc_state_size)) + inc_state_size = state_size; + + buf = mlx5vf_get_data_buffer(migf, inc_state_size, DMA_FROM_DEVICE); + if (IS_ERR(buf)) + return PTR_ERR(buf); + + migf->buf = buf; + buf = mlx5vf_get_data_buffer(migf, + sizeof(struct mlx5_vf_migration_header), DMA_NONE); + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + goto err; + } + + migf->buf_header = buf; + ret = mlx5vf_add_stop_copy_header(migf); + if (ret) + goto err_header; + return 0; + +err_header: + mlx5vf_put_data_buffer(migf->buf_header); + migf->buf_header = NULL; +err: + mlx5vf_put_data_buffer(migf->buf); + migf->buf = NULL; + return ret; +} + static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -313,7 +395,7 @@ static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd, loff_t *pos = &filp->f_pos; unsigned long minsz; size_t inc_length = 0; - bool end_of_data; + bool end_of_data = false; int ret; if (cmd != VFIO_MIG_GET_PRECOPY_INFO) @@ -357,25 +439,19 @@ static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd, goto err_migf_unlock; } - buf = mlx5vf_get_data_buff_from_pos(migf, *pos, &end_of_data); - if (buf) { - if (buf->start_pos == 0) { - info.initial_bytes = buf->header_image_size - *pos; - } else if (buf->start_pos == - sizeof(struct mlx5_vf_migration_header)) { - /* First data buffer following the header */ - info.initial_bytes = buf->start_pos + - buf->length - *pos; - } else { - info.dirty_bytes = buf->start_pos + buf->length - *pos; - } + if (migf->pre_copy_initial_bytes > *pos) { + info.initial_bytes = migf->pre_copy_initial_bytes - *pos; } else { - if (!end_of_data) { - ret = -EINVAL; - goto err_migf_unlock; + buf = mlx5vf_get_data_buff_from_pos(migf, *pos, &end_of_data); + if (buf) { + info.dirty_bytes = buf->start_pos + buf->length - *pos; + } else { + if (!end_of_data) { + ret = -EINVAL; + goto err_migf_unlock; + } + info.dirty_bytes = inc_length; } - - info.dirty_bytes = inc_length; } if (!end_of_data || !inc_length) { @@ -440,10 +516,16 @@ static int mlx5vf_pci_save_device_inc_data(struct mlx5vf_pci_core_device *mvdev) if (ret) goto err; - buf = mlx5vf_get_data_buffer(migf, length, DMA_FROM_DEVICE); - if (IS_ERR(buf)) { - ret = PTR_ERR(buf); - goto err; + /* Checking whether we have a matching pre-allocated buffer that can fit */ + if (migf->buf && migf->buf->allocated_length >= length) { + buf = migf->buf; + migf->buf = NULL; + } else { + buf = mlx5vf_get_data_buffer(migf, length, DMA_FROM_DEVICE); + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + goto err; + } } ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, true, false); @@ -467,7 +549,7 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev, bool track) size_t length; int ret; - migf = kzalloc(sizeof(*migf), GFP_KERNEL); + migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT); if (!migf) return ERR_PTR(-ENOMEM); @@ -502,6 +584,12 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev, bool track) if (ret) goto out_pd; + if (track) { + ret = mlx5vf_prep_stop_copy(migf, length); + if (ret) + goto out_pd; + } + buf = mlx5vf_alloc_data_buffer(migf, length, DMA_FROM_DEVICE); if (IS_ERR(buf)) { ret = PTR_ERR(buf); @@ -515,7 +603,7 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev, bool track) out_save: mlx5vf_free_data_buffer(buf); out_pd: - mlx5vf_cmd_dealloc_pd(migf); + mlx5fv_cmd_clean_migf_resources(migf); out_free: fput(migf->filp); end: @@ -564,7 +652,7 @@ mlx5vf_resume_read_image_no_header(struct mlx5_vhca_data_buffer *vhca_buf, { int ret; - if (requested_length > MAX_MIGRATION_SIZE) + if (requested_length > MAX_LOAD_SIZE) return -ENOMEM; if (vhca_buf->allocated_length < requested_length) { @@ -616,6 +704,56 @@ mlx5vf_resume_read_image(struct mlx5_vf_migration_file *migf, } static int +mlx5vf_resume_read_header_data(struct mlx5_vf_migration_file *migf, + struct mlx5_vhca_data_buffer *vhca_buf, + const char __user **buf, size_t *len, + loff_t *pos, ssize_t *done) +{ + size_t copy_len, to_copy; + size_t required_data; + u8 *to_buff; + int ret; + + required_data = migf->record_size - vhca_buf->length; + to_copy = min_t(size_t, *len, required_data); + copy_len = to_copy; + while (to_copy) { + ret = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, &to_copy, pos, + done); + if (ret) + return ret; + } + + *len -= copy_len; + if (vhca_buf->length == migf->record_size) { + switch (migf->record_tag) { + case MLX5_MIGF_HEADER_TAG_STOP_COPY_SIZE: + { + struct page *page; + + page = mlx5vf_get_migration_page(vhca_buf, 0); + if (!page) + return -EINVAL; + to_buff = kmap_local_page(page); + migf->stop_copy_prep_size = min_t(u64, + le64_to_cpup((__le64 *)to_buff), MAX_LOAD_SIZE); + kunmap_local(to_buff); + break; + } + default: + /* Optional tag */ + break; + } + + migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER; + migf->max_pos += migf->record_size; + vhca_buf->length = 0; + } + + return 0; +} + +static int mlx5vf_resume_read_header(struct mlx5_vf_migration_file *migf, struct mlx5_vhca_data_buffer *vhca_buf, const char __user **buf, @@ -645,23 +783,38 @@ mlx5vf_resume_read_header(struct mlx5_vf_migration_file *migf, *len -= copy_len; vhca_buf->length += copy_len; if (vhca_buf->length == sizeof(struct mlx5_vf_migration_header)) { - u64 flags; + u64 record_size; + u32 flags; - vhca_buf->header_image_size = le64_to_cpup((__le64 *)to_buff); - if (vhca_buf->header_image_size > MAX_MIGRATION_SIZE) { + record_size = le64_to_cpup((__le64 *)to_buff); + if (record_size > MAX_LOAD_SIZE) { ret = -ENOMEM; goto end; } - flags = le64_to_cpup((__le64 *)(to_buff + + migf->record_size = record_size; + flags = le32_to_cpup((__le32 *)(to_buff + offsetof(struct mlx5_vf_migration_header, flags))); - if (flags) { - ret = -EOPNOTSUPP; - goto end; + migf->record_tag = le32_to_cpup((__le32 *)(to_buff + + offsetof(struct mlx5_vf_migration_header, tag))); + switch (migf->record_tag) { + case MLX5_MIGF_HEADER_TAG_FW_DATA: + migf->load_state = MLX5_VF_LOAD_STATE_PREP_IMAGE; + break; + case MLX5_MIGF_HEADER_TAG_STOP_COPY_SIZE: + migf->load_state = MLX5_VF_LOAD_STATE_PREP_HEADER_DATA; + break; + default: + if (!(flags & MLX5_MIGF_HEADER_FLAGS_TAG_OPTIONAL)) { + ret = -EOPNOTSUPP; + goto end; + } + /* We may read and skip this optional record data */ + migf->load_state = MLX5_VF_LOAD_STATE_PREP_HEADER_DATA; } - migf->load_state = MLX5_VF_LOAD_STATE_PREP_IMAGE; migf->max_pos += vhca_buf->length; + vhca_buf->length = 0; *has_work = true; } end: @@ -705,9 +858,34 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf, if (ret) goto out_unlock; break; + case MLX5_VF_LOAD_STATE_PREP_HEADER_DATA: + if (vhca_buf_header->allocated_length < migf->record_size) { + mlx5vf_free_data_buffer(vhca_buf_header); + + migf->buf_header = mlx5vf_alloc_data_buffer(migf, + migf->record_size, DMA_NONE); + if (IS_ERR(migf->buf_header)) { + ret = PTR_ERR(migf->buf_header); + migf->buf_header = NULL; + goto out_unlock; + } + + vhca_buf_header = migf->buf_header; + } + + vhca_buf_header->start_pos = migf->max_pos; + migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER_DATA; + break; + case MLX5_VF_LOAD_STATE_READ_HEADER_DATA: + ret = mlx5vf_resume_read_header_data(migf, vhca_buf_header, + &buf, &len, pos, &done); + if (ret) + goto out_unlock; + break; case MLX5_VF_LOAD_STATE_PREP_IMAGE: { - u64 size = vhca_buf_header->header_image_size; + u64 size = max(migf->record_size, + migf->stop_copy_prep_size); if (vhca_buf->allocated_length < size) { mlx5vf_free_data_buffer(vhca_buf); @@ -736,7 +914,7 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf, break; case MLX5_VF_LOAD_STATE_READ_IMAGE: ret = mlx5vf_resume_read_image(migf, vhca_buf, - vhca_buf_header->header_image_size, + migf->record_size, &buf, &len, pos, &done, &has_work); if (ret) goto out_unlock; @@ -749,7 +927,6 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf, /* prep header buf for next image */ vhca_buf_header->length = 0; - vhca_buf_header->header_image_size = 0; /* prep data buf for next image */ vhca_buf->length = 0; @@ -781,7 +958,7 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev) struct mlx5_vhca_data_buffer *buf; int ret; - migf = kzalloc(sizeof(*migf), GFP_KERNEL); + migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT); if (!migf) return ERR_PTR(-ENOMEM); diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 4a350421c5f6..523e0144c86f 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -1244,7 +1244,7 @@ static int vfio_msi_cap_len(struct vfio_pci_core_device *vdev, u8 pos) if (vdev->msi_perm) return len; - vdev->msi_perm = kmalloc(sizeof(struct perm_bits), GFP_KERNEL); + vdev->msi_perm = kmalloc(sizeof(struct perm_bits), GFP_KERNEL_ACCOUNT); if (!vdev->msi_perm) return -ENOMEM; @@ -1731,11 +1731,11 @@ int vfio_config_init(struct vfio_pci_core_device *vdev) * no requirements on the length of a capability, so the gap between * capabilities needs byte granularity. */ - map = kmalloc(pdev->cfg_size, GFP_KERNEL); + map = kmalloc(pdev->cfg_size, GFP_KERNEL_ACCOUNT); if (!map) return -ENOMEM; - vconfig = kmalloc(pdev->cfg_size, GFP_KERNEL); + vconfig = kmalloc(pdev->cfg_size, GFP_KERNEL_ACCOUNT); if (!vconfig) { kfree(map); return -ENOMEM; diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index c49f8f2b2865..a5ab416cf476 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -144,7 +144,8 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev) * of the exclusive page in case that hot-add * device's bar is assigned into it. */ - dummy_res = kzalloc(sizeof(*dummy_res), GFP_KERNEL); + dummy_res = + kzalloc(sizeof(*dummy_res), GFP_KERNEL_ACCOUNT); if (dummy_res == NULL) goto no_mmap; @@ -863,7 +864,7 @@ int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev, region = krealloc(vdev->region, (vdev->num_regions + 1) * sizeof(*region), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!region) return -ENOMEM; @@ -1644,7 +1645,7 @@ static int __vfio_pci_add_vma(struct vfio_pci_core_device *vdev, { struct vfio_pci_mmap_vma *mmap_vma; - mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL); + mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL_ACCOUNT); if (!mmap_vma) return -ENOMEM; diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c index 5e6ca5926954..dd70e2431bd7 100644 --- a/drivers/vfio/pci/vfio_pci_igd.c +++ b/drivers/vfio/pci/vfio_pci_igd.c @@ -180,7 +180,7 @@ static int vfio_pci_igd_opregion_init(struct vfio_pci_core_device *vdev) if (!addr || !(~addr)) return -ENODEV; - opregionvbt = kzalloc(sizeof(*opregionvbt), GFP_KERNEL); + opregionvbt = kzalloc(sizeof(*opregionvbt), GFP_KERNEL_ACCOUNT); if (!opregionvbt) return -ENOMEM; diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 40c3d7cf163f..bffb0741518b 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -177,7 +177,7 @@ static int vfio_intx_enable(struct vfio_pci_core_device *vdev) if (!vdev->pdev->irq) return -ENODEV; - vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL); + vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL_ACCOUNT); if (!vdev->ctx) return -ENOMEM; @@ -216,7 +216,7 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd) if (fd < 0) /* Disable only */ return 0; - vdev->ctx[0].name = kasprintf(GFP_KERNEL, "vfio-intx(%s)", + vdev->ctx[0].name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", pci_name(pdev)); if (!vdev->ctx[0].name) return -ENOMEM; @@ -284,7 +284,8 @@ static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi if (!is_irq_none(vdev)) return -EINVAL; - vdev->ctx = kcalloc(nvec, sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL); + vdev->ctx = kcalloc(nvec, sizeof(struct vfio_pci_irq_ctx), + GFP_KERNEL_ACCOUNT); if (!vdev->ctx) return -ENOMEM; @@ -343,7 +344,8 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev, if (fd < 0) return 0; - vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "vfio-msi%s[%d](%s)", + vdev->ctx[vector].name = kasprintf(GFP_KERNEL_ACCOUNT, + "vfio-msi%s[%d](%s)", msix ? "x" : "", vector, pci_name(pdev)); if (!vdev->ctx[vector].name) diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index e352a033b4ae..e27de61ac9fe 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -470,7 +470,7 @@ int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset, goto out_unlock; } - ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL); + ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL_ACCOUNT); if (!ioeventfd) { ret = -ENOMEM; goto out_unlock; |