diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-05-05 13:31:39 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-05-05 13:31:39 -0700 |
commit | 16bb86b5569cb7489367101f6ed69b25682b47db (patch) | |
tree | 4b0ec63032cb93491d65fc5766243bfc7ff60346 | |
parent | 57151b502cbc0fa6ff9074a76883fa9d9eda322e (diff) | |
parent | d7bce85aa7b92b5de8f69b3bcedfe51d7b1aabe1 (diff) |
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio updates from Michael Tsirkin:
"A bunch of new drivers including vdpa support for block and
virtio-vdpa.
Beginning of vq kick (aka doorbell) mapping support.
Misc fixes"
* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (40 commits)
virtio_pci_modern: correct sparse tags for notify
virtio_pci_modern: __force cast the notify mapping
vDPA/ifcvf: get_config_size should return dev specific config size
vDPA/ifcvf: enable Intel C5000X-PL virtio-block for vDPA
vDPA/ifcvf: deduce VIRTIO device ID when probe
vdpa_sim_blk: add support for vdpa management tool
vdpa_sim_blk: handle VIRTIO_BLK_T_GET_ID
vdpa_sim_blk: implement ramdisk behaviour
vdpa: add vdpa simulator for block device
vhost/vdpa: Remove the restriction that only supports virtio-net devices
vhost/vdpa: use get_config_size callback in vhost_vdpa_config_validate()
vdpa: add get_config_size callback in vdpa_config_ops
vdpa_sim: cleanup kiovs in vdpasim_free()
vringh: add vringh_kiov_length() helper
vringh: implement vringh_kiov_advance()
vringh: explain more about cleaning riov and wiov
vringh: reset kiov 'consumed' field in __vringh_iov()
vringh: add 'iotlb_lock' to synchronize iotlb accesses
vdpa_sim: use iova module to allocate IOVA addresses
vDPA/ifcvf: deduce VIRTIO device ID from pdev ids
...
-rw-r--r-- | drivers/Makefile | 1 | ||||
-rw-r--r-- | drivers/net/virtio_net.c | 10 | ||||
-rw-r--r-- | drivers/vdpa/Kconfig | 15 | ||||
-rw-r--r-- | drivers/vdpa/Makefile | 1 | ||||
-rw-r--r-- | drivers/vdpa/ifcvf/ifcvf_base.c | 24 | ||||
-rw-r--r-- | drivers/vdpa/ifcvf/ifcvf_base.h | 26 | ||||
-rw-r--r-- | drivers/vdpa/ifcvf/ifcvf_main.c | 86 | ||||
-rw-r--r-- | drivers/vdpa/mlx5/net/mlx5_vnet.c | 85 | ||||
-rw-r--r-- | drivers/vdpa/vdpa.c | 12 | ||||
-rw-r--r-- | drivers/vdpa/vdpa_sim/Makefile | 1 | ||||
-rw-r--r-- | drivers/vdpa/vdpa_sim/vdpa_sim.c | 127 | ||||
-rw-r--r-- | drivers/vdpa/vdpa_sim/vdpa_sim.h | 2 | ||||
-rw-r--r-- | drivers/vdpa/vdpa_sim/vdpa_sim_blk.c | 338 | ||||
-rw-r--r-- | drivers/vdpa/virtio_pci/Makefile | 2 | ||||
-rw-r--r-- | drivers/vdpa/virtio_pci/vp_vdpa.c | 484 | ||||
-rw-r--r-- | drivers/vhost/vdpa.c | 16 | ||||
-rw-r--r-- | drivers/vhost/vringh.c | 69 | ||||
-rw-r--r-- | drivers/virtio/virtio_balloon.c | 2 | ||||
-rw-r--r-- | drivers/virtio/virtio_pci_modern.c | 27 | ||||
-rw-r--r-- | drivers/virtio/virtio_pci_modern_dev.c | 67 | ||||
-rw-r--r-- | include/linux/vdpa.h | 42 | ||||
-rw-r--r-- | include/linux/virtio_pci_modern.h | 11 | ||||
-rw-r--r-- | include/linux/vringh.h | 19 |
23 files changed, 1295 insertions, 172 deletions
diff --git a/drivers/Makefile b/drivers/Makefile index 8f3fee8281ad..5a6d613e868d 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -42,6 +42,7 @@ obj-$(CONFIG_DMADEVICES) += dma/ obj-y += soc/ obj-$(CONFIG_VIRTIO) += virtio/ +obj-$(CONFIG_VIRTIO_PCI_LIB) += virtio/ obj-$(CONFIG_VDPA) += vdpa/ obj-$(CONFIG_XEN) += xen/ diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 7fda2ae4c40f..9b6a4a875c55 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2870,9 +2870,13 @@ static int virtnet_alloc_queues(struct virtnet_info *vi) { int i; - vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL); - if (!vi->ctrl) - goto err_ctrl; + if (vi->has_cvq) { + vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL); + if (!vi->ctrl) + goto err_ctrl; + } else { + vi->ctrl = NULL; + } vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL); if (!vi->sq) goto err_sq; diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig index ffd1e098bfd2..a503c1b2bfd9 100644 --- a/drivers/vdpa/Kconfig +++ b/drivers/vdpa/Kconfig @@ -14,6 +14,7 @@ config VDPA_SIM depends on RUNTIME_TESTING_MENU && HAS_DMA select DMA_OPS select VHOST_RING + select IOMMU_IOVA help Enable this module to support vDPA device simulators. These devices are used for testing, prototyping and development of vDPA. @@ -25,6 +26,13 @@ config VDPA_SIM_NET help vDPA networking device simulator which loops TX traffic back to RX. +config VDPA_SIM_BLOCK + tristate "vDPA simulator for block device" + depends on VDPA_SIM + help + vDPA block device simulator which terminates IO request in a + memory buffer. + config IFCVF tristate "Intel IFC VF vDPA driver" depends on PCI_MSI @@ -52,4 +60,11 @@ config MLX5_VDPA_NET be executed by the hardware. It also supports a variety of stateless offloads depending on the actual device used and firmware version. 
+config VP_VDPA + tristate "Virtio PCI bridge vDPA driver" + select VIRTIO_PCI_LIB + depends on PCI_MSI + help + This kernel module bridges virtio PCI device to vDPA bus. + endif # VDPA diff --git a/drivers/vdpa/Makefile b/drivers/vdpa/Makefile index d160e9b63a66..67fe7f3d6943 100644 --- a/drivers/vdpa/Makefile +++ b/drivers/vdpa/Makefile @@ -3,3 +3,4 @@ obj-$(CONFIG_VDPA) += vdpa.o obj-$(CONFIG_VDPA_SIM) += vdpa_sim/ obj-$(CONFIG_IFCVF) += ifcvf/ obj-$(CONFIG_MLX5_VDPA) += mlx5/ +obj-$(CONFIG_VP_VDPA) += virtio_pci/ diff --git a/drivers/vdpa/ifcvf/ifcvf_base.c b/drivers/vdpa/ifcvf/ifcvf_base.c index f2a128e56de5..1a661ab45af5 100644 --- a/drivers/vdpa/ifcvf/ifcvf_base.c +++ b/drivers/vdpa/ifcvf/ifcvf_base.c @@ -202,10 +202,11 @@ static void ifcvf_add_status(struct ifcvf_hw *hw, u8 status) ifcvf_get_status(hw); } -u64 ifcvf_get_features(struct ifcvf_hw *hw) +u64 ifcvf_get_hw_features(struct ifcvf_hw *hw) { struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg; u32 features_lo, features_hi; + u64 features; ifc_iowrite32(0, &cfg->device_feature_select); features_lo = ifc_ioread32(&cfg->device_feature); @@ -213,7 +214,26 @@ u64 ifcvf_get_features(struct ifcvf_hw *hw) ifc_iowrite32(1, &cfg->device_feature_select); features_hi = ifc_ioread32(&cfg->device_feature); - return ((u64)features_hi << 32) | features_lo; + features = ((u64)features_hi << 32) | features_lo; + + return features; +} + +u64 ifcvf_get_features(struct ifcvf_hw *hw) +{ + return hw->hw_features; +} + +int ifcvf_verify_min_features(struct ifcvf_hw *hw, u64 features) +{ + struct ifcvf_adapter *ifcvf = vf_to_adapter(hw); + + if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)) && features) { + IFCVF_ERR(ifcvf->pdev, "VIRTIO_F_ACCESS_PLATFORM is not negotiated\n"); + return -EINVAL; + } + + return 0; } void ifcvf_read_net_config(struct ifcvf_hw *hw, u64 offset, diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h index 64696d63fe07..0111bfdeb342 100644 --- 
a/drivers/vdpa/ifcvf/ifcvf_base.h +++ b/drivers/vdpa/ifcvf/ifcvf_base.h @@ -15,15 +15,26 @@ #include <linux/pci_regs.h> #include <linux/vdpa.h> #include <uapi/linux/virtio_net.h> +#include <uapi/linux/virtio_blk.h> #include <uapi/linux/virtio_config.h> #include <uapi/linux/virtio_pci.h> -#define IFCVF_VENDOR_ID 0x1AF4 -#define IFCVF_DEVICE_ID 0x1041 -#define IFCVF_SUBSYS_VENDOR_ID 0x8086 -#define IFCVF_SUBSYS_DEVICE_ID 0x001A +#define N3000_VENDOR_ID 0x1AF4 +#define N3000_DEVICE_ID 0x1041 +#define N3000_SUBSYS_VENDOR_ID 0x8086 +#define N3000_SUBSYS_DEVICE_ID 0x001A -#define IFCVF_SUPPORTED_FEATURES \ +#define C5000X_PL_VENDOR_ID 0x1AF4 +#define C5000X_PL_DEVICE_ID 0x1000 +#define C5000X_PL_SUBSYS_VENDOR_ID 0x8086 +#define C5000X_PL_SUBSYS_DEVICE_ID 0x0001 + +#define C5000X_PL_BLK_VENDOR_ID 0x1AF4 +#define C5000X_PL_BLK_DEVICE_ID 0x1001 +#define C5000X_PL_BLK_SUBSYS_VENDOR_ID 0x8086 +#define C5000X_PL_BLK_SUBSYS_DEVICE_ID 0x0002 + +#define IFCVF_NET_SUPPORTED_FEATURES \ ((1ULL << VIRTIO_NET_F_MAC) | \ (1ULL << VIRTIO_F_ANY_LAYOUT) | \ (1ULL << VIRTIO_F_VERSION_1) | \ @@ -78,6 +89,8 @@ struct ifcvf_hw { void __iomem *notify_base; u32 notify_off_multiplier; u64 req_features; + u64 hw_features; + u32 dev_type; struct virtio_pci_common_cfg __iomem *common_cfg; void __iomem *net_cfg; struct vring_info vring[IFCVF_MAX_QUEUE_PAIRS * 2]; @@ -116,7 +129,10 @@ void ifcvf_set_status(struct ifcvf_hw *hw, u8 status); void io_write64_twopart(u64 val, u32 *lo, u32 *hi); void ifcvf_reset(struct ifcvf_hw *hw); u64 ifcvf_get_features(struct ifcvf_hw *hw); +u64 ifcvf_get_hw_features(struct ifcvf_hw *hw); +int ifcvf_verify_min_features(struct ifcvf_hw *hw, u64 features); u16 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid); int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u16 num); struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw); +int ifcvf_probed_virtio_net(struct ifcvf_hw *hw); #endif /* _IFCVF_H_ */ diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c 
b/drivers/vdpa/ifcvf/ifcvf_main.c index d555a6a5d1ba..ab0ab5cf0f6e 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -14,7 +14,6 @@ #include <linux/sysfs.h> #include "ifcvf_base.h" -#define VERSION_STRING "0.1" #define DRIVER_AUTHOR "Intel Corporation" #define IFCVF_DRIVER_NAME "ifcvf" @@ -169,10 +168,23 @@ static struct ifcvf_hw *vdpa_to_vf(struct vdpa_device *vdpa_dev) static u64 ifcvf_vdpa_get_features(struct vdpa_device *vdpa_dev) { + struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev); struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + struct pci_dev *pdev = adapter->pdev; + u64 features; - features = ifcvf_get_features(vf) & IFCVF_SUPPORTED_FEATURES; + switch (vf->dev_type) { + case VIRTIO_ID_NET: + features = ifcvf_get_features(vf) & IFCVF_NET_SUPPORTED_FEATURES; + break; + case VIRTIO_ID_BLOCK: + features = ifcvf_get_features(vf); + break; + default: + features = 0; + IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", vf->dev_type); + } return features; } @@ -180,6 +192,11 @@ static u64 ifcvf_vdpa_get_features(struct vdpa_device *vdpa_dev) static int ifcvf_vdpa_set_features(struct vdpa_device *vdpa_dev, u64 features) { struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + int ret; + + ret = ifcvf_verify_min_features(vf, features); + if (ret) + return ret; vf->req_features = features; @@ -319,12 +336,17 @@ static u32 ifcvf_vdpa_get_generation(struct vdpa_device *vdpa_dev) static u32 ifcvf_vdpa_get_device_id(struct vdpa_device *vdpa_dev) { - return VIRTIO_ID_NET; + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + return vf->dev_type; } static u32 ifcvf_vdpa_get_vendor_id(struct vdpa_device *vdpa_dev) { - return IFCVF_SUBSYS_VENDOR_ID; + struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev); + struct pci_dev *pdev = adapter->pdev; + + return pdev->subsystem_vendor; } static u32 ifcvf_vdpa_get_vq_align(struct vdpa_device *vdpa_dev) @@ -332,6 +354,28 @@ static u32 ifcvf_vdpa_get_vq_align(struct vdpa_device *vdpa_dev) return 
IFCVF_QUEUE_ALIGNMENT; } +static size_t ifcvf_vdpa_get_config_size(struct vdpa_device *vdpa_dev) +{ + struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev); + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + struct pci_dev *pdev = adapter->pdev; + size_t size; + + switch (vf->dev_type) { + case VIRTIO_ID_NET: + size = sizeof(struct virtio_net_config); + break; + case VIRTIO_ID_BLOCK: + size = sizeof(struct virtio_blk_config); + break; + default: + size = 0; + IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", vf->dev_type); + } + + return size; +} + static void ifcvf_vdpa_get_config(struct vdpa_device *vdpa_dev, unsigned int offset, void *buf, unsigned int len) @@ -392,6 +436,7 @@ static const struct vdpa_config_ops ifc_vdpa_ops = { .get_device_id = ifcvf_vdpa_get_device_id, .get_vendor_id = ifcvf_vdpa_get_vendor_id, .get_vq_align = ifcvf_vdpa_get_vq_align, + .get_config_size = ifcvf_vdpa_get_config_size, .get_config = ifcvf_vdpa_get_config, .set_config = ifcvf_vdpa_set_config, .set_config_cb = ifcvf_vdpa_set_config_cb, @@ -441,6 +486,19 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_drvdata(pdev, adapter); vf = &adapter->vf; + + /* This driver drives both modern virtio devices and transitional + * devices in modern mode. + * vDPA requires feature bit VIRTIO_F_ACCESS_PLATFORM, + * so legacy devices and transitional devices in legacy + * mode will not work for vDPA, this driver will not + * drive devices with legacy interface. 
+ */ + if (pdev->device < 0x1040) + vf->dev_type = pdev->subsystem_device; + else + vf->dev_type = pdev->device - 0x1040; + vf->base = pcim_iomap_table(pdev); adapter->pdev = pdev; @@ -455,6 +513,8 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) vf->vring[i].irq = -EINVAL; + vf->hw_features = ifcvf_get_hw_features(vf); + ret = vdpa_register_device(&adapter->vdpa, IFCVF_MAX_QUEUE_PAIRS * 2); if (ret) { IFCVF_ERR(pdev, "Failed to register ifcvf to vdpa bus"); @@ -476,10 +536,19 @@ static void ifcvf_remove(struct pci_dev *pdev) } static struct pci_device_id ifcvf_pci_ids[] = { - { PCI_DEVICE_SUB(IFCVF_VENDOR_ID, - IFCVF_DEVICE_ID, - IFCVF_SUBSYS_VENDOR_ID, - IFCVF_SUBSYS_DEVICE_ID) }, + { PCI_DEVICE_SUB(N3000_VENDOR_ID, + N3000_DEVICE_ID, + N3000_SUBSYS_VENDOR_ID, + N3000_SUBSYS_DEVICE_ID) }, + { PCI_DEVICE_SUB(C5000X_PL_VENDOR_ID, + C5000X_PL_DEVICE_ID, + C5000X_PL_SUBSYS_VENDOR_ID, + C5000X_PL_SUBSYS_DEVICE_ID) }, + { PCI_DEVICE_SUB(C5000X_PL_BLK_VENDOR_ID, + C5000X_PL_BLK_DEVICE_ID, + C5000X_PL_BLK_SUBSYS_VENDOR_ID, + C5000X_PL_BLK_SUBSYS_DEVICE_ID) }, + { 0 }, }; MODULE_DEVICE_TABLE(pci, ifcvf_pci_ids); @@ -494,4 +563,3 @@ static struct pci_driver ifcvf_driver = { module_pci_driver(ifcvf_driver); MODULE_LICENSE("GPL v2"); -MODULE_VERSION(VERSION_STRING); diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 4d2809c7d4e3..189e4385df40 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1809,6 +1809,11 @@ err_setup: ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED; } +static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev) +{ + return sizeof(struct virtio_net_config); +} + static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf, unsigned int len) { @@ -1895,6 +1900,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = { .get_vendor_id = mlx5_vdpa_get_vendor_id, .get_status 
= mlx5_vdpa_get_status, .set_status = mlx5_vdpa_set_status, + .get_config_size = mlx5_vdpa_get_config_size, .get_config = mlx5_vdpa_get_config, .set_config = mlx5_vdpa_set_config, .get_generation = mlx5_vdpa_get_generation, @@ -1974,23 +1980,32 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev) } } -static int mlx5v_probe(struct auxiliary_device *adev, - const struct auxiliary_device_id *id) +struct mlx5_vdpa_mgmtdev { + struct vdpa_mgmt_dev mgtdev; + struct mlx5_adev *madev; + struct mlx5_vdpa_net *ndev; +}; + +static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name) { - struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev); - struct mlx5_core_dev *mdev = madev->mdev; + struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); struct virtio_net_config *config; struct mlx5_vdpa_dev *mvdev; struct mlx5_vdpa_net *ndev; + struct mlx5_core_dev *mdev; u32 max_vqs; int err; + if (mgtdev->ndev) + return -ENOSPC; + + mdev = mgtdev->madev->mdev; /* we save one virtqueue for control virtqueue should we require it */ max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues); max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS); ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops, - NULL); + name); if (IS_ERR(ndev)) return PTR_ERR(ndev); @@ -2017,11 +2032,12 @@ static int mlx5v_probe(struct auxiliary_device *adev, if (err) goto err_res; - err = vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs)); + mvdev->vdev.mdev = &mgtdev->mgtdev; + err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs)); if (err) goto err_reg; - dev_set_drvdata(&adev->dev, ndev); + mgtdev->ndev = ndev; return 0; err_reg: @@ -2034,11 +2050,62 @@ err_mtu: return err; } +static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev) +{ + struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); + + 
_vdpa_unregister_device(dev); + mgtdev->ndev = NULL; +} + +static const struct vdpa_mgmtdev_ops mdev_ops = { + .dev_add = mlx5_vdpa_dev_add, + .dev_del = mlx5_vdpa_dev_del, +}; + +static struct virtio_device_id id_table[] = { + { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, + { 0 }, +}; + +static int mlx5v_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) + +{ + struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev); + struct mlx5_core_dev *mdev = madev->mdev; + struct mlx5_vdpa_mgmtdev *mgtdev; + int err; + + mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL); + if (!mgtdev) + return -ENOMEM; + + mgtdev->mgtdev.ops = &mdev_ops; + mgtdev->mgtdev.device = mdev->device; + mgtdev->mgtdev.id_table = id_table; + mgtdev->madev = madev; + + err = vdpa_mgmtdev_register(&mgtdev->mgtdev); + if (err) + goto reg_err; + + dev_set_drvdata(&adev->dev, mgtdev); + + return 0; + +reg_err: + kfree(mgtdev); + return err; +} + static void mlx5v_remove(struct auxiliary_device *adev) { - struct mlx5_vdpa_dev *mvdev = dev_get_drvdata(&adev->dev); + struct mlx5_vdpa_mgmtdev *mgtdev; - vdpa_unregister_device(&mvdev->vdev); + mgtdev = dev_get_drvdata(&adev->dev); + vdpa_mgmtdev_unregister(&mgtdev->mgtdev); + kfree(mgtdev); } static const struct auxiliary_device_id mlx5v_id_table[] = { diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 5cffce67cab0..bb3f1d1f0422 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -75,8 +75,8 @@ static void vdpa_release_dev(struct device *d) * Driver should use vdpa_alloc_device() wrapper macro instead of * using this directly. * - * Returns an error when parent/config/dma_dev is not set or fail to get - * ida. + * Return: Returns an error when parent/config/dma_dev is not set or fail to get + * ida. 
*/ struct vdpa_device *__vdpa_alloc_device(struct device *parent, const struct vdpa_config_ops *config, @@ -157,7 +157,7 @@ static int __vdpa_register_device(struct vdpa_device *vdev, int nvqs) * @vdev: the vdpa device to be registered to vDPA bus * @nvqs: number of virtqueues supported by this device * - * Returns an error when fail to add device to vDPA bus + * Return: Returns an error when fail to add device to vDPA bus */ int _vdpa_register_device(struct vdpa_device *vdev, int nvqs) { @@ -174,7 +174,7 @@ EXPORT_SYMBOL_GPL(_vdpa_register_device); * @vdev: the vdpa device to be registered to vDPA bus * @nvqs: number of virtqueues supported by this device * - * Returns an error when fail to add to vDPA bus + * Return: Returns an error when fail to add to vDPA bus */ int vdpa_register_device(struct vdpa_device *vdev, int nvqs) { @@ -218,7 +218,7 @@ EXPORT_SYMBOL_GPL(vdpa_unregister_device); * @drv: the vdpa device driver to be registered * @owner: module owner of the driver * - * Returns an err when fail to do the registration + * Return: Returns an err when fail to do the registration */ int __vdpa_register_driver(struct vdpa_driver *drv, struct module *owner) { @@ -245,6 +245,8 @@ EXPORT_SYMBOL_GPL(vdpa_unregister_driver); * @mdev: Pointer to vdpa management device * vdpa_mgmtdev_register() register a vdpa management device which supports * vdpa device management. + * Return: Returns 0 on success or failure when required callback ops are not + * initialized. 
*/ int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev) { diff --git a/drivers/vdpa/vdpa_sim/Makefile b/drivers/vdpa/vdpa_sim/Makefile index 79d4536d347e..d458103302f2 100644 --- a/drivers/vdpa/vdpa_sim/Makefile +++ b/drivers/vdpa/vdpa_sim/Makefile @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_VDPA_SIM) += vdpa_sim.o obj-$(CONFIG_VDPA_SIM_NET) += vdpa_sim_net.o +obj-$(CONFIG_VDPA_SIM_BLOCK) += vdpa_sim_blk.o diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 5b6b2f87d40c..98f793bc9376 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -17,6 +17,7 @@ #include <linux/vringh.h> #include <linux/vdpa.h> #include <linux/vhost_iotlb.h> +#include <linux/iova.h> #include "vdpa_sim.h" @@ -128,30 +129,57 @@ static int dir_to_perm(enum dma_data_direction dir) return perm; } +static dma_addr_t vdpasim_map_range(struct vdpasim *vdpasim, phys_addr_t paddr, + size_t size, unsigned int perm) +{ + struct iova *iova; + dma_addr_t dma_addr; + int ret; + + /* We set the limit_pfn to the maximum (ULONG_MAX - 1) */ + iova = alloc_iova(&vdpasim->iova, size, ULONG_MAX - 1, true); + if (!iova) + return DMA_MAPPING_ERROR; + + dma_addr = iova_dma_addr(&vdpasim->iova, iova); + + spin_lock(&vdpasim->iommu_lock); + ret = vhost_iotlb_add_range(vdpasim->iommu, (u64)dma_addr, + (u64)dma_addr + size - 1, (u64)paddr, perm); + spin_unlock(&vdpasim->iommu_lock); + + if (ret) { + __free_iova(&vdpasim->iova, iova); + return DMA_MAPPING_ERROR; + } + + return dma_addr; +} + +static void vdpasim_unmap_range(struct vdpasim *vdpasim, dma_addr_t dma_addr, + size_t size) +{ + spin_lock(&vdpasim->iommu_lock); + vhost_iotlb_del_range(vdpasim->iommu, (u64)dma_addr, + (u64)dma_addr + size - 1); + spin_unlock(&vdpasim->iommu_lock); + + free_iova(&vdpasim->iova, iova_pfn(&vdpasim->iova, dma_addr)); +} + static dma_addr_t vdpasim_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum 
dma_data_direction dir, unsigned long attrs) { struct vdpasim *vdpasim = dev_to_sim(dev); - struct vhost_iotlb *iommu = vdpasim->iommu; - u64 pa = (page_to_pfn(page) << PAGE_SHIFT) + offset; - int ret, perm = dir_to_perm(dir); + phys_addr_t paddr = page_to_phys(page) + offset; + int perm = dir_to_perm(dir); if (perm < 0) return DMA_MAPPING_ERROR; - /* For simplicity, use identical mapping to avoid e.g iova - * allocator. - */ - spin_lock(&vdpasim->iommu_lock); - ret = vhost_iotlb_add_range(iommu, pa, pa + size - 1, - pa, dir_to_perm(dir)); - spin_unlock(&vdpasim->iommu_lock); - if (ret) - return DMA_MAPPING_ERROR; - - return (dma_addr_t)(pa); + return vdpasim_map_range(vdpasim, paddr, size, perm); } static void vdpasim_unmap_page(struct device *dev, dma_addr_t dma_addr, @@ -159,12 +187,8 @@ static void vdpasim_unmap_page(struct device *dev, dma_addr_t dma_addr, unsigned long attrs) { struct vdpasim *vdpasim = dev_to_sim(dev); - struct vhost_iotlb *iommu = vdpasim->iommu; - spin_lock(&vdpasim->iommu_lock); - vhost_iotlb_del_range(iommu, (u64)dma_addr, - (u64)dma_addr + size - 1); - spin_unlock(&vdpasim->iommu_lock); + vdpasim_unmap_range(vdpasim, dma_addr, size); } static void *vdpasim_alloc_coherent(struct device *dev, size_t size, @@ -172,27 +196,22 @@ static void *vdpasim_alloc_coherent(struct device *dev, size_t size, unsigned long attrs) { struct vdpasim *vdpasim = dev_to_sim(dev); - struct vhost_iotlb *iommu = vdpasim->iommu; - void *addr = kmalloc(size, flag); - int ret; + phys_addr_t paddr; + void *addr; - spin_lock(&vdpasim->iommu_lock); + addr = kmalloc(size, flag); if (!addr) { *dma_addr = DMA_MAPPING_ERROR; - } else { - u64 pa = virt_to_phys(addr); - - ret = vhost_iotlb_add_range(iommu, (u64)pa, - (u64)pa + size - 1, - pa, VHOST_MAP_RW); - if (ret) { - *dma_addr = DMA_MAPPING_ERROR; - kfree(addr); - addr = NULL; - } else - *dma_addr = (dma_addr_t)pa; + return NULL; + } + + paddr = virt_to_phys(addr); + + *dma_addr = vdpasim_map_range(vdpasim, paddr, 
size, VHOST_MAP_RW); + if (*dma_addr == DMA_MAPPING_ERROR) { + kfree(addr); + return NULL; } - spin_unlock(&vdpasim->iommu_lock); return addr; } @@ -202,14 +221,10 @@ static void vdpasim_free_coherent(struct device *dev, size_t size, unsigned long attrs) { struct vdpasim *vdpasim = dev_to_sim(dev); - struct vhost_iotlb *iommu = vdpasim->iommu; - spin_lock(&vdpasim->iommu_lock); - vhost_iotlb_del_range(iommu, (u64)dma_addr, - (u64)dma_addr + size - 1); - spin_unlock(&vdpasim->iommu_lock); + vdpasim_unmap_range(vdpasim, dma_addr, size); - kfree(phys_to_virt((uintptr_t)dma_addr)); + kfree(vaddr); } static const struct dma_map_ops vdpasim_dma_ops = { @@ -269,7 +284,15 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) goto err_iommu; for (i = 0; i < dev_attr->nvqs; i++) - vringh_set_iotlb(&vdpasim->vqs[i].vring, vdpasim->iommu); + vringh_set_iotlb(&vdpasim->vqs[i].vring, vdpasim->iommu, + &vdpasim->iommu_lock); + + ret = iova_cache_get(); + if (ret) + goto err_iommu; + + /* For simplicity we use an IOVA allocator with byte granularity */ + init_iova_domain(&vdpasim->iova, 1, 0); vdpasim->vdpa.dma_dev = dev; @@ -439,6 +462,13 @@ static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status) spin_unlock(&vdpasim->lock); } +static size_t vdpasim_get_config_size(struct vdpa_device *vdpa) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + return vdpasim->dev_attr.config_size; +} + static void vdpasim_get_config(struct vdpa_device *vdpa, unsigned int offset, void *buf, unsigned int len) { @@ -539,8 +569,17 @@ static int vdpasim_dma_unmap(struct vdpa_device *vdpa, u64 iova, u64 size) static void vdpasim_free(struct vdpa_device *vdpa) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + int i; cancel_work_sync(&vdpasim->work); + + for (i = 0; i < vdpasim->dev_attr.nvqs; i++) { + vringh_kiov_cleanup(&vdpasim->vqs[i].out_iov); + vringh_kiov_cleanup(&vdpasim->vqs[i].in_iov); + } + + put_iova_domain(&vdpasim->iova); + iova_cache_put(); 
kvfree(vdpasim->buffer); if (vdpasim->iommu) vhost_iotlb_free(vdpasim->iommu); @@ -566,6 +605,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = { .get_vendor_id = vdpasim_get_vendor_id, .get_status = vdpasim_get_status, .set_status = vdpasim_set_status, + .get_config_size = vdpasim_get_config_size, .get_config = vdpasim_get_config, .set_config = vdpasim_set_config, .get_generation = vdpasim_get_generation, @@ -593,6 +633,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = { .get_vendor_id = vdpasim_get_vendor_id, .get_status = vdpasim_get_status, .set_status = vdpasim_set_status, + .get_config_size = vdpasim_get_config_size, .get_config = vdpasim_get_config, .set_config = vdpasim_set_config, .get_generation = vdpasim_get_generation, diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h index 6d75444f9948..cd58e888bcf3 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.h +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h @@ -6,6 +6,7 @@ #ifndef _VDPA_SIM_H #define _VDPA_SIM_H +#include <linux/iova.h> #include <linux/vringh.h> #include <linux/vdpa.h> #include <linux/virtio_byteorder.h> @@ -57,6 +58,7 @@ struct vdpasim { /* virtio config according to device type */ void *config; struct vhost_iotlb *iommu; + struct iova_domain iova; void *buffer; u32 status; u32 generation; diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c new file mode 100644 index 000000000000..5bfe1c281645 --- /dev/null +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c @@ -0,0 +1,338 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VDPA simulator for block device. + * + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2021, Red Hat Inc. All rights reserved. 
+ * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/blkdev.h> +#include <linux/vringh.h> +#include <linux/vdpa.h> +#include <linux/blkdev.h> +#include <uapi/linux/virtio_blk.h> + +#include "vdpa_sim.h" + +#define DRV_VERSION "0.1" +#define DRV_AUTHOR "Max Gurtovoy <mgurtovoy@nvidia.com>" +#define DRV_DESC "vDPA Device Simulator for block device" +#define DRV_LICENSE "GPL v2" + +#define VDPASIM_BLK_FEATURES (VDPASIM_FEATURES | \ + (1ULL << VIRTIO_BLK_F_SIZE_MAX) | \ + (1ULL << VIRTIO_BLK_F_SEG_MAX) | \ + (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \ + (1ULL << VIRTIO_BLK_F_TOPOLOGY) | \ + (1ULL << VIRTIO_BLK_F_MQ)) + +#define VDPASIM_BLK_CAPACITY 0x40000 +#define VDPASIM_BLK_SIZE_MAX 0x1000 +#define VDPASIM_BLK_SEG_MAX 32 +#define VDPASIM_BLK_VQ_NUM 1 + +static char vdpasim_blk_id[VIRTIO_BLK_ID_BYTES] = "vdpa_blk_sim"; + +static bool vdpasim_blk_check_range(u64 start_sector, size_t range_size) +{ + u64 range_sectors = range_size >> SECTOR_SHIFT; + + if (range_size > VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX) + return false; + + if (start_sector > VDPASIM_BLK_CAPACITY) + return false; + + if (range_sectors > VDPASIM_BLK_CAPACITY - start_sector) + return false; + + return true; +} + +/* Returns 'true' if the request is handled (with or without an I/O error) + * and the status is correctly written in the last byte of the 'in iov', + * 'false' otherwise. 
+ */ +static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim, + struct vdpasim_virtqueue *vq) +{ + size_t pushed = 0, to_pull, to_push; + struct virtio_blk_outhdr hdr; + ssize_t bytes; + loff_t offset; + u64 sector; + u8 status; + u32 type; + int ret; + + ret = vringh_getdesc_iotlb(&vq->vring, &vq->out_iov, &vq->in_iov, + &vq->head, GFP_ATOMIC); + if (ret != 1) + return false; + + if (vq->out_iov.used < 1 || vq->in_iov.used < 1) { + dev_err(&vdpasim->vdpa.dev, "missing headers - out_iov: %u in_iov %u\n", + vq->out_iov.used, vq->in_iov.used); + return false; + } + + if (vq->in_iov.iov[vq->in_iov.used - 1].iov_len < 1) { + dev_err(&vdpasim->vdpa.dev, "request in header too short\n"); + return false; + } + + /* The last byte is the status and we checked if the last iov has + * enough room for it. + */ + to_push = vringh_kiov_length(&vq->in_iov) - 1; + + to_pull = vringh_kiov_length(&vq->out_iov); + + bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &hdr, + sizeof(hdr)); + if (bytes != sizeof(hdr)) { + dev_err(&vdpasim->vdpa.dev, "request out header too short\n"); + return false; + } + + to_pull -= bytes; + + type = vdpasim32_to_cpu(vdpasim, hdr.type); + sector = vdpasim64_to_cpu(vdpasim, hdr.sector); + offset = sector << SECTOR_SHIFT; + status = VIRTIO_BLK_S_OK; + + switch (type) { + case VIRTIO_BLK_T_IN: + if (!vdpasim_blk_check_range(sector, to_push)) { + dev_err(&vdpasim->vdpa.dev, + "reading over the capacity - offset: 0x%llx len: 0x%zx\n", + offset, to_push); + status = VIRTIO_BLK_S_IOERR; + break; + } + + bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, + vdpasim->buffer + offset, + to_push); + if (bytes < 0) { + dev_err(&vdpasim->vdpa.dev, + "vringh_iov_push_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n", + bytes, offset, to_push); + status = VIRTIO_BLK_S_IOERR; + break; + } + + pushed += bytes; + break; + + case VIRTIO_BLK_T_OUT: + if (!vdpasim_blk_check_range(sector, to_pull)) { + dev_err(&vdpasim->vdpa.dev, + "writing over the capacity - 
offset: 0x%llx len: 0x%zx\n", + offset, to_pull); + status = VIRTIO_BLK_S_IOERR; + break; + } + + bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, + vdpasim->buffer + offset, + to_pull); + if (bytes < 0) { + dev_err(&vdpasim->vdpa.dev, + "vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n", + bytes, offset, to_pull); + status = VIRTIO_BLK_S_IOERR; + break; + } + break; + + case VIRTIO_BLK_T_GET_ID: + bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, + vdpasim_blk_id, + VIRTIO_BLK_ID_BYTES); + if (bytes < 0) { + dev_err(&vdpasim->vdpa.dev, + "vringh_iov_push_iotlb() error: %zd\n", bytes); + status = VIRTIO_BLK_S_IOERR; + break; + } + + pushed += bytes; + break; + + default: + dev_warn(&vdpasim->vdpa.dev, + "Unsupported request type %d\n", type); + status = VIRTIO_BLK_S_IOERR; + break; + } + + /* If some operations fail, we need to skip the remaining bytes + * to put the status in the last byte + */ + if (to_push - pushed > 0) + vringh_kiov_advance(&vq->in_iov, to_push - pushed); + + /* Last byte is the status */ + bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, &status, 1); + if (bytes != 1) + return false; + + pushed += bytes; + + /* Make sure data is written before advancing index */ + smp_wmb(); + + vringh_complete_iotlb(&vq->vring, vq->head, pushed); + + return true; +} + +static void vdpasim_blk_work(struct work_struct *work) +{ + struct vdpasim *vdpasim = container_of(work, struct vdpasim, work); + int i; + + spin_lock(&vdpasim->lock); + + if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK)) + goto out; + + for (i = 0; i < VDPASIM_BLK_VQ_NUM; i++) { + struct vdpasim_virtqueue *vq = &vdpasim->vqs[i]; + + if (!vq->ready) + continue; + + while (vdpasim_blk_handle_req(vdpasim, vq)) { + /* Make sure used is visible before raising the interrupt. 
*/ + smp_wmb(); + + local_bh_disable(); + if (vringh_need_notify_iotlb(&vq->vring) > 0) + vringh_notify(&vq->vring); + local_bh_enable(); + } + } +out: + spin_unlock(&vdpasim->lock); +} + +static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config) +{ + struct virtio_blk_config *blk_config = config; + + memset(config, 0, sizeof(struct virtio_blk_config)); + + blk_config->capacity = cpu_to_vdpasim64(vdpasim, VDPASIM_BLK_CAPACITY); + blk_config->size_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SIZE_MAX); + blk_config->seg_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SEG_MAX); + blk_config->num_queues = cpu_to_vdpasim16(vdpasim, VDPASIM_BLK_VQ_NUM); + blk_config->min_io_size = cpu_to_vdpasim16(vdpasim, 1); + blk_config->opt_io_size = cpu_to_vdpasim32(vdpasim, 1); + blk_config->blk_size = cpu_to_vdpasim32(vdpasim, SECTOR_SIZE); +} + +static void vdpasim_blk_mgmtdev_release(struct device *dev) +{ +} + +static struct device vdpasim_blk_mgmtdev = { + .init_name = "vdpasim_blk", + .release = vdpasim_blk_mgmtdev_release, +}; + +static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name) +{ + struct vdpasim_dev_attr dev_attr = {}; + struct vdpasim *simdev; + int ret; + + dev_attr.mgmt_dev = mdev; + dev_attr.name = name; + dev_attr.id = VIRTIO_ID_BLOCK; + dev_attr.supported_features = VDPASIM_BLK_FEATURES; + dev_attr.nvqs = VDPASIM_BLK_VQ_NUM; + dev_attr.config_size = sizeof(struct virtio_blk_config); + dev_attr.get_config = vdpasim_blk_get_config; + dev_attr.work_fn = vdpasim_blk_work; + dev_attr.buffer_size = VDPASIM_BLK_CAPACITY << SECTOR_SHIFT; + + simdev = vdpasim_create(&dev_attr); + if (IS_ERR(simdev)) + return PTR_ERR(simdev); + + ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_BLK_VQ_NUM); + if (ret) + goto put_dev; + + return 0; + +put_dev: + put_device(&simdev->vdpa.dev); + return ret; +} + +static void vdpasim_blk_dev_del(struct vdpa_mgmt_dev *mdev, + struct vdpa_device *dev) +{ + struct vdpasim *simdev = container_of(dev, struct 
vdpasim, vdpa); + + _vdpa_unregister_device(&simdev->vdpa); +} + +static const struct vdpa_mgmtdev_ops vdpasim_blk_mgmtdev_ops = { + .dev_add = vdpasim_blk_dev_add, + .dev_del = vdpasim_blk_dev_del +}; + +static struct virtio_device_id id_table[] = { + { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID }, + { 0 }, +}; + +static struct vdpa_mgmt_dev mgmt_dev = { + .device = &vdpasim_blk_mgmtdev, + .id_table = id_table, + .ops = &vdpasim_blk_mgmtdev_ops, +}; + +static int __init vdpasim_blk_init(void) +{ + int ret; + + ret = device_register(&vdpasim_blk_mgmtdev); + if (ret) + return ret; + + ret = vdpa_mgmtdev_register(&mgmt_dev); + if (ret) + goto parent_err; + + return 0; + +parent_err: + device_unregister(&vdpasim_blk_mgmtdev); + return ret; +} + +static void __exit vdpasim_blk_exit(void) +{ + vdpa_mgmtdev_unregister(&mgmt_dev); + device_unregister(&vdpasim_blk_mgmtdev); +} + +module_init(vdpasim_blk_init) +module_exit(vdpasim_blk_exit) + +MODULE_VERSION(DRV_VERSION); +MODULE_LICENSE(DRV_LICENSE); +MODULE_AUTHOR(DRV_AUTHOR); +MODULE_DESCRIPTION(DRV_DESC); diff --git a/drivers/vdpa/virtio_pci/Makefile b/drivers/vdpa/virtio_pci/Makefile new file mode 100644 index 000000000000..231088d3af7d --- /dev/null +++ b/drivers/vdpa/virtio_pci/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_VP_VDPA) += vp_vdpa.o diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c new file mode 100644 index 000000000000..c76ebb531212 --- /dev/null +++ b/drivers/vdpa/virtio_pci/vp_vdpa.c @@ -0,0 +1,484 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vDPA bridge driver for modern virtio-pci device + * + * Copyright (c) 2020, Red Hat Inc. All rights reserved. + * Author: Jason Wang <jasowang@redhat.com> + * + * Based on virtio_pci_modern.c. 
+ */ + +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/vdpa.h> +#include <linux/virtio.h> +#include <linux/virtio_config.h> +#include <linux/virtio_ring.h> +#include <linux/virtio_pci.h> +#include <linux/virtio_pci_modern.h> + +#define VP_VDPA_QUEUE_MAX 256 +#define VP_VDPA_DRIVER_NAME "vp_vdpa" +#define VP_VDPA_NAME_SIZE 256 + +struct vp_vring { + void __iomem *notify; + char msix_name[VP_VDPA_NAME_SIZE]; + struct vdpa_callback cb; + resource_size_t notify_pa; + int irq; +}; + +struct vp_vdpa { + struct vdpa_device vdpa; + struct virtio_pci_modern_device mdev; + struct vp_vring *vring; + struct vdpa_callback config_cb; + char msix_name[VP_VDPA_NAME_SIZE]; + int config_irq; + int queues; + int vectors; +}; + +static struct vp_vdpa *vdpa_to_vp(struct vdpa_device *vdpa) +{ + return container_of(vdpa, struct vp_vdpa, vdpa); +} + +static struct virtio_pci_modern_device *vdpa_to_mdev(struct vdpa_device *vdpa) +{ + struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa); + + return &vp_vdpa->mdev; +} + +static u64 vp_vdpa_get_features(struct vdpa_device *vdpa) +{ + struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa); + + return vp_modern_get_features(mdev); +} + +static int vp_vdpa_set_features(struct vdpa_device *vdpa, u64 features) +{ + struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa); + + vp_modern_set_features(mdev, features); + + return 0; +} + +static u8 vp_vdpa_get_status(struct vdpa_device *vdpa) +{ + struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa); + + return vp_modern_get_status(mdev); +} + +static void vp_vdpa_free_irq(struct vp_vdpa *vp_vdpa) +{ + struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev; + struct pci_dev *pdev = mdev->pci_dev; + int i; + + for (i = 0; i < vp_vdpa->queues; i++) { + if (vp_vdpa->vring[i].irq != VIRTIO_MSI_NO_VECTOR) { + vp_modern_queue_vector(mdev, i, VIRTIO_MSI_NO_VECTOR); + devm_free_irq(&pdev->dev, vp_vdpa->vring[i].irq, + &vp_vdpa->vring[i]); + 
vp_vdpa->vring[i].irq = VIRTIO_MSI_NO_VECTOR; + } + } + + if (vp_vdpa->config_irq != VIRTIO_MSI_NO_VECTOR) { + vp_modern_config_vector(mdev, VIRTIO_MSI_NO_VECTOR); + devm_free_irq(&pdev->dev, vp_vdpa->config_irq, vp_vdpa); + vp_vdpa->config_irq = VIRTIO_MSI_NO_VECTOR; + } + + if (vp_vdpa->vectors) { + pci_free_irq_vectors(pdev); + vp_vdpa->vectors = 0; + } +} + +static irqreturn_t vp_vdpa_vq_handler(int irq, void *arg) +{ + struct vp_vring *vring = arg; + + if (vring->cb.callback) + return vring->cb.callback(vring->cb.private); + + return IRQ_HANDLED; +} + +static irqreturn_t vp_vdpa_config_handler(int irq, void *arg) +{ + struct vp_vdpa *vp_vdpa = arg; + + if (vp_vdpa->config_cb.callback) + return vp_vdpa->config_cb.callback(vp_vdpa->config_cb.private); + + return IRQ_HANDLED; +} + +static int vp_vdpa_request_irq(struct vp_vdpa *vp_vdpa) +{ + struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev; + struct pci_dev *pdev = mdev->pci_dev; + int i, ret, irq; + int queues = vp_vdpa->queues; + int vectors = queues + 1; + + ret = pci_alloc_irq_vectors(pdev, vectors, vectors, PCI_IRQ_MSIX); + if (ret != vectors) { + dev_err(&pdev->dev, + "vp_vdpa: fail to allocate irq vectors want %d but %d\n", + vectors, ret); + return ret; + } + + vp_vdpa->vectors = vectors; + + for (i = 0; i < queues; i++) { + snprintf(vp_vdpa->vring[i].msix_name, VP_VDPA_NAME_SIZE, + "vp-vdpa[%s]-%d\n", pci_name(pdev), i); + irq = pci_irq_vector(pdev, i); + ret = devm_request_irq(&pdev->dev, irq, + vp_vdpa_vq_handler, + 0, vp_vdpa->vring[i].msix_name, + &vp_vdpa->vring[i]); + if (ret) { + dev_err(&pdev->dev, + "vp_vdpa: fail to request irq for vq %d\n", i); + goto err; + } + vp_modern_queue_vector(mdev, i, i); + vp_vdpa->vring[i].irq = irq; + } + + snprintf(vp_vdpa->msix_name, VP_VDPA_NAME_SIZE, "vp-vdpa[%s]-config\n", + pci_name(pdev)); + irq = pci_irq_vector(pdev, queues); + ret = devm_request_irq(&pdev->dev, irq, vp_vdpa_config_handler, 0, + vp_vdpa->msix_name, vp_vdpa); + if (ret) { + 
dev_err(&pdev->dev, + "vp_vdpa: fail to request irq for vq %d\n", i); + goto err; + } + vp_modern_config_vector(mdev, queues); + vp_vdpa->config_irq = irq; + + return 0; +err: + vp_vdpa_free_irq(vp_vdpa); + return ret; +} + +static void vp_vdpa_set_status(struct vdpa_device *vdpa, u8 status) +{ + struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa); + struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev; + u8 s = vp_vdpa_get_status(vdpa); + + if (status & VIRTIO_CONFIG_S_DRIVER_OK && + !(s & VIRTIO_CONFIG_S_DRIVER_OK)) { + vp_vdpa_request_irq(vp_vdpa); + } + + vp_modern_set_status(mdev, status); + + if (!(status & VIRTIO_CONFIG_S_DRIVER_OK) && + (s & VIRTIO_CONFIG_S_DRIVER_OK)) + vp_vdpa_free_irq(vp_vdpa); +} + +static u16 vp_vdpa_get_vq_num_max(struct vdpa_device *vdpa) +{ + return VP_VDPA_QUEUE_MAX; +} + +static int vp_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 qid, + struct vdpa_vq_state *state) +{ + /* Note that this is not supported by virtio specification, so + * we return -EOPNOTSUPP here. This means we can't support live + * migration, vhost device start/stop. + */ + return -EOPNOTSUPP; +} + +static int vp_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 qid, + const struct vdpa_vq_state *state) +{ + /* Note that this is not supported by virtio specification, so + * we return -ENOPOTSUPP here. This means we can't support live + * migration, vhost device start/stop. 
+ */ + return -EOPNOTSUPP; +} + +static void vp_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 qid, + struct vdpa_callback *cb) +{ + struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa); + + vp_vdpa->vring[qid].cb = *cb; +} + +static void vp_vdpa_set_vq_ready(struct vdpa_device *vdpa, + u16 qid, bool ready) +{ + struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa); + + vp_modern_set_queue_enable(mdev, qid, ready); +} + +static bool vp_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 qid) +{ + struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa); + + return vp_modern_get_queue_enable(mdev, qid); +} + +static void vp_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 qid, + u32 num) +{ + struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa); + + vp_modern_set_queue_size(mdev, qid, num); +} + +static int vp_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 qid, + u64 desc_area, u64 driver_area, + u64 device_area) +{ + struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa); + + vp_modern_queue_address(mdev, qid, desc_area, + driver_area, device_area); + + return 0; +} + +static void vp_vdpa_kick_vq(struct vdpa_device *vdpa, u16 qid) +{ + struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa); + + vp_iowrite16(qid, vp_vdpa->vring[qid].notify); +} + +static u32 vp_vdpa_get_generation(struct vdpa_device *vdpa) +{ + struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa); + + return vp_modern_generation(mdev); +} + +static u32 vp_vdpa_get_device_id(struct vdpa_device *vdpa) +{ + struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa); + + return mdev->id.device; +} + +static u32 vp_vdpa_get_vendor_id(struct vdpa_device *vdpa) +{ + struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa); + + return mdev->id.vendor; +} + +static u32 vp_vdpa_get_vq_align(struct vdpa_device *vdpa) +{ + return PAGE_SIZE; +} + +static size_t vp_vdpa_get_config_size(struct vdpa_device *vdpa) +{ + struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa); + + return mdev->device_len; +} + +static 
void vp_vdpa_get_config(struct vdpa_device *vdpa, + unsigned int offset, + void *buf, unsigned int len) +{ + struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa); + struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev; + u8 old, new; + u8 *p; + int i; + + do { + old = vp_ioread8(&mdev->common->config_generation); + p = buf; + for (i = 0; i < len; i++) + *p++ = vp_ioread8(mdev->device + offset + i); + + new = vp_ioread8(&mdev->common->config_generation); + } while (old != new); +} + +static void vp_vdpa_set_config(struct vdpa_device *vdpa, + unsigned int offset, const void *buf, + unsigned int len) +{ + struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa); + struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev; + const u8 *p = buf; + int i; + + for (i = 0; i < len; i++) + vp_iowrite8(*p++, mdev->device + offset + i); +} + +static void vp_vdpa_set_config_cb(struct vdpa_device *vdpa, + struct vdpa_callback *cb) +{ + struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa); + + vp_vdpa->config_cb = *cb; +} + +static struct vdpa_notification_area +vp_vdpa_get_vq_notification(struct vdpa_device *vdpa, u16 qid) +{ + struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa); + struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev; + struct vdpa_notification_area notify; + + notify.addr = vp_vdpa->vring[qid].notify_pa; + notify.size = mdev->notify_offset_multiplier; + + return notify; +} + +static const struct vdpa_config_ops vp_vdpa_ops = { + .get_features = vp_vdpa_get_features, + .set_features = vp_vdpa_set_features, + .get_status = vp_vdpa_get_status, + .set_status = vp_vdpa_set_status, + .get_vq_num_max = vp_vdpa_get_vq_num_max, + .get_vq_state = vp_vdpa_get_vq_state, + .get_vq_notification = vp_vdpa_get_vq_notification, + .set_vq_state = vp_vdpa_set_vq_state, + .set_vq_cb = vp_vdpa_set_vq_cb, + .set_vq_ready = vp_vdpa_set_vq_ready, + .get_vq_ready = vp_vdpa_get_vq_ready, + .set_vq_num = vp_vdpa_set_vq_num, + .set_vq_address = vp_vdpa_set_vq_address, + .kick_vq = vp_vdpa_kick_vq, + .get_generation = 
vp_vdpa_get_generation, + .get_device_id = vp_vdpa_get_device_id, + .get_vendor_id = vp_vdpa_get_vendor_id, + .get_vq_align = vp_vdpa_get_vq_align, + .get_config_size = vp_vdpa_get_config_size, + .get_config = vp_vdpa_get_config, + .set_config = vp_vdpa_set_config, + .set_config_cb = vp_vdpa_set_config_cb, +}; + +static void vp_vdpa_free_irq_vectors(void *data) +{ + pci_free_irq_vectors(data); +} + +static int vp_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct virtio_pci_modern_device *mdev; + struct device *dev = &pdev->dev; + struct vp_vdpa *vp_vdpa; + int ret, i; + + ret = pcim_enable_device(pdev); + if (ret) + return ret; + + vp_vdpa = vdpa_alloc_device(struct vp_vdpa, vdpa, + dev, &vp_vdpa_ops, NULL); + if (vp_vdpa == NULL) { + dev_err(dev, "vp_vdpa: Failed to allocate vDPA structure\n"); + return -ENOMEM; + } + + mdev = &vp_vdpa->mdev; + mdev->pci_dev = pdev; + + ret = vp_modern_probe(mdev); + if (ret) { + dev_err(&pdev->dev, "Failed to probe modern PCI device\n"); + goto err; + } + + pci_set_master(pdev); + pci_set_drvdata(pdev, vp_vdpa); + + vp_vdpa->vdpa.dma_dev = &pdev->dev; + vp_vdpa->queues = vp_modern_get_num_queues(mdev); + + ret = devm_add_action_or_reset(dev, vp_vdpa_free_irq_vectors, pdev); + if (ret) { + dev_err(&pdev->dev, + "Failed for adding devres for freeing irq vectors\n"); + goto err; + } + + vp_vdpa->vring = devm_kcalloc(&pdev->dev, vp_vdpa->queues, + sizeof(*vp_vdpa->vring), + GFP_KERNEL); + if (!vp_vdpa->vring) { + ret = -ENOMEM; + dev_err(&pdev->dev, "Fail to allocate virtqueues\n"); + goto err; + } + + for (i = 0; i < vp_vdpa->queues; i++) { + vp_vdpa->vring[i].irq = VIRTIO_MSI_NO_VECTOR; + vp_vdpa->vring[i].notify = + vp_modern_map_vq_notify(mdev, i, + &vp_vdpa->vring[i].notify_pa); + if (!vp_vdpa->vring[i].notify) { + dev_warn(&pdev->dev, "Fail to map vq notify %d\n", i); + goto err; + } + } + vp_vdpa->config_irq = VIRTIO_MSI_NO_VECTOR; + + ret = vdpa_register_device(&vp_vdpa->vdpa, vp_vdpa->queues); + if 
(ret) { + dev_err(&pdev->dev, "Failed to register to vdpa bus\n"); + goto err; + } + + return 0; + +err: + put_device(&vp_vdpa->vdpa.dev); + return ret; +} + +static void vp_vdpa_remove(struct pci_dev *pdev) +{ + struct vp_vdpa *vp_vdpa = pci_get_drvdata(pdev); + + vdpa_unregister_device(&vp_vdpa->vdpa); + vp_modern_remove(&vp_vdpa->mdev); +} + +static struct pci_driver vp_vdpa_driver = { + .name = "vp-vdpa", + .id_table = NULL, /* only dynamic ids */ + .probe = vp_vdpa_probe, + .remove = vp_vdpa_remove, +}; + +module_pci_driver(vp_vdpa_driver); + +MODULE_AUTHOR("Jason Wang <jasowang@redhat.com>"); +MODULE_DESCRIPTION("vp-vdpa"); +MODULE_LICENSE("GPL"); +MODULE_VERSION("1"); diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index f5ebe008a28b..fb41db3da611 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -16,12 +16,12 @@ #include <linux/cdev.h> #include <linux/device.h> #include <linux/mm.h> +#include <linux/slab.h> #include <linux/iommu.h> #include <linux/uuid.h> #include <linux/vdpa.h> #include <linux/nospec.h> #include <linux/vhost.h> -#include <linux/virtio_net.h> #include "vhost.h" @@ -188,13 +188,8 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) static int vhost_vdpa_config_validate(struct vhost_vdpa *v, struct vhost_vdpa_config *c) { - long size = 0; - - switch (v->virtio_id) { - case VIRTIO_ID_NET: - size = sizeof(struct virtio_net_config); - break; - } + struct vdpa_device *vdpa = v->vdpa; + long size = vdpa->config->get_config_size(vdpa); if (c->len == 0) return -EINVAL; @@ -989,6 +984,7 @@ static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma) if (vma->vm_end - vma->vm_start != notify.size) return -ENOTSUPP; + vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; vma->vm_ops = &vhost_vdpa_vm_ops; return 0; } @@ -1023,10 +1019,6 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa) int minor; int r; - /* Currently, we only accept the network devices. 
*/ - if (ops->get_device_id(vdpa) != VIRTIO_ID_NET) - return -ENOTSUPP; - v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!v) return -ENOMEM; diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index 85d85faba058..4af8fa259d65 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -75,6 +75,34 @@ static inline int __vringh_get_head(const struct vringh *vrh, return head; } +/** + * vringh_kiov_advance - skip bytes from vring_kiov + * @iov: an iov passed to vringh_getdesc_*() (updated as we consume) + * @len: the maximum length to advance + */ +void vringh_kiov_advance(struct vringh_kiov *iov, size_t len) +{ + while (len && iov->i < iov->used) { + size_t partlen = min(iov->iov[iov->i].iov_len, len); + + iov->consumed += partlen; + iov->iov[iov->i].iov_len -= partlen; + iov->iov[iov->i].iov_base += partlen; + + if (!iov->iov[iov->i].iov_len) { + /* Fix up old iov element then increment. */ + iov->iov[iov->i].iov_len = iov->consumed; + iov->iov[iov->i].iov_base -= iov->consumed; + + iov->consumed = 0; + iov->i++; + } + + len -= partlen; + } +} +EXPORT_SYMBOL(vringh_kiov_advance); + /* Copy some bytes to/from the iovec. Returns num copied. */ static inline ssize_t vringh_iov_xfer(struct vringh *vrh, struct vringh_kiov *iov, @@ -95,19 +123,8 @@ static inline ssize_t vringh_iov_xfer(struct vringh *vrh, done += partlen; len -= partlen; ptr += partlen; - iov->consumed += partlen; - iov->iov[iov->i].iov_len -= partlen; - iov->iov[iov->i].iov_base += partlen; - if (!iov->iov[iov->i].iov_len) { - /* Fix up old iov element then increment. 
*/ - iov->iov[iov->i].iov_len = iov->consumed; - iov->iov[iov->i].iov_base -= iov->consumed; - - - iov->consumed = 0; - iov->i++; - } + vringh_kiov_advance(iov, partlen); } return done; } @@ -290,9 +307,9 @@ __vringh_iov(struct vringh *vrh, u16 i, return -EINVAL; if (riov) - riov->i = riov->used = 0; + riov->i = riov->used = riov->consumed = 0; if (wiov) - wiov->i = wiov->used = 0; + wiov->i = wiov->used = wiov->consumed = 0; for (;;) { void *addr; @@ -662,7 +679,10 @@ EXPORT_SYMBOL(vringh_init_user); * *head will be vrh->vring.num. You may be able to ignore an invalid * descriptor, but there's not much you can do with an invalid ring. * - * Note that you may need to clean up riov and wiov, even on error! + * Note that you can reuse riov and wiov with subsequent calls. Content is + * overwritten and memory reallocated if more space is needed. + * When you don't have to use riov and wiov anymore, you should clean up them + * calling vringh_iov_cleanup() to release the memory, even on error! */ int vringh_getdesc_user(struct vringh *vrh, struct vringh_iov *riov, @@ -932,7 +952,10 @@ EXPORT_SYMBOL(vringh_init_kern); * *head will be vrh->vring.num. You may be able to ignore an invalid * descriptor, but there's not much you can do with an invalid ring. * - * Note that you may need to clean up riov and wiov, even on error! + * Note that you can reuse riov and wiov with subsequent calls. Content is + * overwritten and memory reallocated if more space is needed. + * When you don't have to use riov and wiov anymore, you should clean up them + * calling vringh_kiov_cleanup() to release the memory, even on error! 
*/ int vringh_getdesc_kern(struct vringh *vrh, struct vringh_kiov *riov, @@ -1074,6 +1097,8 @@ static int iotlb_translate(const struct vringh *vrh, int ret = 0; u64 s = 0; + spin_lock(vrh->iotlb_lock); + while (len > s) { u64 size, pa, pfn; @@ -1103,6 +1128,8 @@ static int iotlb_translate(const struct vringh *vrh, ++ret; } + spin_unlock(vrh->iotlb_lock); + return ret; } @@ -1262,10 +1289,13 @@ EXPORT_SYMBOL(vringh_init_iotlb); * vringh_set_iotlb - initialize a vringh for a ring with IOTLB. * @vrh: the vring * @iotlb: iotlb associated with this vring + * @iotlb_lock: spinlock to synchronize the iotlb accesses */ -void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb) +void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb, + spinlock_t *iotlb_lock) { vrh->iotlb = iotlb; + vrh->iotlb_lock = iotlb_lock; } EXPORT_SYMBOL(vringh_set_iotlb); @@ -1285,7 +1315,10 @@ EXPORT_SYMBOL(vringh_set_iotlb); * *head will be vrh->vring.num. You may be able to ignore an invalid * descriptor, but there's not much you can do with an invalid ring. * - * Note that you may need to clean up riov and wiov, even on error! + * Note that you can reuse riov and wiov with subsequent calls. Content is + * overwritten and memory reallocated if more space is needed. + * When you don't have to use riov and wiov anymore, you should clean up them + * calling vringh_kiov_cleanup() to release the memory, even on error! */ int vringh_getdesc_iotlb(struct vringh *vrh, struct vringh_kiov *riov, diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 8985fc2cea86..510e9318854d 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -734,7 +734,7 @@ static void report_free_page_func(struct work_struct *work) #ifdef CONFIG_BALLOON_COMPACTION /* * virtballoon_migratepage - perform the balloon page migration on behalf of - * a compation thread. (called under page lock) + * a compaction thread. 
(called under page lock) * @vb_dev_info: the balloon device * @newpage: page that will replace the isolated page after migration finishes. * @page : the isolated (old) page that is about to be migrated to newpage. diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index fbd4ebc00eb6..30654d3a0b41 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -192,7 +192,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, struct virtio_pci_modern_device *mdev = &vp_dev->mdev; struct virtqueue *vq; - u16 num, off; + u16 num; int err; if (index >= vp_modern_get_num_queues(mdev)) @@ -208,9 +208,6 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, return ERR_PTR(-EINVAL); } - /* get offset of notification word for this vq */ - off = vp_modern_get_queue_notify_off(mdev, index); - info->msix_vector = msix_vec; /* create the vring */ @@ -227,27 +224,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, virtqueue_get_avail_addr(vq), virtqueue_get_used_addr(vq)); - if (mdev->notify_base) { - /* offset should not wrap */ - if ((u64)off * mdev->notify_offset_multiplier + 2 - > mdev->notify_len) { - dev_warn(&mdev->pci_dev->dev, - "bad notification offset %u (x %u) " - "for queue %u > %zd", - off, mdev->notify_offset_multiplier, - index, mdev->notify_len); - err = -EINVAL; - goto err_map_notify; - } - vq->priv = (void __force *)mdev->notify_base + - off * mdev->notify_offset_multiplier; - } else { - vq->priv = (void __force *)vp_modern_map_capability(mdev, - mdev->notify_map_cap, 2, 2, - off * mdev->notify_offset_multiplier, 2, - NULL); - } - + vq->priv = (void __force *)vp_modern_map_vq_notify(mdev, index, NULL); if (!vq->priv) { err = -ENOMEM; goto err_map_notify; diff --git a/drivers/virtio/virtio_pci_modern_dev.c b/drivers/virtio/virtio_pci_modern_dev.c index cbd667496bb1..54f297028586 100644 --- a/drivers/virtio/virtio_pci_modern_dev.c +++ 
b/drivers/virtio/virtio_pci_modern_dev.c @@ -13,14 +13,14 @@ * @start: start from the capability * @size: map size * @len: the length that is actually mapped + * @pa: physical address of the capability * * Returns the io address of for the part of the capability */ -void __iomem *vp_modern_map_capability(struct virtio_pci_modern_device *mdev, int off, - size_t minlen, - u32 align, - u32 start, u32 size, - size_t *len) +static void __iomem * +vp_modern_map_capability(struct virtio_pci_modern_device *mdev, int off, + size_t minlen, u32 align, u32 start, u32 size, + size_t *len, resource_size_t *pa) { struct pci_dev *dev = mdev->pci_dev; u8 bar; @@ -88,9 +88,11 @@ void __iomem *vp_modern_map_capability(struct virtio_pci_modern_device *mdev, in dev_err(&dev->dev, "virtio_pci: unable to map virtio %u@%u on bar %i\n", length, offset, bar); + else if (pa) + *pa = pci_resource_start(dev, bar) + offset; + return p; } -EXPORT_SYMBOL_GPL(vp_modern_map_capability); /** * virtio_pci_find_capability - walk capabilities to find device info. 
@@ -275,12 +277,12 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev) mdev->common = vp_modern_map_capability(mdev, common, sizeof(struct virtio_pci_common_cfg), 4, 0, sizeof(struct virtio_pci_common_cfg), - NULL); + NULL, NULL); if (!mdev->common) goto err_map_common; mdev->isr = vp_modern_map_capability(mdev, isr, sizeof(u8), 1, 0, 1, - NULL); + NULL, NULL); if (!mdev->isr) goto err_map_isr; @@ -308,7 +310,8 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev) mdev->notify_base = vp_modern_map_capability(mdev, notify, 2, 2, 0, notify_length, - &mdev->notify_len); + &mdev->notify_len, + &mdev->notify_pa); if (!mdev->notify_base) goto err_map_notify; } else { @@ -321,7 +324,8 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev) if (device) { mdev->device = vp_modern_map_capability(mdev, device, 0, 4, 0, PAGE_SIZE, - &mdev->device_len); + &mdev->device_len, + NULL); if (!mdev->device) goto err_map_device; } @@ -584,14 +588,51 @@ EXPORT_SYMBOL_GPL(vp_modern_get_num_queues); * * Returns the notification offset for a virtqueue */ -u16 vp_modern_get_queue_notify_off(struct virtio_pci_modern_device *mdev, - u16 index) +static u16 vp_modern_get_queue_notify_off(struct virtio_pci_modern_device *mdev, + u16 index) { vp_iowrite16(index, &mdev->common->queue_select); return vp_ioread16(&mdev->common->queue_notify_off); } -EXPORT_SYMBOL_GPL(vp_modern_get_queue_notify_off); + +/* + * vp_modern_map_vq_notify - map notification area for a + * specific virtqueue + * @mdev: the modern virtio-pci device + * @index: the queue index + * @pa: the pointer to the physical address of the nofity area + * + * Returns the address of the notification area + */ +void __iomem *vp_modern_map_vq_notify(struct virtio_pci_modern_device *mdev, + u16 index, resource_size_t *pa) +{ + u16 off = vp_modern_get_queue_notify_off(mdev, index); + + if (mdev->notify_base) { + /* offset should not wrap */ + if ((u64)off * mdev->notify_offset_multiplier + 2 + > mdev->notify_len) { + 
dev_warn(&mdev->pci_dev->dev, + "bad notification offset %u (x %u) " + "for queue %u > %zd", + off, mdev->notify_offset_multiplier, + index, mdev->notify_len); + return NULL; + } + if (pa) + *pa = mdev->notify_pa + + off * mdev->notify_offset_multiplier; + return mdev->notify_base + off * mdev->notify_offset_multiplier; + } else { + return vp_modern_map_capability(mdev, + mdev->notify_map_cap, 2, 2, + off * mdev->notify_offset_multiplier, 2, + NULL, pa); + } +} +EXPORT_SYMBOL_GPL(vp_modern_map_vq_notify); MODULE_VERSION("0.1"); MODULE_DESCRIPTION("Modern Virtio PCI Device"); diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 15fa085fab05..f311d227aa1b 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -8,7 +8,7 @@ #include <linux/vhost_iotlb.h> /** - * vDPA callback definition. + * struct vdpa_calllback - vDPA callback definition. * @callback: interrupt callback function * @private: the data passed to the callback function */ @@ -18,7 +18,7 @@ struct vdpa_callback { }; /** - * vDPA notification area + * struct vdpa_notification_area - vDPA notification area * @addr: base address of the notification area * @size: size of the notification area */ @@ -28,7 +28,7 @@ struct vdpa_notification_area { }; /** - * vDPA vq_state definition + * struct vdpa_vq_state - vDPA vq_state definition * @avail_index: available index */ struct vdpa_vq_state { @@ -38,7 +38,7 @@ struct vdpa_vq_state { struct vdpa_mgmt_dev; /** - * vDPA device - representation of a vDPA device + * struct vdpa_device - representation of a vDPA device * @dev: underlying device * @dma_dev: the actual device that is performing DMA * @config: the configuration ops for this device. 
@@ -59,7 +59,7 @@ struct vdpa_device { }; /** - * vDPA IOVA range - the IOVA range support by the device + * struct vdpa_iova_range - the IOVA range support by the device * @first: start of the IOVA range * @last: end of the IOVA range */ @@ -69,7 +69,7 @@ struct vdpa_iova_range { }; /** - * vDPA_config_ops - operations for configuring a vDPA device. + * struct vdpa_config_ops - operations for configuring a vDPA device. * Note: vDPA device drivers are required to implement all of the * operations unless it is mentioned to be optional in the following * list. @@ -150,6 +150,9 @@ struct vdpa_iova_range { * @set_status: Set the device status * @vdev: vdpa device * @status: virtio device status + * @get_config_size: Get the size of the configuration space + * @vdev: vdpa device + * Returns size_t: configuration size * @get_config: Read from device specific configuration space * @vdev: vdpa device * @offset: offset from the beginning of @@ -231,6 +234,7 @@ struct vdpa_config_ops { u32 (*get_vendor_id)(struct vdpa_device *vdev); u8 (*get_status)(struct vdpa_device *vdev); void (*set_status)(struct vdpa_device *vdev, u8 status); + size_t (*get_config_size)(struct vdpa_device *vdev); void (*get_config)(struct vdpa_device *vdev, unsigned int offset, void *buf, unsigned int len); void (*set_config)(struct vdpa_device *vdev, unsigned int offset, @@ -267,7 +271,7 @@ int _vdpa_register_device(struct vdpa_device *vdev, int nvqs); void _vdpa_unregister_device(struct vdpa_device *vdev); /** - * vdpa_driver - operations for a vDPA driver + * struct vdpa_driver - operations for a vDPA driver * @driver: underlying device driver * @probe: the function to call when a device is found. Returns 0 or -errno. * @remove: the function to call when a device is removed. 
@@ -344,18 +348,18 @@ static inline void vdpa_get_config(struct vdpa_device *vdev, unsigned offset, } /** - * vdpa_mgmtdev_ops - vdpa device ops - * @dev_add: Add a vdpa device using alloc and register - * @mdev: parent device to use for device addition - * @name: name of the new vdpa device - * Driver need to add a new device using _vdpa_register_device() - * after fully initializing the vdpa device. Driver must return 0 - * on success or appropriate error code. - * @dev_del: Remove a vdpa device using unregister - * @mdev: parent device to use for device removal - * @dev: vdpa device to remove - * Driver need to remove the specified device by calling - * _vdpa_unregister_device(). + * struct vdpa_mgmtdev_ops - vdpa device ops + * @dev_add: Add a vdpa device using alloc and register + * @mdev: parent device to use for device addition + * @name: name of the new vdpa device + * Driver need to add a new device using _vdpa_register_device() + * after fully initializing the vdpa device. Driver must return 0 + * on success or appropriate error code. + * @dev_del: Remove a vdpa device using unregister + * @mdev: parent device to use for device removal + * @dev: vdpa device to remove + * Driver need to remove the specified device by calling + * _vdpa_unregister_device(). */ struct vdpa_mgmtdev_ops { int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name); diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h index f26acbeec965..6a95b58fd0f4 100644 --- a/include/linux/virtio_pci_modern.h +++ b/include/linux/virtio_pci_modern.h @@ -13,6 +13,8 @@ struct virtio_pci_modern_device { void __iomem *device; /* Base of vq notifications (non-legacy mode). 
*/ void __iomem *notify_base; + /* Physical base of vq notifications */ + resource_size_t notify_pa; /* Where to read and clear interrupt */ u8 __iomem *isr; @@ -99,13 +101,8 @@ void vp_modern_set_queue_size(struct virtio_pci_modern_device *mdev, u16 vp_modern_get_queue_size(struct virtio_pci_modern_device *mdev, u16 idx); u16 vp_modern_get_num_queues(struct virtio_pci_modern_device *mdev); -u16 vp_modern_get_queue_notify_off(struct virtio_pci_modern_device *mdev, - u16 idx); -void __iomem *vp_modern_map_capability(struct virtio_pci_modern_device *mdev, int off, - size_t minlen, - u32 align, - u32 start, u32 size, - size_t *len); +void __iomem * vp_modern_map_vq_notify(struct virtio_pci_modern_device *mdev, + u16 index, resource_size_t *pa); int vp_modern_probe(struct virtio_pci_modern_device *mdev); void vp_modern_remove(struct virtio_pci_modern_device *mdev); #endif diff --git a/include/linux/vringh.h b/include/linux/vringh.h index 59bd50f99291..84db7b8f912f 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -46,6 +46,9 @@ struct vringh { /* IOTLB for this vring */ struct vhost_iotlb *iotlb; + /* spinlock to synchronize IOTLB accesses */ + spinlock_t *iotlb_lock; + /* The function to call to notify the guest about added buffers */ void (*notify)(struct vringh *); }; @@ -196,6 +199,19 @@ static inline void vringh_kiov_cleanup(struct vringh_kiov *kiov) kiov->iov = NULL; } +static inline size_t vringh_kiov_length(struct vringh_kiov *kiov) +{ + size_t len = 0; + int i; + + for (i = kiov->i; i < kiov->used; i++) + len += kiov->iov[i].iov_len; + + return len; +} + +void vringh_kiov_advance(struct vringh_kiov *kiov, size_t len); + int vringh_getdesc_kern(struct vringh *vrh, struct vringh_kiov *riov, struct vringh_kiov *wiov, @@ -258,7 +274,8 @@ static inline __virtio64 cpu_to_vringh64(const struct vringh *vrh, u64 val) #if IS_REACHABLE(CONFIG_VHOST_IOTLB) -void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb); +void 
vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb, + spinlock_t *iotlb_lock); int vringh_init_iotlb(struct vringh *vrh, u64 features, unsigned int num, bool weak_barriers, |