From 9ea9b9c48387edc101d56349492ad9c0492ff78d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig <hch@lst.de> Date: Thu, 12 Aug 2021 15:23:08 +0200 Subject: remove the lightnvm subsystem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lightnvm supports the OCSSD 1.x and 2.0 specs which were early attempts to produce Open Channel SSDs and never made it into the NVMe spec proper. They have since been superseded by NVMe enhancements such as ZNS support. Remove the support per the deprecation schedule. Signed-off-by: Christoph Hellwig <hch@lst.de> Link: https://lore.kernel.org/r/20210812132308.38486-1-hch@lst.de Reviewed-by: Matias Bjørling <mb@lightnvm.io> Reviewed-by: Javier González <javier@javigon.com> Signed-off-by: Jens Axboe <axboe@kernel.dk> --- Documentation/driver-api/index.rst | 1 - Documentation/driver-api/lightnvm-pblk.rst | 21 - Documentation/userspace-api/ioctl/ioctl-number.rst | 1 - MAINTAINERS | 9 - drivers/Kconfig | 2 - drivers/Makefile | 1 - drivers/lightnvm/Kconfig | 44 - drivers/lightnvm/Makefile | 11 - drivers/lightnvm/core.c | 1440 ------------- drivers/lightnvm/pblk-cache.c | 137 -- drivers/lightnvm/pblk-core.c | 2151 -------------------- drivers/lightnvm/pblk-gc.c | 726 ------- drivers/lightnvm/pblk-init.c | 1324 ------------ drivers/lightnvm/pblk-map.c | 210 -- drivers/lightnvm/pblk-rb.c | 858 -------- drivers/lightnvm/pblk-read.c | 474 ----- drivers/lightnvm/pblk-recovery.c | 874 -------- drivers/lightnvm/pblk-rl.c | 254 --- drivers/lightnvm/pblk-sysfs.c | 728 ------- drivers/lightnvm/pblk-trace.h | 145 -- drivers/lightnvm/pblk-write.c | 665 ------ drivers/lightnvm/pblk.h | 1358 ------------ drivers/nvme/host/Makefile | 1 - drivers/nvme/host/core.c | 13 - drivers/nvme/host/ioctl.c | 4 +- drivers/nvme/host/lightnvm.c | 1274 ------------ drivers/nvme/host/nvme.h | 26 - drivers/nvme/host/pci.c | 6 - include/linux/lightnvm.h | 697 ------- include/uapi/linux/lightnvm.h | 224 -- 30 files changed, 1 insertion(+), 13678 deletions(-) delete mode 100644 Documentation/driver-api/lightnvm-pblk.rst delete mode 100644 drivers/lightnvm/Kconfig delete mode 100644 drivers/lightnvm/Makefile delete mode 100644 drivers/lightnvm/core.c delete mode 100644 drivers/lightnvm/pblk-cache.c delete mode 100644 drivers/lightnvm/pblk-core.c delete mode 100644 drivers/lightnvm/pblk-gc.c delete mode 100644 drivers/lightnvm/pblk-init.c delete mode 100644 drivers/lightnvm/pblk-map.c delete mode 100644 drivers/lightnvm/pblk-rb.c delete mode 100644 drivers/lightnvm/pblk-read.c delete mode 100644 drivers/lightnvm/pblk-recovery.c delete mode 100644 drivers/lightnvm/pblk-rl.c delete mode 100644 drivers/lightnvm/pblk-sysfs.c delete mode 100644 drivers/lightnvm/pblk-trace.h delete mode 100644 drivers/lightnvm/pblk-write.c delete mode 100644 drivers/lightnvm/pblk.h delete mode 100644 drivers/nvme/host/lightnvm.c delete mode 100644 include/linux/lightnvm.h delete mode 100644 include/uapi/linux/lightnvm.h diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst index f5a3207aa7fa..c57c609ad2eb 100644 --- a/Documentation/driver-api/index.rst +++ b/Documentation/driver-api/index.rst @@ -85,7 +85,6 @@ available subsections can be seen below. 
io-mapping io_ordering generic-counter - lightnvm-pblk memory-devices/index men-chameleon-bus ntb diff --git a/Documentation/driver-api/lightnvm-pblk.rst b/Documentation/driver-api/lightnvm-pblk.rst deleted file mode 100644 index 1040ed1cec81..000000000000 --- a/Documentation/driver-api/lightnvm-pblk.rst +++ /dev/null @@ -1,21 +0,0 @@ -pblk: Physical Block Device Target -================================== - -pblk implements a fully associative, host-based FTL that exposes a traditional -block I/O interface. Its primary responsibilities are: - - - Map logical addresses onto physical addresses (4KB granularity) in a - logical-to-physical (L2P) table. - - Maintain the integrity and consistency of the L2P table as well as its - recovery from normal tear down and power outage. - - Deal with controller- and media-specific constrains. - - Handle I/O errors. - - Implement garbage collection. - - Maintain consistency across the I/O stack during synchronization points. - -For more information please refer to: - - http://lightnvm.io - -which maintains updated FAQs, manual pages, technical documentation, tools, -contacts, etc. diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index 1409e40e6345..b7070d76f076 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -160,7 +160,6 @@ Code Seq# Include File Comments 'K' all linux/kd.h 'L' 00-1F linux/loop.h conflict! 'L' 10-1F drivers/scsi/mpt3sas/mpt3sas_ctl.h conflict! -'L' 20-2F linux/lightnvm.h 'L' E0-FF linux/ppdd.h encrypted disk device driver 'M' all linux/soundcard.h conflict! diff --git a/MAINTAINERS b/MAINTAINERS index c9467d2839f5..ec3f59a16c9a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10609,15 +10609,6 @@ F: LICENSES/ F: scripts/spdxcheck-test.sh F: scripts/spdxcheck.py -LIGHTNVM PLATFORM SUPPORT -M: Matias Bjorling -L: linux-block@vger.kernel.org -S: Maintained -W: http://github/OpenChannelSSD -F: drivers/lightnvm/ -F: include/linux/lightnvm.h -F: include/uapi/linux/lightnvm.h - LINEAR RANGES HELPERS M: Mark Brown R: Matti Vaittinen diff --git a/drivers/Kconfig b/drivers/Kconfig index 8bad63417a50..30d2db37cc87 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -51,8 +51,6 @@ source "drivers/net/Kconfig" source "drivers/isdn/Kconfig" -source "drivers/lightnvm/Kconfig" - # input before char - char/joystick depends on it. As does USB. source "drivers/input/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index 27c018bdf4de..be5d40ae1488 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -70,7 +70,6 @@ obj-$(CONFIG_FB_I810) += video/fbdev/i810/ obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ obj-$(CONFIG_PARPORT) += parport/ -obj-$(CONFIG_NVM) += lightnvm/ obj-y += base/ block/ misc/ mfd/ nfc/ obj-$(CONFIG_LIBNVDIMM) += nvdimm/ obj-$(CONFIG_DAX) += dax/ diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig deleted file mode 100644 index 04caa0f2d445..000000000000 --- a/drivers/lightnvm/Kconfig +++ /dev/null @@ -1,44 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Open-Channel SSD NVM configuration -# - -menuconfig NVM - bool "Open-Channel SSD target support (DEPRECATED)" - depends on BLOCK - help - Say Y here to get to enable Open-channel SSDs. - - Open-Channel SSDs implement a set of extension to SSDs, that - exposes direct access to the underlying non-volatile memory. 
- - If you say N, all options in this submenu will be skipped and disabled - only do this if you know what you are doing. - - This code is deprecated and will be removed in Linux 5.15. - -if NVM - -config NVM_PBLK - tristate "Physical Block Device Open-Channel SSD target" - select CRC32 - help - Allows an open-channel SSD to be exposed as a block device to the - host. The target assumes the device exposes raw flash and must be - explicitly managed by the host. - - Please note the disk format is considered EXPERIMENTAL for now. - -if NVM_PBLK - -config NVM_PBLK_DEBUG - bool "PBlk Debug Support" - default n - help - Enables debug support for pblk. This includes extra checks, more - vocal error messages, and extra tracking fields in the pblk sysfs - entries. - -endif # NVM_PBLK_DEBUG - -endif # NVM diff --git a/drivers/lightnvm/Makefile b/drivers/lightnvm/Makefile deleted file mode 100644 index 97d9d7c71550..000000000000 --- a/drivers/lightnvm/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Makefile for Open-Channel SSDs. -# - -obj-$(CONFIG_NVM) := core.o -obj-$(CONFIG_NVM_PBLK) += pblk.o -pblk-y := pblk-init.o pblk-core.o pblk-rb.o \ - pblk-write.o pblk-cache.o pblk-read.o \ - pblk-gc.o pblk-recovery.o pblk-map.o \ - pblk-rl.o pblk-sysfs.o diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c deleted file mode 100644 index cf8a75494833..000000000000 --- a/drivers/lightnvm/core.c +++ /dev/null @@ -1,1440 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2015 IT University of Copenhagen. All rights reserved. - * Initial release: Matias Bjorling <m@bjorling.me> - */ - -#define pr_fmt(fmt) "nvm: " fmt - -#include <linux/list.h> -#include <linux/types.h> -#include <linux/sem.h> -#include <linux/bitmap.h> -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/miscdevice.h> -#include <linux/lightnvm.h> -#include <linux/sched/sysctl.h> - -static LIST_HEAD(nvm_tgt_types); -static DECLARE_RWSEM(nvm_tgtt_lock); -static LIST_HEAD(nvm_devices); -static DECLARE_RWSEM(nvm_lock); - -/* Map between virtual and physical channel and lun */ -struct nvm_ch_map { - int ch_off; - int num_lun; - int *lun_offs; -}; - -struct nvm_dev_map { - struct nvm_ch_map *chnls; - int num_ch; -}; - -static void nvm_free(struct kref *ref); - -static struct nvm_target *nvm_find_target(struct nvm_dev *dev, const char *name) -{ - struct nvm_target *tgt; - - list_for_each_entry(tgt, &dev->targets, list) - if (!strcmp(name, tgt->disk->disk_name)) - return tgt; - - return NULL; -} - -static bool nvm_target_exists(const char *name) -{ - struct nvm_dev *dev; - struct nvm_target *tgt; - bool ret = false; - - down_write(&nvm_lock); - list_for_each_entry(dev, &nvm_devices, devices) { - mutex_lock(&dev->mlock); - list_for_each_entry(tgt, &dev->targets, list) { - if (!strcmp(name, tgt->disk->disk_name)) { - ret = true; - mutex_unlock(&dev->mlock); - goto out; - } - } - mutex_unlock(&dev->mlock); - } - -out: - up_write(&nvm_lock); - return ret; -} - -static int nvm_reserve_luns(struct nvm_dev *dev, int lun_begin, int lun_end) -{ - int i; - - for (i = lun_begin; i <= lun_end; i++) { - if (test_and_set_bit(i, dev->lun_map)) { - pr_err("lun %d already allocated\n", i); - goto err; - } - } - - return 0; -err: - while (--i >= lun_begin) - clear_bit(i, dev->lun_map); - - return -EBUSY; -} - -static void nvm_release_luns_err(struct nvm_dev *dev, int lun_begin, - int lun_end) -{ - int i; - - for (i = lun_begin; i <= lun_end; i++) - WARN_ON(!test_and_clear_bit(i, dev->lun_map)); -} - -static void nvm_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev, int clear) -{ - struct nvm_dev *dev = tgt_dev->parent; - struct nvm_dev_map *dev_map = tgt_dev->map; - 
int i, j; - - for (i = 0; i < dev_map->num_ch; i++) { - struct nvm_ch_map *ch_map = &dev_map->chnls[i]; - int *lun_offs = ch_map->lun_offs; - int ch = i + ch_map->ch_off; - - if (clear) { - for (j = 0; j < ch_map->num_lun; j++) { - int lun = j + lun_offs[j]; - int lunid = (ch * dev->geo.num_lun) + lun; - - WARN_ON(!test_and_clear_bit(lunid, - dev->lun_map)); - } - } - - kfree(ch_map->lun_offs); - } - - kfree(dev_map->chnls); - kfree(dev_map); - - kfree(tgt_dev->luns); - kfree(tgt_dev); -} - -static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev, - u16 lun_begin, u16 lun_end, - u16 op) -{ - struct nvm_tgt_dev *tgt_dev = NULL; - struct nvm_dev_map *dev_rmap = dev->rmap; - struct nvm_dev_map *dev_map; - struct ppa_addr *luns; - int num_lun = lun_end - lun_begin + 1; - int luns_left = num_lun; - int num_ch = num_lun / dev->geo.num_lun; - int num_ch_mod = num_lun % dev->geo.num_lun; - int bch = lun_begin / dev->geo.num_lun; - int blun = lun_begin % dev->geo.num_lun; - int lunid = 0; - int lun_balanced = 1; - int sec_per_lun, prev_num_lun; - int i, j; - - num_ch = (num_ch_mod == 0) ? num_ch : num_ch + 1; - - dev_map = kmalloc(sizeof(struct nvm_dev_map), GFP_KERNEL); - if (!dev_map) - goto err_dev; - - dev_map->chnls = kcalloc(num_ch, sizeof(struct nvm_ch_map), GFP_KERNEL); - if (!dev_map->chnls) - goto err_chnls; - - luns = kcalloc(num_lun, sizeof(struct ppa_addr), GFP_KERNEL); - if (!luns) - goto err_luns; - - prev_num_lun = (luns_left > dev->geo.num_lun) ? - dev->geo.num_lun : luns_left; - for (i = 0; i < num_ch; i++) { - struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[i + bch]; - int *lun_roffs = ch_rmap->lun_offs; - struct nvm_ch_map *ch_map = &dev_map->chnls[i]; - int *lun_offs; - int luns_in_chnl = (luns_left > dev->geo.num_lun) ? - dev->geo.num_lun : luns_left; - - if (lun_balanced && prev_num_lun != luns_in_chnl) - lun_balanced = 0; - - ch_map->ch_off = ch_rmap->ch_off = bch; - ch_map->num_lun = luns_in_chnl; - - lun_offs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL); - if (!lun_offs) - goto err_ch; - - for (j = 0; j < luns_in_chnl; j++) { - luns[lunid].ppa = 0; - luns[lunid].a.ch = i; - luns[lunid++].a.lun = j; - - lun_offs[j] = blun; - lun_roffs[j + blun] = blun; - } - - ch_map->lun_offs = lun_offs; - - /* when starting a new channel, lun offset is reset */ - blun = 0; - luns_left -= luns_in_chnl; - } - - dev_map->num_ch = num_ch; - - tgt_dev = kmalloc(sizeof(struct nvm_tgt_dev), GFP_KERNEL); - if (!tgt_dev) - goto err_ch; - - /* Inherit device geometry from parent */ - memcpy(&tgt_dev->geo, &dev->geo, sizeof(struct nvm_geo)); - - /* Target device only owns a portion of the physical device */ - tgt_dev->geo.num_ch = num_ch; - tgt_dev->geo.num_lun = (lun_balanced) ? 
prev_num_lun : -1; - tgt_dev->geo.all_luns = num_lun; - tgt_dev->geo.all_chunks = num_lun * dev->geo.num_chk; - - tgt_dev->geo.op = op; - - sec_per_lun = dev->geo.clba * dev->geo.num_chk; - tgt_dev->geo.total_secs = num_lun * sec_per_lun; - - tgt_dev->q = dev->q; - tgt_dev->map = dev_map; - tgt_dev->luns = luns; - tgt_dev->parent = dev; - - return tgt_dev; -err_ch: - while (--i >= 0) - kfree(dev_map->chnls[i].lun_offs); - kfree(luns); -err_luns: - kfree(dev_map->chnls); -err_chnls: - kfree(dev_map); -err_dev: - return tgt_dev; -} - -static struct nvm_tgt_type *__nvm_find_target_type(const char *name) -{ - struct nvm_tgt_type *tt; - - list_for_each_entry(tt, &nvm_tgt_types, list) - if (!strcmp(name, tt->name)) - return tt; - - return NULL; -} - -static struct nvm_tgt_type *nvm_find_target_type(const char *name) -{ - struct nvm_tgt_type *tt; - - down_write(&nvm_tgtt_lock); - tt = __nvm_find_target_type(name); - up_write(&nvm_tgtt_lock); - - return tt; -} - -static int nvm_config_check_luns(struct nvm_geo *geo, int lun_begin, - int lun_end) -{ - if (lun_begin > lun_end || lun_end >= geo->all_luns) { - pr_err("lun out of bound (%u:%u > %u)\n", - lun_begin, lun_end, geo->all_luns - 1); - return -EINVAL; - } - - return 0; -} - -static int __nvm_config_simple(struct nvm_dev *dev, - struct nvm_ioctl_create_simple *s) -{ - struct nvm_geo *geo = &dev->geo; - - if (s->lun_begin == -1 && s->lun_end == -1) { - s->lun_begin = 0; - s->lun_end = geo->all_luns - 1; - } - - return nvm_config_check_luns(geo, s->lun_begin, s->lun_end); -} - -static int __nvm_config_extended(struct nvm_dev *dev, - struct nvm_ioctl_create_extended *e) -{ - if (e->lun_begin == 0xFFFF && e->lun_end == 0xFFFF) { - e->lun_begin = 0; - e->lun_end = dev->geo.all_luns - 1; - } - - /* op not set falls into target's default */ - if (e->op == 0xFFFF) { - e->op = NVM_TARGET_DEFAULT_OP; - } else if (e->op < NVM_TARGET_MIN_OP || e->op > NVM_TARGET_MAX_OP) { - pr_err("invalid over provisioning value\n"); - return -EINVAL; - } - - return nvm_config_check_luns(&dev->geo, e->lun_begin, e->lun_end); -} - -static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) -{ - struct nvm_ioctl_create_extended e; - struct gendisk *tdisk; - struct nvm_tgt_type *tt; - struct nvm_target *t; - struct nvm_tgt_dev *tgt_dev; - void *targetdata; - unsigned int mdts; - int ret; - - switch (create->conf.type) { - case NVM_CONFIG_TYPE_SIMPLE: - ret = __nvm_config_simple(dev, &create->conf.s); - if (ret) - return ret; - - e.lun_begin = create->conf.s.lun_begin; - e.lun_end = create->conf.s.lun_end; - e.op = NVM_TARGET_DEFAULT_OP; - break; - case NVM_CONFIG_TYPE_EXTENDED: - ret = __nvm_config_extended(dev, &create->conf.e); - if (ret) - return ret; - - e = create->conf.e; - break; - default: - pr_err("config type not valid\n"); - return -EINVAL; - } - - tt = nvm_find_target_type(create->tgttype); - if (!tt) { - pr_err("target type %s not found\n", create->tgttype); - return -EINVAL; - } - - if ((tt->flags & NVM_TGT_F_HOST_L2P) != (dev->geo.dom & NVM_RSP_L2P)) { - pr_err("device is incompatible with target L2P type.\n"); - return -EINVAL; - } - - if (nvm_target_exists(create->tgtname)) { - pr_err("target name already exists (%s)\n", - create->tgtname); - return -EINVAL; - } - - ret = nvm_reserve_luns(dev, e.lun_begin, e.lun_end); - if (ret) - return ret; - - t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL); - if (!t) { - ret = -ENOMEM; - goto err_reserve; - } - - tgt_dev = nvm_create_tgt_dev(dev, e.lun_begin, e.lun_end, e.op); - if (!tgt_dev) { - 
pr_err("could not create target device\n"); - ret = -ENOMEM; - goto err_t; - } - - tdisk = blk_alloc_disk(dev->q->node); - if (!tdisk) { - ret = -ENOMEM; - goto err_dev; - } - - strlcpy(tdisk->disk_name, create->tgtname, sizeof(tdisk->disk_name)); - tdisk->major = 0; - tdisk->first_minor = 0; - tdisk->fops = tt->bops; - - targetdata = tt->init(tgt_dev, tdisk, create->flags); - if (IS_ERR(targetdata)) { - ret = PTR_ERR(targetdata); - goto err_init; - } - - tdisk->private_data = targetdata; - tdisk->queue->queuedata = targetdata; - - mdts = (dev->geo.csecs >> 9) * NVM_MAX_VLBA; - if (dev->geo.mdts) { - mdts = min_t(u32, dev->geo.mdts, - (dev->geo.csecs >> 9) * NVM_MAX_VLBA); - } - blk_queue_max_hw_sectors(tdisk->queue, mdts); - - set_capacity(tdisk, tt->capacity(targetdata)); - add_disk(tdisk); - - if (tt->sysfs_init && tt->sysfs_init(tdisk)) { - ret = -ENOMEM; - goto err_sysfs; - } - - t->type = tt; - t->disk = tdisk; - t->dev = tgt_dev; - - mutex_lock(&dev->mlock); - list_add_tail(&t->list, &dev->targets); - mutex_unlock(&dev->mlock); - - __module_get(tt->owner); - - return 0; -err_sysfs: - if (tt->exit) - tt->exit(targetdata, true); -err_init: - blk_cleanup_disk(tdisk); -err_dev: - nvm_remove_tgt_dev(tgt_dev, 0); -err_t: - kfree(t); -err_reserve: - nvm_release_luns_err(dev, e.lun_begin, e.lun_end); - return ret; -} - -static void __nvm_remove_target(struct nvm_target *t, bool graceful) -{ - struct nvm_tgt_type *tt = t->type; - struct gendisk *tdisk = t->disk; - - del_gendisk(tdisk); - - if (tt->sysfs_exit) - tt->sysfs_exit(tdisk); - - if (tt->exit) - tt->exit(tdisk->private_data, graceful); - - nvm_remove_tgt_dev(t->dev, 1); - blk_cleanup_disk(tdisk); - module_put(t->type->owner); - - list_del(&t->list); - kfree(t); -} - -/** - * nvm_remove_tgt - Removes a target from the media manager - * @remove: ioctl structure with target name to remove. 
- * - * Returns: - * 0: on success - * 1: on not found - * <0: on error - */ -static int nvm_remove_tgt(struct nvm_ioctl_remove *remove) -{ - struct nvm_target *t = NULL; - struct nvm_dev *dev; - - down_read(&nvm_lock); - list_for_each_entry(dev, &nvm_devices, devices) { - mutex_lock(&dev->mlock); - t = nvm_find_target(dev, remove->tgtname); - if (t) { - mutex_unlock(&dev->mlock); - break; - } - mutex_unlock(&dev->mlock); - } - up_read(&nvm_lock); - - if (!t) { - pr_err("failed to remove target %s\n", - remove->tgtname); - return 1; - } - - __nvm_remove_target(t, true); - kref_put(&dev->ref, nvm_free); - - return 0; -} - -static int nvm_register_map(struct nvm_dev *dev) -{ - struct nvm_dev_map *rmap; - int i, j; - - rmap = kmalloc(sizeof(struct nvm_dev_map), GFP_KERNEL); - if (!rmap) - goto err_rmap; - - rmap->chnls = kcalloc(dev->geo.num_ch, sizeof(struct nvm_ch_map), - GFP_KERNEL); - if (!rmap->chnls) - goto err_chnls; - - for (i = 0; i < dev->geo.num_ch; i++) { - struct nvm_ch_map *ch_rmap; - int *lun_roffs; - int luns_in_chnl = dev->geo.num_lun; - - ch_rmap = &rmap->chnls[i]; - - ch_rmap->ch_off = -1; - ch_rmap->num_lun = luns_in_chnl; - - lun_roffs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL); - if (!lun_roffs) - goto err_ch; - - for (j = 0; j < luns_in_chnl; j++) - lun_roffs[j] = -1; - - ch_rmap->lun_offs = lun_roffs; - } - - dev->rmap = rmap; - - return 0; -err_ch: - while (--i >= 0) - kfree(rmap->chnls[i].lun_offs); -err_chnls: - kfree(rmap); -err_rmap: - return -ENOMEM; -} - -static void nvm_unregister_map(struct nvm_dev *dev) -{ - struct nvm_dev_map *rmap = dev->rmap; - int i; - - for (i = 0; i < dev->geo.num_ch; i++) - kfree(rmap->chnls[i].lun_offs); - - kfree(rmap->chnls); - kfree(rmap); -} - -static void nvm_map_to_dev(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p) -{ - struct nvm_dev_map *dev_map = tgt_dev->map; - struct nvm_ch_map *ch_map = &dev_map->chnls[p->a.ch]; - int lun_off = ch_map->lun_offs[p->a.lun]; - - p->a.ch += ch_map->ch_off; - p->a.lun += lun_off; -} - -static void nvm_map_to_tgt(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p) -{ - struct nvm_dev *dev = tgt_dev->parent; - struct nvm_dev_map *dev_rmap = dev->rmap; - struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[p->a.ch]; - int lun_roff = ch_rmap->lun_offs[p->a.lun]; - - p->a.ch -= ch_rmap->ch_off; - p->a.lun -= lun_roff; -} - -static void nvm_ppa_tgt_to_dev(struct nvm_tgt_dev *tgt_dev, - struct ppa_addr *ppa_list, int nr_ppas) -{ - int i; - - for (i = 0; i < nr_ppas; i++) { - nvm_map_to_dev(tgt_dev, &ppa_list[i]); - ppa_list[i] = generic_to_dev_addr(tgt_dev->parent, ppa_list[i]); - } -} - -static void nvm_ppa_dev_to_tgt(struct nvm_tgt_dev *tgt_dev, - struct ppa_addr *ppa_list, int nr_ppas) -{ - int i; - - for (i = 0; i < nr_ppas; i++) { - ppa_list[i] = dev_to_generic_addr(tgt_dev->parent, ppa_list[i]); - nvm_map_to_tgt(tgt_dev, &ppa_list[i]); - } -} - -static void nvm_rq_tgt_to_dev(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) -{ - struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - - nvm_ppa_tgt_to_dev(tgt_dev, ppa_list, rqd->nr_ppas); -} - -static void nvm_rq_dev_to_tgt(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) -{ - struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - - nvm_ppa_dev_to_tgt(tgt_dev, ppa_list, rqd->nr_ppas); -} - -int nvm_register_tgt_type(struct nvm_tgt_type *tt) -{ - int ret = 0; - - down_write(&nvm_tgtt_lock); - if (__nvm_find_target_type(tt->name)) - ret = -EEXIST; - else - list_add(&tt->list, &nvm_tgt_types); - up_write(&nvm_tgtt_lock); - - return ret; -} 
-EXPORT_SYMBOL(nvm_register_tgt_type); - -void nvm_unregister_tgt_type(struct nvm_tgt_type *tt) -{ - if (!tt) - return; - - down_write(&nvm_tgtt_lock); - list_del(&tt->list); - up_write(&nvm_tgtt_lock); -} -EXPORT_SYMBOL(nvm_unregister_tgt_type); - -void *nvm_dev_dma_alloc(struct nvm_dev *dev, gfp_t mem_flags, - dma_addr_t *dma_handler) -{ - return dev->ops->dev_dma_alloc(dev, dev->dma_pool, mem_flags, - dma_handler); -} -EXPORT_SYMBOL(nvm_dev_dma_alloc); - -void nvm_dev_dma_free(struct nvm_dev *dev, void *addr, dma_addr_t dma_handler) -{ - dev->ops->dev_dma_free(dev->dma_pool, addr, dma_handler); -} -EXPORT_SYMBOL(nvm_dev_dma_free); - -static struct nvm_dev *nvm_find_nvm_dev(const char *name) -{ - struct nvm_dev *dev; - - list_for_each_entry(dev, &nvm_devices, devices) - if (!strcmp(name, dev->name)) - return dev; - - return NULL; -} - -static int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd, - const struct ppa_addr *ppas, int nr_ppas) -{ - struct nvm_dev *dev = tgt_dev->parent; - struct nvm_geo *geo = &tgt_dev->geo; - int i, plane_cnt, pl_idx; - struct ppa_addr ppa; - - if (geo->pln_mode == NVM_PLANE_SINGLE && nr_ppas == 1) { - rqd->nr_ppas = nr_ppas; - rqd->ppa_addr = ppas[0]; - - return 0; - } - - rqd->nr_ppas = nr_ppas; - rqd->ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL, &rqd->dma_ppa_list); - if (!rqd->ppa_list) { - pr_err("failed to allocate dma memory\n"); - return -ENOMEM; - } - - plane_cnt = geo->pln_mode; - rqd->nr_ppas *= plane_cnt; - - for (i = 0; i < nr_ppas; i++) { - for (pl_idx = 0; pl_idx < plane_cnt; pl_idx++) { - ppa = ppas[i]; - ppa.g.pl = pl_idx; - rqd->ppa_list[(pl_idx * nr_ppas) + i] = ppa; - } - } - - return 0; -} - -static void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, - struct nvm_rq *rqd) -{ - if (!rqd->ppa_list) - return; - - nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list); -} - -static int nvm_set_flags(struct nvm_geo *geo, struct nvm_rq *rqd) -{ - int flags = 0; - - if (geo->version == NVM_OCSSD_SPEC_20) - return 0; - - if (rqd->is_seq) - flags |= geo->pln_mode >> 1; - - if (rqd->opcode == NVM_OP_PREAD) - flags |= (NVM_IO_SCRAMBLE_ENABLE | NVM_IO_SUSPEND); - else if (rqd->opcode == NVM_OP_PWRITE) - flags |= NVM_IO_SCRAMBLE_ENABLE; - - return flags; -} - -int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd, void *buf) -{ - struct nvm_dev *dev = tgt_dev->parent; - int ret; - - if (!dev->ops->submit_io) - return -ENODEV; - - nvm_rq_tgt_to_dev(tgt_dev, rqd); - - rqd->dev = tgt_dev; - rqd->flags = nvm_set_flags(&tgt_dev->geo, rqd); - - /* In case of error, fail with right address format */ - ret = dev->ops->submit_io(dev, rqd, buf); - if (ret) - nvm_rq_dev_to_tgt(tgt_dev, rqd); - return ret; -} -EXPORT_SYMBOL(nvm_submit_io); - -static void nvm_sync_end_io(struct nvm_rq *rqd) -{ - struct completion *waiting = rqd->private; - - complete(waiting); -} - -static int nvm_submit_io_wait(struct nvm_dev *dev, struct nvm_rq *rqd, - void *buf) -{ - DECLARE_COMPLETION_ONSTACK(wait); - int ret = 0; - - rqd->end_io = nvm_sync_end_io; - rqd->private = &wait; - - ret = dev->ops->submit_io(dev, rqd, buf); - if (ret) - return ret; - - wait_for_completion_io(&wait); - - return 0; -} - -int nvm_submit_io_sync(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd, - void *buf) -{ - struct nvm_dev *dev = tgt_dev->parent; - int ret; - - if (!dev->ops->submit_io) - return -ENODEV; - - nvm_rq_tgt_to_dev(tgt_dev, rqd); - - rqd->dev = tgt_dev; - rqd->flags = nvm_set_flags(&tgt_dev->geo, rqd); - - ret = nvm_submit_io_wait(dev, rqd, 
buf); - - return ret; -} -EXPORT_SYMBOL(nvm_submit_io_sync); - -void nvm_end_io(struct nvm_rq *rqd) -{ - struct nvm_tgt_dev *tgt_dev = rqd->dev; - - /* Convert address space */ - if (tgt_dev) - nvm_rq_dev_to_tgt(tgt_dev, rqd); - - if (rqd->end_io) - rqd->end_io(rqd); -} -EXPORT_SYMBOL(nvm_end_io); - -static int nvm_submit_io_sync_raw(struct nvm_dev *dev, struct nvm_rq *rqd) -{ - if (!dev->ops->submit_io) - return -ENODEV; - - rqd->dev = NULL; - rqd->flags = nvm_set_flags(&dev->geo, rqd); - - return nvm_submit_io_wait(dev, rqd, NULL); -} - -static int nvm_bb_chunk_sense(struct nvm_dev *dev, struct ppa_addr ppa) -{ - struct nvm_rq rqd = { NULL }; - struct bio bio; - struct bio_vec bio_vec; - struct page *page; - int ret; - - page = alloc_page(GFP_KERNEL); - if (!page) - return -ENOMEM; - - bio_init(&bio, &bio_vec, 1); - bio_add_page(&bio, page, PAGE_SIZE, 0); - bio_set_op_attrs(&bio, REQ_OP_READ, 0); - - rqd.bio = &bio; - rqd.opcode = NVM_OP_PREAD; - rqd.is_seq = 1; - rqd.nr_ppas = 1; - rqd.ppa_addr = generic_to_dev_addr(dev, ppa); - - ret = nvm_submit_io_sync_raw(dev, &rqd); - __free_page(page); - if (ret) - return ret; - - return rqd.error; -} - -/* - * Scans a 1.2 chunk first and last page to determine if its state. - * If the chunk is found to be open, also scan it to update the write - * pointer. - */ -static int nvm_bb_chunk_scan(struct nvm_dev *dev, struct ppa_addr ppa, - struct nvm_chk_meta *meta) -{ - struct nvm_geo *geo = &dev->geo; - int ret, pg, pl; - - /* sense first page */ - ret = nvm_bb_chunk_sense(dev, ppa); - if (ret < 0) /* io error */ - return ret; - else if (ret == 0) /* valid data */ - meta->state = NVM_CHK_ST_OPEN; - else if (ret > 0) { - /* - * If empty page, the chunk is free, else it is an - * actual io error. In that case, mark it offline. - */ - switch (ret) { - case NVM_RSP_ERR_EMPTYPAGE: - meta->state = NVM_CHK_ST_FREE; - return 0; - case NVM_RSP_ERR_FAILCRC: - case NVM_RSP_ERR_FAILECC: - case NVM_RSP_WARN_HIGHECC: - meta->state = NVM_CHK_ST_OPEN; - goto scan; - default: - return -ret; /* other io error */ - } - } - - /* sense last page */ - ppa.g.pg = geo->num_pg - 1; - ppa.g.pl = geo->num_pln - 1; - - ret = nvm_bb_chunk_sense(dev, ppa); - if (ret < 0) /* io error */ - return ret; - else if (ret == 0) { /* Chunk fully written */ - meta->state = NVM_CHK_ST_CLOSED; - meta->wp = geo->clba; - return 0; - } else if (ret > 0) { - switch (ret) { - case NVM_RSP_ERR_EMPTYPAGE: - case NVM_RSP_ERR_FAILCRC: - case NVM_RSP_ERR_FAILECC: - case NVM_RSP_WARN_HIGHECC: - meta->state = NVM_CHK_ST_OPEN; - break; - default: - return -ret; /* other io error */ - } - } - -scan: - /* - * chunk is open, we scan sequentially to update the write pointer. - * We make the assumption that targets write data across all planes - * before moving to the next page. - */ - for (pg = 0; pg < geo->num_pg; pg++) { - for (pl = 0; pl < geo->num_pln; pl++) { - ppa.g.pg = pg; - ppa.g.pl = pl; - - ret = nvm_bb_chunk_sense(dev, ppa); - if (ret < 0) /* io error */ - return ret; - else if (ret == 0) { - meta->wp += geo->ws_min; - } else if (ret > 0) { - switch (ret) { - case NVM_RSP_ERR_EMPTYPAGE: - return 0; - case NVM_RSP_ERR_FAILCRC: - case NVM_RSP_ERR_FAILECC: - case NVM_RSP_WARN_HIGHECC: - meta->wp += geo->ws_min; - break; - default: - return -ret; /* other io error */ - } - } - } - } - - return 0; -} - -/* - * folds a bad block list from its plane representation to its - * chunk representation. - * - * If any of the planes status are bad or grown bad, the chunk is marked - * offline. 
If not bad, the first plane state acts as the chunk state. - */ -static int nvm_bb_to_chunk(struct nvm_dev *dev, struct ppa_addr ppa, - u8 *blks, int nr_blks, struct nvm_chk_meta *meta) -{ - struct nvm_geo *geo = &dev->geo; - int ret, blk, pl, offset, blktype; - - for (blk = 0; blk < geo->num_chk; blk++) { - offset = blk * geo->pln_mode; - blktype = blks[offset]; - - for (pl = 0; pl < geo->pln_mode; pl++) { - if (blks[offset + pl] & - (NVM_BLK_T_BAD|NVM_BLK_T_GRWN_BAD)) { - blktype = blks[offset + pl]; - break; - } - } - - ppa.g.blk = blk; - - meta->wp = 0; - meta->type = NVM_CHK_TP_W_SEQ; - meta->wi = 0; - meta->slba = generic_to_dev_addr(dev, ppa).ppa; - meta->cnlb = dev->geo.clba; - - if (blktype == NVM_BLK_T_FREE) { - ret = nvm_bb_chunk_scan(dev, ppa, meta); - if (ret) - return ret; - } else { - meta->state = NVM_CHK_ST_OFFLINE; - } - - meta++; - } - - return 0; -} - -static int nvm_get_bb_meta(struct nvm_dev *dev, sector_t slba, - int nchks, struct nvm_chk_meta *meta) -{ - struct nvm_geo *geo = &dev->geo; - struct ppa_addr ppa; - u8 *blks; - int ch, lun, nr_blks; - int ret = 0; - - ppa.ppa = slba; - ppa = dev_to_generic_addr(dev, ppa); - - if (ppa.g.blk != 0) - return -EINVAL; - - if ((nchks % geo->num_chk) != 0) - return -EINVAL; - - nr_blks = geo->num_chk * geo->pln_mode; - - blks = kmalloc(nr_blks, GFP_KERNEL); - if (!blks) - return -ENOMEM; - - for (ch = ppa.g.ch; ch < geo->num_ch; ch++) { - for (lun = ppa.g.lun; lun < geo->num_lun; lun++) { - struct ppa_addr ppa_gen, ppa_dev; - - if (!nchks) - goto done; - - ppa_gen.ppa = 0; - ppa_gen.g.ch = ch; - ppa_gen.g.lun = lun; - ppa_dev = generic_to_dev_addr(dev, ppa_gen); - - ret = dev->ops->get_bb_tbl(dev, ppa_dev, blks); - if (ret) - goto done; - - ret = nvm_bb_to_chunk(dev, ppa_gen, blks, nr_blks, - meta); - if (ret) - goto done; - - meta += geo->num_chk; - nchks -= geo->num_chk; - } - } -done: - kfree(blks); - return ret; -} - -int nvm_get_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct ppa_addr ppa, - int nchks, struct nvm_chk_meta *meta) -{ - struct nvm_dev *dev = tgt_dev->parent; - - nvm_ppa_tgt_to_dev(tgt_dev, &ppa, 1); - - if (dev->geo.version == NVM_OCSSD_SPEC_12) - return nvm_get_bb_meta(dev, (sector_t)ppa.ppa, nchks, meta); - - return dev->ops->get_chk_meta(dev, (sector_t)ppa.ppa, nchks, meta); -} -EXPORT_SYMBOL_GPL(nvm_get_chunk_meta); - -int nvm_set_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, - int nr_ppas, int type) -{ - struct nvm_dev *dev = tgt_dev->parent; - struct nvm_rq rqd; - int ret; - - if (dev->geo.version == NVM_OCSSD_SPEC_20) - return 0; - - if (nr_ppas > NVM_MAX_VLBA) { - pr_err("unable to update all blocks atomically\n"); - return -EINVAL; - } - - memset(&rqd, 0, sizeof(struct nvm_rq)); - - nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas); - nvm_rq_tgt_to_dev(tgt_dev, &rqd); - - ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type); - nvm_free_rqd_ppalist(tgt_dev, &rqd); - if (ret) - return -EINVAL; - - return 0; -} -EXPORT_SYMBOL_GPL(nvm_set_chunk_meta); - -static int nvm_core_init(struct nvm_dev *dev) -{ - struct nvm_geo *geo = &dev->geo; - int ret; - - dev->lun_map = kcalloc(BITS_TO_LONGS(geo->all_luns), - sizeof(unsigned long), GFP_KERNEL); - if (!dev->lun_map) - return -ENOMEM; - - INIT_LIST_HEAD(&dev->area_list); - INIT_LIST_HEAD(&dev->targets); - mutex_init(&dev->mlock); - spin_lock_init(&dev->lock); - - ret = nvm_register_map(dev); - if (ret) - goto err_fmtype; - - return 0; -err_fmtype: - kfree(dev->lun_map); - return ret; -} - -static void nvm_free(struct kref *ref) -{ 
- struct nvm_dev *dev = container_of(ref, struct nvm_dev, ref); - - if (dev->dma_pool) - dev->ops->destroy_dma_pool(dev->dma_pool); - - if (dev->rmap) - nvm_unregister_map(dev); - - kfree(dev->lun_map); - kfree(dev); -} - -static int nvm_init(struct nvm_dev *dev) -{ - struct nvm_geo *geo = &dev->geo; - int ret = -EINVAL; - - if (dev->ops->identity(dev)) { - pr_err("device could not be identified\n"); - goto err; - } - - pr_debug("ver:%u.%u nvm_vendor:%x\n", geo->major_ver_id, - geo->minor_ver_id, geo->vmnt); - - ret = nvm_core_init(dev); - if (ret) { - pr_err("could not initialize core structures.\n"); - goto err; - } - - pr_info("registered %s [%u/%u/%u/%u/%u]\n", - dev->name, dev->geo.ws_min, dev->geo.ws_opt, - dev->geo.num_chk, dev->geo.all_luns, - dev->geo.num_ch); - return 0; -err: - pr_err("failed to initialize nvm\n"); - return ret; -} - -struct nvm_dev *nvm_alloc_dev(int node) -{ - struct nvm_dev *dev; - - dev = kzalloc_node(sizeof(struct nvm_dev), GFP_KERNEL, node); - if (dev) - kref_init(&dev->ref); - - return dev; -} -EXPORT_SYMBOL(nvm_alloc_dev); - -int nvm_register(struct nvm_dev *dev) -{ - int ret, exp_pool_size; - - pr_warn_once("lightnvm support is deprecated and will be removed in Linux 5.15.\n"); - - if (!dev->q || !dev->ops) { - kref_put(&dev->ref, nvm_free); - return -EINVAL; - } - - ret = nvm_init(dev); - if (ret) { - kref_put(&dev->ref, nvm_free); - return ret; - } - - exp_pool_size = max_t(int, PAGE_SIZE, - (NVM_MAX_VLBA * (sizeof(u64) + dev->geo.sos))); - exp_pool_size = round_up(exp_pool_size, PAGE_SIZE); - - dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist", - exp_pool_size); - if (!dev->dma_pool) { - pr_err("could not create dma pool\n"); - kref_put(&dev->ref, nvm_free); - return -ENOMEM; - } - - /* register device with a supported media manager */ - down_write(&nvm_lock); - list_add(&dev->devices, &nvm_devices); - up_write(&nvm_lock); - - return 0; -} -EXPORT_SYMBOL(nvm_register); - -void nvm_unregister(struct nvm_dev *dev) -{ - struct nvm_target *t, *tmp; - - mutex_lock(&dev->mlock); - list_for_each_entry_safe(t, tmp, &dev->targets, list) { - if (t->dev->parent != dev) - continue; - __nvm_remove_target(t, false); - kref_put(&dev->ref, nvm_free); - } - mutex_unlock(&dev->mlock); - - down_write(&nvm_lock); - list_del(&dev->devices); - up_write(&nvm_lock); - - kref_put(&dev->ref, nvm_free); -} -EXPORT_SYMBOL(nvm_unregister); - -static int __nvm_configure_create(struct nvm_ioctl_create *create) -{ - struct nvm_dev *dev; - int ret; - - down_write(&nvm_lock); - dev = nvm_find_nvm_dev(create->dev); - up_write(&nvm_lock); - - if (!dev) { - pr_err("device not found\n"); - return -EINVAL; - } - - kref_get(&dev->ref); - ret = nvm_create_tgt(dev, create); - if (ret) - kref_put(&dev->ref, nvm_free); - - return ret; -} - -static long nvm_ioctl_info(struct file *file, void __user *arg) -{ - struct nvm_ioctl_info *info; - struct nvm_tgt_type *tt; - int tgt_iter = 0; - - info = memdup_user(arg, sizeof(struct nvm_ioctl_info)); - if (IS_ERR(info)) - return PTR_ERR(info); - - info->version[0] = NVM_VERSION_MAJOR; - info->version[1] = NVM_VERSION_MINOR; - info->version[2] = NVM_VERSION_PATCH; - - down_write(&nvm_tgtt_lock); - list_for_each_entry(tt, &nvm_tgt_types, list) { - struct nvm_ioctl_info_tgt *tgt = &info->tgts[tgt_iter]; - - tgt->version[0] = tt->version[0]; - tgt->version[1] = tt->version[1]; - tgt->version[2] = tt->version[2]; - strncpy(tgt->tgtname, tt->name, NVM_TTYPE_NAME_MAX); - - tgt_iter++; - } - - info->tgtsize = tgt_iter; - up_write(&nvm_tgtt_lock); - - if 
(copy_to_user(arg, info, sizeof(struct nvm_ioctl_info))) { - kfree(info); - return -EFAULT; - } - - kfree(info); - return 0; -} - -static long nvm_ioctl_get_devices(struct file *file, void __user *arg) -{ - struct nvm_ioctl_get_devices *devices; - struct nvm_dev *dev; - int i = 0; - - devices = kzalloc(sizeof(struct nvm_ioctl_get_devices), GFP_KERNEL); - if (!devices) - return -ENOMEM; - - down_write(&nvm_lock); - list_for_each_entry(dev, &nvm_devices, devices) { - struct nvm_ioctl_device_info *info = &devices->info[i]; - - strlcpy(info->devname, dev->name, sizeof(info->devname)); - - /* kept for compatibility */ - info->bmversion[0] = 1; - info->bmversion[1] = 0; - info->bmversion[2] = 0; - strlcpy(info->bmname, "gennvm", sizeof(info->bmname)); - i++; - - if (i >= ARRAY_SIZE(devices->info)) { - pr_err("max %zd devices can be reported.\n", - ARRAY_SIZE(devices->info)); - break; - } - } - up_write(&nvm_lock); - - devices->nr_devices = i; - - if (copy_to_user(arg, devices, - sizeof(struct nvm_ioctl_get_devices))) { - kfree(devices); - return -EFAULT; - } - - kfree(devices); - return 0; -} - -static long nvm_ioctl_dev_create(struct file *file, void __user *arg) -{ - struct nvm_ioctl_create create; - - if (copy_from_user(&create, arg, sizeof(struct nvm_ioctl_create))) - return -EFAULT; - - if (create.conf.type == NVM_CONFIG_TYPE_EXTENDED && - create.conf.e.rsv != 0) { - pr_err("reserved config field in use\n"); - return -EINVAL; - } - - create.dev[DISK_NAME_LEN - 1] = '\0'; - create.tgttype[NVM_TTYPE_NAME_MAX - 1] = '\0'; - create.tgtname[DISK_NAME_LEN - 1] = '\0'; - - if (create.flags != 0) { - __u32 flags = create.flags; - - /* Check for valid flags */ - if (flags & NVM_TARGET_FACTORY) - flags &= ~NVM_TARGET_FACTORY; - - if (flags) { - pr_err("flag not supported\n"); - return -EINVAL; - } - } - - return __nvm_configure_create(&create); -} - -static long nvm_ioctl_dev_remove(struct file *file, void __user *arg) -{ - struct nvm_ioctl_remove remove; - - if (copy_from_user(&remove, arg, sizeof(struct nvm_ioctl_remove))) - return -EFAULT; - - remove.tgtname[DISK_NAME_LEN - 1] = '\0'; - - if (remove.flags != 0) { - pr_err("no flags supported\n"); - return -EINVAL; - } - - return nvm_remove_tgt(&remove); -} - -/* kept for compatibility reasons */ -static long nvm_ioctl_dev_init(struct file *file, void __user *arg) -{ - struct nvm_ioctl_dev_init init; - - if (copy_from_user(&init, arg, sizeof(struct nvm_ioctl_dev_init))) - return -EFAULT; - - if (init.flags != 0) { - pr_err("no flags supported\n"); - return -EINVAL; - } - - return 0; -} - -/* Kept for compatibility reasons */ -static long nvm_ioctl_dev_factory(struct file *file, void __user *arg) -{ - struct nvm_ioctl_dev_factory fact; - - if (copy_from_user(&fact, arg, sizeof(struct nvm_ioctl_dev_factory))) - return -EFAULT; - - fact.dev[DISK_NAME_LEN - 1] = '\0'; - - if (fact.flags & ~(NVM_FACTORY_NR_BITS - 1)) - return -EINVAL; - - return 0; -} - -static long nvm_ctl_ioctl(struct file *file, uint cmd, unsigned long arg) -{ - void __user *argp = (void __user *)arg; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - switch (cmd) { - case NVM_INFO: - return nvm_ioctl_info(file, argp); - case NVM_GET_DEVICES: - return nvm_ioctl_get_devices(file, argp); - case NVM_DEV_CREATE: - return nvm_ioctl_dev_create(file, argp); - case NVM_DEV_REMOVE: - return nvm_ioctl_dev_remove(file, argp); - case NVM_DEV_INIT: - return nvm_ioctl_dev_init(file, argp); - case NVM_DEV_FACTORY: - return nvm_ioctl_dev_factory(file, argp); - } - return 0; -} - -static const 
struct file_operations _ctl_fops = { - .open = nonseekable_open, - .unlocked_ioctl = nvm_ctl_ioctl, - .owner = THIS_MODULE, - .llseek = noop_llseek, -}; - -static struct miscdevice _nvm_misc = { - .minor = MISC_DYNAMIC_MINOR, - .name = "lightnvm", - .nodename = "lightnvm/control", - .fops = &_ctl_fops, -}; -builtin_misc_device(_nvm_misc); diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c deleted file mode 100644 index f185f1a00008..000000000000 --- a/drivers/lightnvm/pblk-cache.c +++ /dev/null @@ -1,137 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2016 CNEX Labs - * Initial release: Javier Gonzalez - * Matias Bjorling - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * pblk-cache.c - pblk's write cache - */ - -#include "pblk.h" - -void pblk_write_to_cache(struct pblk *pblk, struct bio *bio, - unsigned long flags) -{ - struct pblk_w_ctx w_ctx; - sector_t lba = pblk_get_lba(bio); - unsigned long start_time; - unsigned int bpos, pos; - int nr_entries = pblk_get_secs(bio); - int i, ret; - - start_time = bio_start_io_acct(bio); - - /* Update the write buffer head (mem) with the entries that we can - * write. The write in itself cannot fail, so there is no need to - * rollback from here on. - */ -retry: - ret = pblk_rb_may_write_user(&pblk->rwb, bio, nr_entries, &bpos); - switch (ret) { - case NVM_IO_REQUEUE: - io_schedule(); - goto retry; - case NVM_IO_ERR: - pblk_pipeline_stop(pblk); - bio_io_error(bio); - goto out; - } - - pblk_ppa_set_empty(&w_ctx.ppa); - w_ctx.flags = flags; - if (bio->bi_opf & REQ_PREFLUSH) { - w_ctx.flags |= PBLK_FLUSH_ENTRY; - pblk_write_kick(pblk); - } - - if (unlikely(!bio_has_data(bio))) - goto out; - - for (i = 0; i < nr_entries; i++) { - void *data = bio_data(bio); - - w_ctx.lba = lba + i; - - pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + i); - pblk_rb_write_entry_user(&pblk->rwb, data, w_ctx, pos); - - bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE); - } - - atomic64_add(nr_entries, &pblk->user_wa); - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_add(nr_entries, &pblk->inflight_writes); - atomic_long_add(nr_entries, &pblk->req_writes); -#endif - - pblk_rl_inserted(&pblk->rl, nr_entries); - -out: - bio_end_io_acct(bio, start_time); - pblk_write_should_kick(pblk); - - if (ret == NVM_IO_DONE) - bio_endio(bio); -} - -/* - * On GC the incoming lbas are not necessarily sequential. Also, some of the - * lbas might not be valid entries, which are marked as empty by the GC thread - */ -int pblk_write_gc_to_cache(struct pblk *pblk, struct pblk_gc_rq *gc_rq) -{ - struct pblk_w_ctx w_ctx; - unsigned int bpos, pos; - void *data = gc_rq->data; - int i, valid_entries; - - /* Update the write buffer head (mem) with the entries that we can - * write. The write in itself cannot fail, so there is no need to - * rollback from here on. 
- */ -retry: - if (!pblk_rb_may_write_gc(&pblk->rwb, gc_rq->secs_to_gc, &bpos)) { - io_schedule(); - goto retry; - } - - w_ctx.flags = PBLK_IOTYPE_GC; - pblk_ppa_set_empty(&w_ctx.ppa); - - for (i = 0, valid_entries = 0; i < gc_rq->nr_secs; i++) { - if (gc_rq->lba_list[i] == ADDR_EMPTY) - continue; - - w_ctx.lba = gc_rq->lba_list[i]; - - pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + valid_entries); - pblk_rb_write_entry_gc(&pblk->rwb, data, w_ctx, gc_rq->line, - gc_rq->paddr_list[i], pos); - - data += PBLK_EXPOSED_PAGE_SIZE; - valid_entries++; - } - - WARN_ONCE(gc_rq->secs_to_gc != valid_entries, - "pblk: inconsistent GC write\n"); - - atomic64_add(valid_entries, &pblk->gc_wa); - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_add(valid_entries, &pblk->inflight_writes); - atomic_long_add(valid_entries, &pblk->recov_gc_writes); -#endif - - pblk_write_should_kick(pblk); - return NVM_IO_OK; -} diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c deleted file mode 100644 index 33d39d3dd343..000000000000 --- a/drivers/lightnvm/pblk-core.c +++ /dev/null @@ -1,2151 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2016 CNEX Labs - * Initial release: Javier Gonzalez - * Matias Bjorling - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * pblk-core.c - pblk's core functionality - * - */ - -#define CREATE_TRACE_POINTS - -#include "pblk.h" -#include "pblk-trace.h" - -static void pblk_line_mark_bb(struct work_struct *work) -{ - struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws, - ws); - struct pblk *pblk = line_ws->pblk; - struct nvm_tgt_dev *dev = pblk->dev; - struct ppa_addr *ppa = line_ws->priv; - int ret; - - ret = nvm_set_chunk_meta(dev, ppa, 1, NVM_BLK_T_GRWN_BAD); - if (ret) { - struct pblk_line *line; - int pos; - - line = pblk_ppa_to_line(pblk, *ppa); - pos = pblk_ppa_to_pos(&dev->geo, *ppa); - - pblk_err(pblk, "failed to mark bb, line:%d, pos:%d\n", - line->id, pos); - } - - kfree(ppa); - mempool_free(line_ws, &pblk->gen_ws_pool); -} - -static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line, - struct ppa_addr ppa_addr) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct ppa_addr *ppa; - int pos = pblk_ppa_to_pos(geo, ppa_addr); - - pblk_debug(pblk, "erase failed: line:%d, pos:%d\n", line->id, pos); - atomic_long_inc(&pblk->erase_failed); - - atomic_dec(&line->blk_in_line); - if (test_and_set_bit(pos, line->blk_bitmap)) - pblk_err(pblk, "attempted to erase bb: line:%d, pos:%d\n", - line->id, pos); - - /* Not necessary to mark bad blocks on 2.0 spec. 
*/ - if (geo->version == NVM_OCSSD_SPEC_20) - return; - - ppa = kmalloc(sizeof(struct ppa_addr), GFP_ATOMIC); - if (!ppa) - return; - - *ppa = ppa_addr; - pblk_gen_run_ws(pblk, NULL, ppa, pblk_line_mark_bb, - GFP_ATOMIC, pblk->bb_wq); -} - -static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct nvm_chk_meta *chunk; - struct pblk_line *line; - int pos; - - line = pblk_ppa_to_line(pblk, rqd->ppa_addr); - pos = pblk_ppa_to_pos(geo, rqd->ppa_addr); - chunk = &line->chks[pos]; - - atomic_dec(&line->left_seblks); - - if (rqd->error) { - trace_pblk_chunk_reset(pblk_disk_name(pblk), - &rqd->ppa_addr, PBLK_CHUNK_RESET_FAILED); - - chunk->state = NVM_CHK_ST_OFFLINE; - pblk_mark_bb(pblk, line, rqd->ppa_addr); - } else { - trace_pblk_chunk_reset(pblk_disk_name(pblk), - &rqd->ppa_addr, PBLK_CHUNK_RESET_DONE); - - chunk->state = NVM_CHK_ST_FREE; - } - - trace_pblk_chunk_state(pblk_disk_name(pblk), &rqd->ppa_addr, - chunk->state); - - atomic_dec(&pblk->inflight_io); -} - -/* Erase completion assumes that only one block is erased at the time */ -static void pblk_end_io_erase(struct nvm_rq *rqd) -{ - struct pblk *pblk = rqd->private; - - __pblk_end_io_erase(pblk, rqd); - mempool_free(rqd, &pblk->e_rq_pool); -} - -/* - * Get information for all chunks from the device. - * - * The caller is responsible for freeing (vmalloc) the returned structure - */ -struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct nvm_chk_meta *meta; - struct ppa_addr ppa; - unsigned long len; - int ret; - - ppa.ppa = 0; - - len = geo->all_chunks * sizeof(*meta); - meta = vzalloc(len); - if (!meta) - return ERR_PTR(-ENOMEM); - - ret = nvm_get_chunk_meta(dev, ppa, geo->all_chunks, meta); - if (ret) { - vfree(meta); - return ERR_PTR(-EIO); - } - - return meta; -} - -struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk, - struct nvm_chk_meta *meta, - struct ppa_addr ppa) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - int ch_off = ppa.m.grp * geo->num_chk * geo->num_lun; - int lun_off = ppa.m.pu * geo->num_chk; - int chk_off = ppa.m.chk; - - return meta + ch_off + lun_off + chk_off; -} - -void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line, - u64 paddr) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct list_head *move_list = NULL; - - /* Lines being reclaimed (GC'ed) cannot be invalidated. Before the L2P - * table is modified with reclaimed sectors, a check is done to endure - * that newer updates are not overwritten. 
- */ - spin_lock(&line->lock); - WARN_ON(line->state == PBLK_LINESTATE_FREE); - - if (test_and_set_bit(paddr, line->invalid_bitmap)) { - WARN_ONCE(1, "pblk: double invalidate\n"); - spin_unlock(&line->lock); - return; - } - le32_add_cpu(line->vsc, -1); - - if (line->state == PBLK_LINESTATE_CLOSED) - move_list = pblk_line_gc_list(pblk, line); - spin_unlock(&line->lock); - - if (move_list) { - spin_lock(&l_mg->gc_lock); - spin_lock(&line->lock); - /* Prevent moving a line that has just been chosen for GC */ - if (line->state == PBLK_LINESTATE_GC) { - spin_unlock(&line->lock); - spin_unlock(&l_mg->gc_lock); - return; - } - spin_unlock(&line->lock); - - list_move_tail(&line->list, move_list); - spin_unlock(&l_mg->gc_lock); - } -} - -void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa) -{ - struct pblk_line *line; - u64 paddr; - -#ifdef CONFIG_NVM_PBLK_DEBUG - /* Callers must ensure that the ppa points to a device address */ - BUG_ON(pblk_addr_in_cache(ppa)); - BUG_ON(pblk_ppa_empty(ppa)); -#endif - - line = pblk_ppa_to_line(pblk, ppa); - paddr = pblk_dev_ppa_to_line_addr(pblk, ppa); - - __pblk_map_invalidate(pblk, line, paddr); -} - -static void pblk_invalidate_range(struct pblk *pblk, sector_t slba, - unsigned int nr_secs) -{ - sector_t lba; - - spin_lock(&pblk->trans_lock); - for (lba = slba; lba < slba + nr_secs; lba++) { - struct ppa_addr ppa; - - ppa = pblk_trans_map_get(pblk, lba); - - if (!pblk_addr_in_cache(ppa) && !pblk_ppa_empty(ppa)) - pblk_map_invalidate(pblk, ppa); - - pblk_ppa_set_empty(&ppa); - pblk_trans_map_set(pblk, lba, ppa); - } - spin_unlock(&pblk->trans_lock); -} - -int pblk_alloc_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd) -{ - struct nvm_tgt_dev *dev = pblk->dev; - - rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, - &rqd->dma_meta_list); - if (!rqd->meta_list) - return -ENOMEM; - - if (rqd->nr_ppas == 1) - return 0; - - rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size(pblk); - rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size(pblk); - - return 0; -} - -void pblk_free_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd) -{ - struct nvm_tgt_dev *dev = pblk->dev; - - if (rqd->meta_list) - nvm_dev_dma_free(dev->parent, rqd->meta_list, - rqd->dma_meta_list); -} - -/* Caller must guarantee that the request is a valid type */ -struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type) -{ - mempool_t *pool; - struct nvm_rq *rqd; - int rq_size; - - switch (type) { - case PBLK_WRITE: - case PBLK_WRITE_INT: - pool = &pblk->w_rq_pool; - rq_size = pblk_w_rq_size; - break; - case PBLK_READ: - pool = &pblk->r_rq_pool; - rq_size = pblk_g_rq_size; - break; - default: - pool = &pblk->e_rq_pool; - rq_size = pblk_g_rq_size; - } - - rqd = mempool_alloc(pool, GFP_KERNEL); - memset(rqd, 0, rq_size); - - return rqd; -} - -/* Typically used on completion path. 
Cannot guarantee request consistency */ -void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type) -{ - mempool_t *pool; - - switch (type) { - case PBLK_WRITE: - kfree(((struct pblk_c_ctx *)nvm_rq_to_pdu(rqd))->lun_bitmap); - fallthrough; - case PBLK_WRITE_INT: - pool = &pblk->w_rq_pool; - break; - case PBLK_READ: - pool = &pblk->r_rq_pool; - break; - case PBLK_ERASE: - pool = &pblk->e_rq_pool; - break; - default: - pblk_err(pblk, "trying to free unknown rqd type\n"); - return; - } - - pblk_free_rqd_meta(pblk, rqd); - mempool_free(rqd, pool); -} - -void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off, - int nr_pages) -{ - struct bio_vec *bv; - struct page *page; - int i, e, nbv = 0; - - for (i = 0; i < bio->bi_vcnt; i++) { - bv = &bio->bi_io_vec[i]; - page = bv->bv_page; - for (e = 0; e < bv->bv_len; e += PBLK_EXPOSED_PAGE_SIZE, nbv++) - if (nbv >= off) - mempool_free(page++, &pblk->page_bio_pool); - } -} - -int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags, - int nr_pages) -{ - struct request_queue *q = pblk->dev->q; - struct page *page; - int i, ret; - - for (i = 0; i < nr_pages; i++) { - page = mempool_alloc(&pblk->page_bio_pool, flags); - - ret = bio_add_pc_page(q, bio, page, PBLK_EXPOSED_PAGE_SIZE, 0); - if (ret != PBLK_EXPOSED_PAGE_SIZE) { - pblk_err(pblk, "could not add page to bio\n"); - mempool_free(page, &pblk->page_bio_pool); - goto err; - } - } - - return 0; -err: - pblk_bio_free_pages(pblk, bio, (bio->bi_vcnt - i), i); - return -1; -} - -void pblk_write_kick(struct pblk *pblk) -{ - wake_up_process(pblk->writer_ts); - mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(1000)); -} - -void pblk_write_timer_fn(struct timer_list *t) -{ - struct pblk *pblk = from_timer(pblk, t, wtimer); - - /* kick the write thread every tick to flush outstanding data */ - pblk_write_kick(pblk); -} - -void pblk_write_should_kick(struct pblk *pblk) -{ - unsigned int secs_avail = pblk_rb_read_count(&pblk->rwb); - - if (secs_avail >= pblk->min_write_pgs_data) - pblk_write_kick(pblk); -} - -static void pblk_wait_for_meta(struct pblk *pblk) -{ - do { - if (!atomic_read(&pblk->inflight_io)) - break; - - schedule(); - } while (1); -} - -static void pblk_flush_writer(struct pblk *pblk) -{ - pblk_rb_flush(&pblk->rwb); - do { - if (!pblk_rb_sync_count(&pblk->rwb)) - break; - - pblk_write_kick(pblk); - schedule(); - } while (1); -} - -struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct list_head *move_list = NULL; - int packed_meta = (le32_to_cpu(*line->vsc) / pblk->min_write_pgs_data) - * (pblk->min_write_pgs - pblk->min_write_pgs_data); - int vsc = le32_to_cpu(*line->vsc) + packed_meta; - - lockdep_assert_held(&line->lock); - - if (line->w_err_gc->has_write_err) { - if (line->gc_group != PBLK_LINEGC_WERR) { - line->gc_group = PBLK_LINEGC_WERR; - move_list = &l_mg->gc_werr_list; - pblk_rl_werr_line_in(&pblk->rl); - } - } else if (!vsc) { - if (line->gc_group != PBLK_LINEGC_FULL) { - line->gc_group = PBLK_LINEGC_FULL; - move_list = &l_mg->gc_full_list; - } - } else if (vsc < lm->high_thrs) { - if (line->gc_group != PBLK_LINEGC_HIGH) { - line->gc_group = PBLK_LINEGC_HIGH; - move_list = &l_mg->gc_high_list; - } - } else if (vsc < lm->mid_thrs) { - if (line->gc_group != PBLK_LINEGC_MID) { - line->gc_group = PBLK_LINEGC_MID; - move_list = &l_mg->gc_mid_list; - } - } else if (vsc < line->sec_in_line) { - if (line->gc_group != PBLK_LINEGC_LOW) { - 
line->gc_group = PBLK_LINEGC_LOW; - move_list = &l_mg->gc_low_list; - } - } else if (vsc == line->sec_in_line) { - if (line->gc_group != PBLK_LINEGC_EMPTY) { - line->gc_group = PBLK_LINEGC_EMPTY; - move_list = &l_mg->gc_empty_list; - } - } else { - line->state = PBLK_LINESTATE_CORRUPT; - trace_pblk_line_state(pblk_disk_name(pblk), line->id, - line->state); - - line->gc_group = PBLK_LINEGC_NONE; - move_list = &l_mg->corrupt_list; - pblk_err(pblk, "corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n", - line->id, vsc, - line->sec_in_line, - lm->high_thrs, lm->mid_thrs); - } - - return move_list; -} - -void pblk_discard(struct pblk *pblk, struct bio *bio) -{ - sector_t slba = pblk_get_lba(bio); - sector_t nr_secs = pblk_get_secs(bio); - - pblk_invalidate_range(pblk, slba, nr_secs); -} - -void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd) -{ - atomic_long_inc(&pblk->write_failed); -#ifdef CONFIG_NVM_PBLK_DEBUG - pblk_print_failed_rqd(pblk, rqd, rqd->error); -#endif -} - -void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd) -{ - /* Empty page read is not necessarily an error (e.g., L2P recovery) */ - if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) { - atomic_long_inc(&pblk->read_empty); - return; - } - - switch (rqd->error) { - case NVM_RSP_WARN_HIGHECC: - atomic_long_inc(&pblk->read_high_ecc); - break; - case NVM_RSP_ERR_FAILECC: - case NVM_RSP_ERR_FAILCRC: - atomic_long_inc(&pblk->read_failed); - break; - default: - pblk_err(pblk, "unknown read error:%d\n", rqd->error); - } -#ifdef CONFIG_NVM_PBLK_DEBUG - pblk_print_failed_rqd(pblk, rqd, rqd->error); -#endif -} - -void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write) -{ - pblk->sec_per_write = sec_per_write; -} - -int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd, void *buf) -{ - struct nvm_tgt_dev *dev = pblk->dev; - - atomic_inc(&pblk->inflight_io); - -#ifdef CONFIG_NVM_PBLK_DEBUG - if (pblk_check_io(pblk, rqd)) - return NVM_IO_ERR; -#endif - - return nvm_submit_io(dev, rqd, buf); -} - -void pblk_check_chunk_state_update(struct pblk *pblk, struct nvm_rq *rqd) -{ - struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - - int i; - - for (i = 0; i < rqd->nr_ppas; i++) { - struct ppa_addr *ppa = &ppa_list[i]; - struct nvm_chk_meta *chunk = pblk_dev_ppa_to_chunk(pblk, *ppa); - u64 caddr = pblk_dev_ppa_to_chunk_addr(pblk, *ppa); - - if (caddr == 0) - trace_pblk_chunk_state(pblk_disk_name(pblk), - ppa, NVM_CHK_ST_OPEN); - else if (caddr == (chunk->cnlb - 1)) - trace_pblk_chunk_state(pblk_disk_name(pblk), - ppa, NVM_CHK_ST_CLOSED); - } -} - -int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd, void *buf) -{ - struct nvm_tgt_dev *dev = pblk->dev; - int ret; - - atomic_inc(&pblk->inflight_io); - -#ifdef CONFIG_NVM_PBLK_DEBUG - if (pblk_check_io(pblk, rqd)) - return NVM_IO_ERR; -#endif - - ret = nvm_submit_io_sync(dev, rqd, buf); - - if (trace_pblk_chunk_state_enabled() && !ret && - rqd->opcode == NVM_OP_PWRITE) - pblk_check_chunk_state_update(pblk, rqd); - - return ret; -} - -static int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd, - void *buf) -{ - struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - int ret; - - pblk_down_chunk(pblk, ppa_list[0]); - ret = pblk_submit_io_sync(pblk, rqd, buf); - pblk_up_chunk(pblk, ppa_list[0]); - - return ret; -} - -int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail, - unsigned long secs_to_flush, bool skip_meta) -{ - int max = pblk->sec_per_write; - int min = pblk->min_write_pgs; - int secs_to_sync = 0; - - if (skip_meta && 
pblk->min_write_pgs_data != pblk->min_write_pgs) - min = max = pblk->min_write_pgs_data; - - if (secs_avail >= max) - secs_to_sync = max; - else if (secs_avail >= min) - secs_to_sync = min * (secs_avail / min); - else if (secs_to_flush) - secs_to_sync = min; - - return secs_to_sync; -} - -void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs) -{ - u64 addr; - int i; - - spin_lock(&line->lock); - addr = find_next_zero_bit(line->map_bitmap, - pblk->lm.sec_per_line, line->cur_sec); - line->cur_sec = addr - nr_secs; - - for (i = 0; i < nr_secs; i++, line->cur_sec--) - WARN_ON(!test_and_clear_bit(line->cur_sec, line->map_bitmap)); - spin_unlock(&line->lock); -} - -u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs) -{ - u64 addr; - int i; - - lockdep_assert_held(&line->lock); - - /* logic error: ppa out-of-bounds. Prevent generating bad address */ - if (line->cur_sec + nr_secs > pblk->lm.sec_per_line) { - WARN(1, "pblk: page allocation out of bounds\n"); - nr_secs = pblk->lm.sec_per_line - line->cur_sec; - } - - line->cur_sec = addr = find_next_zero_bit(line->map_bitmap, - pblk->lm.sec_per_line, line->cur_sec); - for (i = 0; i < nr_secs; i++, line->cur_sec++) - WARN_ON(test_and_set_bit(line->cur_sec, line->map_bitmap)); - - return addr; -} - -u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs) -{ - u64 addr; - - /* Lock needed in case a write fails and a recovery needs to remap - * failed write buffer entries - */ - spin_lock(&line->lock); - addr = __pblk_alloc_page(pblk, line, nr_secs); - line->left_msecs -= nr_secs; - WARN(line->left_msecs < 0, "pblk: page allocation out of bounds\n"); - spin_unlock(&line->lock); - - return addr; -} - -u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line) -{ - u64 paddr; - - spin_lock(&line->lock); - paddr = find_next_zero_bit(line->map_bitmap, - pblk->lm.sec_per_line, line->cur_sec); - spin_unlock(&line->lock); - - return paddr; -} - -u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - int bit; - - /* This usually only happens on bad lines */ - bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line); - if (bit >= lm->blk_per_line) - return -1; - - return bit * geo->ws_opt; -} - -int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_meta *lm = &pblk->lm; - struct ppa_addr *ppa_list; - struct nvm_rq rqd; - u64 paddr = pblk_line_smeta_start(pblk, line); - int i, ret; - - memset(&rqd, 0, sizeof(struct nvm_rq)); - - ret = pblk_alloc_rqd_meta(pblk, &rqd); - if (ret) - return ret; - - rqd.opcode = NVM_OP_PREAD; - rqd.nr_ppas = lm->smeta_sec; - rqd.is_seq = 1; - ppa_list = nvm_rq_to_ppa_list(&rqd); - - for (i = 0; i < lm->smeta_sec; i++, paddr++) - ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); - - ret = pblk_submit_io_sync(pblk, &rqd, line->smeta); - if (ret) { - pblk_err(pblk, "smeta I/O submission failed: %d\n", ret); - goto clear_rqd; - } - - atomic_dec(&pblk->inflight_io); - - if (rqd.error && rqd.error != NVM_RSP_WARN_HIGHECC) { - pblk_log_read_err(pblk, &rqd); - ret = -EIO; - } - -clear_rqd: - pblk_free_rqd_meta(pblk, &rqd); - return ret; -} - -static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line, - u64 paddr) -{ - struct pblk_line_meta *lm = &pblk->lm; - struct ppa_addr *ppa_list; - struct nvm_rq rqd; - __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf); - __le64 
addr_empty = cpu_to_le64(ADDR_EMPTY); - int i, ret; - - memset(&rqd, 0, sizeof(struct nvm_rq)); - - ret = pblk_alloc_rqd_meta(pblk, &rqd); - if (ret) - return ret; - - rqd.opcode = NVM_OP_PWRITE; - rqd.nr_ppas = lm->smeta_sec; - rqd.is_seq = 1; - ppa_list = nvm_rq_to_ppa_list(&rqd); - - for (i = 0; i < lm->smeta_sec; i++, paddr++) { - struct pblk_sec_meta *meta = pblk_get_meta(pblk, - rqd.meta_list, i); - - ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); - meta->lba = lba_list[paddr] = addr_empty; - } - - ret = pblk_submit_io_sync_sem(pblk, &rqd, line->smeta); - if (ret) { - pblk_err(pblk, "smeta I/O submission failed: %d\n", ret); - goto clear_rqd; - } - - atomic_dec(&pblk->inflight_io); - - if (rqd.error) { - pblk_log_write_err(pblk, &rqd); - ret = -EIO; - } - -clear_rqd: - pblk_free_rqd_meta(pblk, &rqd); - return ret; -} - -int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line, - void *emeta_buf) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - void *ppa_list_buf, *meta_list; - struct ppa_addr *ppa_list; - struct nvm_rq rqd; - u64 paddr = line->emeta_ssec; - dma_addr_t dma_ppa_list, dma_meta_list; - int min = pblk->min_write_pgs; - int left_ppas = lm->emeta_sec[0]; - int line_id = line->id; - int rq_ppas, rq_len; - int i, j; - int ret; - - meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, - &dma_meta_list); - if (!meta_list) - return -ENOMEM; - - ppa_list_buf = meta_list + pblk_dma_meta_size(pblk); - dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk); - -next_rq: - memset(&rqd, 0, sizeof(struct nvm_rq)); - - rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false); - rq_len = rq_ppas * geo->csecs; - - rqd.meta_list = meta_list; - rqd.ppa_list = ppa_list_buf; - rqd.dma_meta_list = dma_meta_list; - rqd.dma_ppa_list = dma_ppa_list; - rqd.opcode = NVM_OP_PREAD; - rqd.nr_ppas = rq_ppas; - ppa_list = nvm_rq_to_ppa_list(&rqd); - - for (i = 0; i < rqd.nr_ppas; ) { - struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, line_id); - int pos = pblk_ppa_to_pos(geo, ppa); - - if (pblk_io_aligned(pblk, rq_ppas)) - rqd.is_seq = 1; - - while (test_bit(pos, line->blk_bitmap)) { - paddr += min; - if (pblk_boundary_paddr_checks(pblk, paddr)) { - ret = -EINTR; - goto free_rqd_dma; - } - - ppa = addr_to_gen_ppa(pblk, paddr, line_id); - pos = pblk_ppa_to_pos(geo, ppa); - } - - if (pblk_boundary_paddr_checks(pblk, paddr + min)) { - ret = -EINTR; - goto free_rqd_dma; - } - - for (j = 0; j < min; j++, i++, paddr++) - ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line_id); - } - - ret = pblk_submit_io_sync(pblk, &rqd, emeta_buf); - if (ret) { - pblk_err(pblk, "emeta I/O submission failed: %d\n", ret); - goto free_rqd_dma; - } - - atomic_dec(&pblk->inflight_io); - - if (rqd.error && rqd.error != NVM_RSP_WARN_HIGHECC) { - pblk_log_read_err(pblk, &rqd); - ret = -EIO; - goto free_rqd_dma; - } - - emeta_buf += rq_len; - left_ppas -= rq_ppas; - if (left_ppas) - goto next_rq; - -free_rqd_dma: - nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list); - return ret; -} - -static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd, - struct ppa_addr ppa) -{ - rqd->opcode = NVM_OP_ERASE; - rqd->ppa_addr = ppa; - rqd->nr_ppas = 1; - rqd->is_seq = 1; - rqd->bio = NULL; -} - -static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa) -{ - struct nvm_rq rqd = {NULL}; - int ret; - - trace_pblk_chunk_reset(pblk_disk_name(pblk), &ppa, - PBLK_CHUNK_RESET_START); - - pblk_setup_e_rq(pblk, &rqd, ppa); - - /* The write 
thread schedules erases so that it minimizes disturbances - * with writes. Thus, there is no need to take the LUN semaphore. - */ - ret = pblk_submit_io_sync(pblk, &rqd, NULL); - rqd.private = pblk; - __pblk_end_io_erase(pblk, &rqd); - - return ret; -} - -int pblk_line_erase(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_meta *lm = &pblk->lm; - struct ppa_addr ppa; - int ret, bit = -1; - - /* Erase only good blocks, one at a time */ - do { - spin_lock(&line->lock); - bit = find_next_zero_bit(line->erase_bitmap, lm->blk_per_line, - bit + 1); - if (bit >= lm->blk_per_line) { - spin_unlock(&line->lock); - break; - } - - ppa = pblk->luns[bit].bppa; /* set ch and lun */ - ppa.a.blk = line->id; - - atomic_dec(&line->left_eblks); - WARN_ON(test_and_set_bit(bit, line->erase_bitmap)); - spin_unlock(&line->lock); - - ret = pblk_blk_erase_sync(pblk, ppa); - if (ret) { - pblk_err(pblk, "failed to erase line %d\n", line->id); - return ret; - } - } while (1); - - return 0; -} - -static void pblk_line_setup_metadata(struct pblk_line *line, - struct pblk_line_mgmt *l_mg, - struct pblk_line_meta *lm) -{ - int meta_line; - - lockdep_assert_held(&l_mg->free_lock); - -retry_meta: - meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES); - if (meta_line == PBLK_DATA_LINES) { - spin_unlock(&l_mg->free_lock); - io_schedule(); - spin_lock(&l_mg->free_lock); - goto retry_meta; - } - - set_bit(meta_line, &l_mg->meta_bitmap); - line->meta_line = meta_line; - - line->smeta = l_mg->sline_meta[meta_line]; - line->emeta = l_mg->eline_meta[meta_line]; - - memset(line->smeta, 0, lm->smeta_len); - memset(line->emeta->buf, 0, lm->emeta_len[0]); - - line->emeta->mem = 0; - atomic_set(&line->emeta->sync, 0); -} - -/* For now lines are always assumed full lines. Thus, smeta former and current - * lun bitmaps are omitted. 
- */ -static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line, - struct pblk_line *cur) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_emeta *emeta = line->emeta; - struct line_emeta *emeta_buf = emeta->buf; - struct line_smeta *smeta_buf = (struct line_smeta *)line->smeta; - int nr_blk_line; - - /* After erasing the line, new bad blocks might appear and we risk - * having an invalid line - */ - nr_blk_line = lm->blk_per_line - - bitmap_weight(line->blk_bitmap, lm->blk_per_line); - if (nr_blk_line < lm->min_blk_line) { - spin_lock(&l_mg->free_lock); - spin_lock(&line->lock); - line->state = PBLK_LINESTATE_BAD; - trace_pblk_line_state(pblk_disk_name(pblk), line->id, - line->state); - spin_unlock(&line->lock); - - list_add_tail(&line->list, &l_mg->bad_list); - spin_unlock(&l_mg->free_lock); - - pblk_debug(pblk, "line %d is bad\n", line->id); - - return 0; - } - - /* Run-time metadata */ - line->lun_bitmap = ((void *)(smeta_buf)) + sizeof(struct line_smeta); - - /* Mark LUNs allocated in this line (all for now) */ - bitmap_set(line->lun_bitmap, 0, lm->lun_bitmap_len); - - smeta_buf->header.identifier = cpu_to_le32(PBLK_MAGIC); - export_guid(smeta_buf->header.uuid, &pblk->instance_uuid); - smeta_buf->header.id = cpu_to_le32(line->id); - smeta_buf->header.type = cpu_to_le16(line->type); - smeta_buf->header.version_major = SMETA_VERSION_MAJOR; - smeta_buf->header.version_minor = SMETA_VERSION_MINOR; - - /* Start metadata */ - smeta_buf->seq_nr = cpu_to_le64(line->seq_nr); - smeta_buf->window_wr_lun = cpu_to_le32(geo->all_luns); - - /* Fill metadata among lines */ - if (cur) { - memcpy(line->lun_bitmap, cur->lun_bitmap, lm->lun_bitmap_len); - smeta_buf->prev_id = cpu_to_le32(cur->id); - cur->emeta->buf->next_id = cpu_to_le32(line->id); - } else { - smeta_buf->prev_id = cpu_to_le32(PBLK_LINE_EMPTY); - } - - /* All smeta must be set at this point */ - smeta_buf->header.crc = cpu_to_le32( - pblk_calc_meta_header_crc(pblk, &smeta_buf->header)); - smeta_buf->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta_buf)); - - /* End metadata */ - memcpy(&emeta_buf->header, &smeta_buf->header, - sizeof(struct line_header)); - - emeta_buf->header.version_major = EMETA_VERSION_MAJOR; - emeta_buf->header.version_minor = EMETA_VERSION_MINOR; - emeta_buf->header.crc = cpu_to_le32( - pblk_calc_meta_header_crc(pblk, &emeta_buf->header)); - - emeta_buf->seq_nr = cpu_to_le64(line->seq_nr); - emeta_buf->nr_lbas = cpu_to_le64(line->sec_in_line); - emeta_buf->nr_valid_lbas = cpu_to_le64(0); - emeta_buf->next_id = cpu_to_le32(PBLK_LINE_EMPTY); - emeta_buf->crc = cpu_to_le32(0); - emeta_buf->prev_id = smeta_buf->prev_id; - - return 1; -} - -static int pblk_line_alloc_bitmaps(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - - line->map_bitmap = mempool_alloc(l_mg->bitmap_pool, GFP_KERNEL); - if (!line->map_bitmap) - return -ENOMEM; - - memset(line->map_bitmap, 0, lm->sec_bitmap_len); - - /* will be initialized using bb info from map_bitmap */ - line->invalid_bitmap = mempool_alloc(l_mg->bitmap_pool, GFP_KERNEL); - if (!line->invalid_bitmap) { - mempool_free(line->map_bitmap, l_mg->bitmap_pool); - line->map_bitmap = NULL; - return -ENOMEM; - } - - return 0; -} - -/* For now lines are always assumed full lines. Thus, smeta former and current - * lun bitmaps are omitted. 
- */ -static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line, - int init) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - u64 off; - int bit = -1; - int emeta_secs; - - line->sec_in_line = lm->sec_per_line; - - /* Capture bad block information on line mapping bitmaps */ - while ((bit = find_next_bit(line->blk_bitmap, lm->blk_per_line, - bit + 1)) < lm->blk_per_line) { - off = bit * geo->ws_opt; - bitmap_shift_left(l_mg->bb_aux, l_mg->bb_template, off, - lm->sec_per_line); - bitmap_or(line->map_bitmap, line->map_bitmap, l_mg->bb_aux, - lm->sec_per_line); - line->sec_in_line -= geo->clba; - } - - /* Mark smeta metadata sectors as bad sectors */ - bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line); - off = bit * geo->ws_opt; - bitmap_set(line->map_bitmap, off, lm->smeta_sec); - line->sec_in_line -= lm->smeta_sec; - line->cur_sec = off + lm->smeta_sec; - - if (init && pblk_line_smeta_write(pblk, line, off)) { - pblk_debug(pblk, "line smeta I/O failed. Retry\n"); - return 0; - } - - bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line); - - /* Mark emeta metadata sectors as bad sectors. We need to consider bad - * blocks to make sure that there are enough sectors to store emeta - */ - emeta_secs = lm->emeta_sec[0]; - off = lm->sec_per_line; - while (emeta_secs) { - off -= geo->ws_opt; - if (!test_bit(off, line->invalid_bitmap)) { - bitmap_set(line->invalid_bitmap, off, geo->ws_opt); - emeta_secs -= geo->ws_opt; - } - } - - line->emeta_ssec = off; - line->sec_in_line -= lm->emeta_sec[0]; - line->nr_valid_lbas = 0; - line->left_msecs = line->sec_in_line; - *line->vsc = cpu_to_le32(line->sec_in_line); - - if (lm->sec_per_line - line->sec_in_line != - bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) { - spin_lock(&line->lock); - line->state = PBLK_LINESTATE_BAD; - trace_pblk_line_state(pblk_disk_name(pblk), line->id, - line->state); - spin_unlock(&line->lock); - - list_add_tail(&line->list, &l_mg->bad_list); - pblk_err(pblk, "unexpected line %d is bad\n", line->id); - - return 0; - } - - return 1; -} - -static int pblk_prepare_new_line(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_meta *lm = &pblk->lm; - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - int blk_to_erase = atomic_read(&line->blk_in_line); - int i; - - for (i = 0; i < lm->blk_per_line; i++) { - struct pblk_lun *rlun = &pblk->luns[i]; - int pos = pblk_ppa_to_pos(geo, rlun->bppa); - int state = line->chks[pos].state; - - /* Free chunks should not be erased */ - if (state & NVM_CHK_ST_FREE) { - set_bit(pblk_ppa_to_pos(geo, rlun->bppa), - line->erase_bitmap); - blk_to_erase--; - } - } - - return blk_to_erase; -} - -static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_meta *lm = &pblk->lm; - int blk_in_line = atomic_read(&line->blk_in_line); - int blk_to_erase; - - /* Bad blocks do not need to be erased */ - bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line); - - spin_lock(&line->lock); - - /* If we have not written to this line, we need to mark up free chunks - * as already erased - */ - if (line->state == PBLK_LINESTATE_NEW) { - blk_to_erase = pblk_prepare_new_line(pblk, line); - line->state = PBLK_LINESTATE_FREE; - trace_pblk_line_state(pblk_disk_name(pblk), line->id, - line->state); - } else { - blk_to_erase = blk_in_line; - } - - if (blk_in_line < lm->min_blk_line) { - 
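
pblk_line_init_bb() above derives a line's usable capacity by subtracting bad-block sectors and the two metadata regions from the raw line size. A worked example; the geometry numbers are invented for illustration and are not the driver's defaults:

/* Illustrative capacity math for a pblk line; all sizes are assumed. */
#include <stdio.h>

int main(void)
{
	int sec_per_line = 4096;	/* raw sectors in a line */
	int clba         = 256;		/* sectors per chunk */
	int bad_chunks   = 1;		/* chunks marked bad on this line */
	int smeta_sec    = 8;		/* start-of-line metadata sectors */
	int emeta_sec    = 64;		/* end-of-line metadata sectors */

	int sec_in_line = sec_per_line
			- bad_chunks * clba	/* unusable data sectors */
			- smeta_sec		/* reserved for smeta */
			- emeta_sec;		/* reserved for emeta */

	printf("user sectors in line: %d\n", sec_in_line);	/* 3768 */
	return 0;
}
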
spin_unlock(&line->lock); - return -EAGAIN; - } - - if (line->state != PBLK_LINESTATE_FREE) { - WARN(1, "pblk: corrupted line %d, state %d\n", - line->id, line->state); - spin_unlock(&line->lock); - return -EINTR; - } - - line->state = PBLK_LINESTATE_OPEN; - trace_pblk_line_state(pblk_disk_name(pblk), line->id, - line->state); - - atomic_set(&line->left_eblks, blk_to_erase); - atomic_set(&line->left_seblks, blk_to_erase); - - line->meta_distance = lm->meta_distance; - spin_unlock(&line->lock); - - kref_init(&line->ref); - atomic_set(&line->sec_to_update, 0); - - return 0; -} - -/* Line allocations in the recovery path are always single threaded */ -int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - int ret; - - spin_lock(&l_mg->free_lock); - l_mg->data_line = line; - list_del(&line->list); - - ret = pblk_line_prepare(pblk, line); - if (ret) { - list_add(&line->list, &l_mg->free_list); - spin_unlock(&l_mg->free_lock); - return ret; - } - spin_unlock(&l_mg->free_lock); - - ret = pblk_line_alloc_bitmaps(pblk, line); - if (ret) - goto fail; - - if (!pblk_line_init_bb(pblk, line, 0)) { - ret = -EINTR; - goto fail; - } - - pblk_rl_free_lines_dec(&pblk->rl, line, true); - return 0; - -fail: - spin_lock(&l_mg->free_lock); - list_add(&line->list, &l_mg->free_list); - spin_unlock(&l_mg->free_lock); - - return ret; -} - -void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - - mempool_free(line->map_bitmap, l_mg->bitmap_pool); - line->map_bitmap = NULL; - line->smeta = NULL; - line->emeta = NULL; -} - -static void pblk_line_reinit(struct pblk_line *line) -{ - *line->vsc = cpu_to_le32(EMPTY_ENTRY); - - line->map_bitmap = NULL; - line->invalid_bitmap = NULL; - line->smeta = NULL; - line->emeta = NULL; -} - -void pblk_line_free(struct pblk_line *line) -{ - struct pblk *pblk = line->pblk; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - - mempool_free(line->map_bitmap, l_mg->bitmap_pool); - mempool_free(line->invalid_bitmap, l_mg->bitmap_pool); - - pblk_line_reinit(line); -} - -struct pblk_line *pblk_line_get(struct pblk *pblk) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_line *line; - int ret, bit; - - lockdep_assert_held(&l_mg->free_lock); - -retry: - if (list_empty(&l_mg->free_list)) { - pblk_err(pblk, "no free lines\n"); - return NULL; - } - - line = list_first_entry(&l_mg->free_list, struct pblk_line, list); - list_del(&line->list); - l_mg->nr_free_lines--; - - bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line); - if (unlikely(bit >= lm->blk_per_line)) { - spin_lock(&line->lock); - line->state = PBLK_LINESTATE_BAD; - trace_pblk_line_state(pblk_disk_name(pblk), line->id, - line->state); - spin_unlock(&line->lock); - - list_add_tail(&line->list, &l_mg->bad_list); - - pblk_debug(pblk, "line %d is bad\n", line->id); - goto retry; - } - - ret = pblk_line_prepare(pblk, line); - if (ret) { - switch (ret) { - case -EAGAIN: - list_add(&line->list, &l_mg->bad_list); - goto retry; - case -EINTR: - list_add(&line->list, &l_mg->corrupt_list); - goto retry; - default: - pblk_err(pblk, "failed to prepare line %d\n", line->id); - list_add(&line->list, &l_mg->free_list); - l_mg->nr_free_lines++; - return NULL; - } - } - - return line; -} - -static struct pblk_line *pblk_line_retry(struct pblk *pblk, - struct pblk_line *line) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line *retry_line; - -retry: - 
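
pblk_line_get() above parks lines that fail preparation on the matching bad or corrupt list and retries with the next free line. A compressed userspace sketch of that dispatch; prepare() and its failure pattern are made up for the example:

/* Sketch of the retry policy in pblk_line_get(); errors mirror the ones above. */
#include <errno.h>
#include <stdio.h>

static int prepare(int id)
{
	if (id == 0) return -EAGAIN;	/* pretend: too few good blocks */
	if (id == 1) return -EINTR;	/* pretend: corrupted line state */
	return 0;
}

int main(void)
{
	for (int id = 0; id < 3; id++) {
		switch (prepare(id)) {
		case -EAGAIN:
			printf("line %d -> bad_list, retrying\n", id);
			continue;
		case -EINTR:
			printf("line %d -> corrupt_list, retrying\n", id);
			continue;
		case 0:
			printf("line %d -> prepared for use\n", id);
			return 0;
		}
	}
	return 1;
}
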
spin_lock(&l_mg->free_lock); - retry_line = pblk_line_get(pblk); - if (!retry_line) { - l_mg->data_line = NULL; - spin_unlock(&l_mg->free_lock); - return NULL; - } - - retry_line->map_bitmap = line->map_bitmap; - retry_line->invalid_bitmap = line->invalid_bitmap; - retry_line->smeta = line->smeta; - retry_line->emeta = line->emeta; - retry_line->meta_line = line->meta_line; - - pblk_line_reinit(line); - - l_mg->data_line = retry_line; - spin_unlock(&l_mg->free_lock); - - pblk_rl_free_lines_dec(&pblk->rl, line, false); - - if (pblk_line_erase(pblk, retry_line)) - goto retry; - - return retry_line; -} - -static void pblk_set_space_limit(struct pblk *pblk) -{ - struct pblk_rl *rl = &pblk->rl; - - atomic_set(&rl->rb_space, 0); -} - -struct pblk_line *pblk_line_get_first_data(struct pblk *pblk) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line *line; - - spin_lock(&l_mg->free_lock); - line = pblk_line_get(pblk); - if (!line) { - spin_unlock(&l_mg->free_lock); - return NULL; - } - - line->seq_nr = l_mg->d_seq_nr++; - line->type = PBLK_LINETYPE_DATA; - l_mg->data_line = line; - - pblk_line_setup_metadata(line, l_mg, &pblk->lm); - - /* Allocate next line for preparation */ - l_mg->data_next = pblk_line_get(pblk); - if (!l_mg->data_next) { - /* If we cannot get a new line, we need to stop the pipeline. - * Only allow as many writes in as we can store safely and then - * fail gracefully - */ - pblk_set_space_limit(pblk); - - l_mg->data_next = NULL; - } else { - l_mg->data_next->seq_nr = l_mg->d_seq_nr++; - l_mg->data_next->type = PBLK_LINETYPE_DATA; - } - spin_unlock(&l_mg->free_lock); - - if (pblk_line_alloc_bitmaps(pblk, line)) - return NULL; - - if (pblk_line_erase(pblk, line)) { - line = pblk_line_retry(pblk, line); - if (!line) - return NULL; - } - -retry_setup: - if (!pblk_line_init_metadata(pblk, line, NULL)) { - line = pblk_line_retry(pblk, line); - if (!line) - return NULL; - - goto retry_setup; - } - - if (!pblk_line_init_bb(pblk, line, 1)) { - line = pblk_line_retry(pblk, line); - if (!line) - return NULL; - - goto retry_setup; - } - - pblk_rl_free_lines_dec(&pblk->rl, line, true); - - return line; -} - -void pblk_ppa_to_line_put(struct pblk *pblk, struct ppa_addr ppa) -{ - struct pblk_line *line; - - line = pblk_ppa_to_line(pblk, ppa); - kref_put(&line->ref, pblk_line_put_wq); -} - -void pblk_rq_to_line_put(struct pblk *pblk, struct nvm_rq *rqd) -{ - struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - int i; - - for (i = 0; i < rqd->nr_ppas; i++) - pblk_ppa_to_line_put(pblk, ppa_list[i]); -} - -static void pblk_stop_writes(struct pblk *pblk, struct pblk_line *line) -{ - lockdep_assert_held(&pblk->l_mg.free_lock); - - pblk_set_space_limit(pblk); - pblk->state = PBLK_STATE_STOPPING; - trace_pblk_state(pblk_disk_name(pblk), pblk->state); -} - -static void pblk_line_close_meta_sync(struct pblk *pblk) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_line *line, *tline; - LIST_HEAD(list); - - spin_lock(&l_mg->close_lock); - if (list_empty(&l_mg->emeta_list)) { - spin_unlock(&l_mg->close_lock); - return; - } - - list_cut_position(&list, &l_mg->emeta_list, l_mg->emeta_list.prev); - spin_unlock(&l_mg->close_lock); - - list_for_each_entry_safe(line, tline, &list, list) { - struct pblk_emeta *emeta = line->emeta; - - while (emeta->mem < lm->emeta_len[0]) { - int ret; - - ret = pblk_submit_meta_io(pblk, line); - if (ret) { - pblk_err(pblk, "sync meta line %d failed (%d)\n", - line->id, ret); - return; - } - } - } - - 
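
pblk_line_get_first_data() above keeps one line in use while the next one is prepared ahead of time, and freezes the write budget as soon as no spare line can be obtained. A toy model of that two-slot pipeline; the three-line pool is invented:

/* Two-slot line provisioning: consume data_line, pre-stage data_next. */
#include <stdio.h>

struct line { int id; };

static struct line pool[3] = { {0}, {1}, {2} };
static int next_free;

static struct line *line_get(void)
{
	return next_free < 3 ? &pool[next_free++] : NULL;
}

int main(void)
{
	struct line *data_line = line_get();
	struct line *data_next = line_get();	/* prepared in advance */

	while (data_line) {
		printf("filling line %d\n", data_line->id);
		data_line = data_next;
		/* the freeze fires while the last staged line still drains */
		if (data_line && !(data_next = line_get()))
			printf("no spare line: stop admitting new writes\n");
	}
	return 0;
}
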
pblk_wait_for_meta(pblk); - flush_workqueue(pblk->close_wq); -} - -void __pblk_pipeline_flush(struct pblk *pblk) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - int ret; - - spin_lock(&l_mg->free_lock); - if (pblk->state == PBLK_STATE_RECOVERING || - pblk->state == PBLK_STATE_STOPPED) { - spin_unlock(&l_mg->free_lock); - return; - } - pblk->state = PBLK_STATE_RECOVERING; - trace_pblk_state(pblk_disk_name(pblk), pblk->state); - spin_unlock(&l_mg->free_lock); - - pblk_flush_writer(pblk); - pblk_wait_for_meta(pblk); - - ret = pblk_recov_pad(pblk); - if (ret) { - pblk_err(pblk, "could not close data on teardown(%d)\n", ret); - return; - } - - flush_workqueue(pblk->bb_wq); - pblk_line_close_meta_sync(pblk); -} - -void __pblk_pipeline_stop(struct pblk *pblk) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - - spin_lock(&l_mg->free_lock); - pblk->state = PBLK_STATE_STOPPED; - trace_pblk_state(pblk_disk_name(pblk), pblk->state); - l_mg->data_line = NULL; - l_mg->data_next = NULL; - spin_unlock(&l_mg->free_lock); -} - -void pblk_pipeline_stop(struct pblk *pblk) -{ - __pblk_pipeline_flush(pblk); - __pblk_pipeline_stop(pblk); -} - -struct pblk_line *pblk_line_replace_data(struct pblk *pblk) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line *cur, *new = NULL; - unsigned int left_seblks; - - new = l_mg->data_next; - if (!new) - goto out; - - spin_lock(&l_mg->free_lock); - cur = l_mg->data_line; - l_mg->data_line = new; - - pblk_line_setup_metadata(new, l_mg, &pblk->lm); - spin_unlock(&l_mg->free_lock); - -retry_erase: - left_seblks = atomic_read(&new->left_seblks); - if (left_seblks) { - /* If line is not fully erased, erase it */ - if (atomic_read(&new->left_eblks)) { - if (pblk_line_erase(pblk, new)) - goto out; - } else { - io_schedule(); - } - goto retry_erase; - } - - if (pblk_line_alloc_bitmaps(pblk, new)) - return NULL; - -retry_setup: - if (!pblk_line_init_metadata(pblk, new, cur)) { - new = pblk_line_retry(pblk, new); - if (!new) - goto out; - - goto retry_setup; - } - - if (!pblk_line_init_bb(pblk, new, 1)) { - new = pblk_line_retry(pblk, new); - if (!new) - goto out; - - goto retry_setup; - } - - pblk_rl_free_lines_dec(&pblk->rl, new, true); - - /* Allocate next line for preparation */ - spin_lock(&l_mg->free_lock); - l_mg->data_next = pblk_line_get(pblk); - if (!l_mg->data_next) { - /* If we cannot get a new line, we need to stop the pipeline. 
- * Only allow as many writes in as we can store safely and then - * fail gracefully - */ - pblk_stop_writes(pblk, new); - l_mg->data_next = NULL; - } else { - l_mg->data_next->seq_nr = l_mg->d_seq_nr++; - l_mg->data_next->type = PBLK_LINETYPE_DATA; - } - spin_unlock(&l_mg->free_lock); - -out: - return new; -} - -static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_gc *gc = &pblk->gc; - - spin_lock(&line->lock); - WARN_ON(line->state != PBLK_LINESTATE_GC); - if (line->w_err_gc->has_gc_err) { - spin_unlock(&line->lock); - pblk_err(pblk, "line %d had errors during GC\n", line->id); - pblk_put_line_back(pblk, line); - line->w_err_gc->has_gc_err = 0; - return; - } - - line->state = PBLK_LINESTATE_FREE; - trace_pblk_line_state(pblk_disk_name(pblk), line->id, - line->state); - line->gc_group = PBLK_LINEGC_NONE; - pblk_line_free(line); - - if (line->w_err_gc->has_write_err) { - pblk_rl_werr_line_out(&pblk->rl); - line->w_err_gc->has_write_err = 0; - } - - spin_unlock(&line->lock); - atomic_dec(&gc->pipeline_gc); - - spin_lock(&l_mg->free_lock); - list_add_tail(&line->list, &l_mg->free_list); - l_mg->nr_free_lines++; - spin_unlock(&l_mg->free_lock); - - pblk_rl_free_lines_inc(&pblk->rl, line); -} - -static void pblk_line_put_ws(struct work_struct *work) -{ - struct pblk_line_ws *line_put_ws = container_of(work, - struct pblk_line_ws, ws); - struct pblk *pblk = line_put_ws->pblk; - struct pblk_line *line = line_put_ws->line; - - __pblk_line_put(pblk, line); - mempool_free(line_put_ws, &pblk->gen_ws_pool); -} - -void pblk_line_put(struct kref *ref) -{ - struct pblk_line *line = container_of(ref, struct pblk_line, ref); - struct pblk *pblk = line->pblk; - - __pblk_line_put(pblk, line); -} - -void pblk_line_put_wq(struct kref *ref) -{ - struct pblk_line *line = container_of(ref, struct pblk_line, ref); - struct pblk *pblk = line->pblk; - struct pblk_line_ws *line_put_ws; - - line_put_ws = mempool_alloc(&pblk->gen_ws_pool, GFP_ATOMIC); - if (!line_put_ws) - return; - - line_put_ws->pblk = pblk; - line_put_ws->line = line; - line_put_ws->priv = NULL; - - INIT_WORK(&line_put_ws->ws, pblk_line_put_ws); - queue_work(pblk->r_end_wq, &line_put_ws->ws); -} - -int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa) -{ - struct nvm_rq *rqd; - int err; - - rqd = pblk_alloc_rqd(pblk, PBLK_ERASE); - - pblk_setup_e_rq(pblk, rqd, ppa); - - rqd->end_io = pblk_end_io_erase; - rqd->private = pblk; - - trace_pblk_chunk_reset(pblk_disk_name(pblk), - &ppa, PBLK_CHUNK_RESET_START); - - /* The write thread schedules erases so that it minimizes disturbances - * with writes. Thus, there is no need to take the LUN semaphore. 
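
The put paths above follow the usual kref discipline: whoever drops the last reference releases the line, and pblk_line_put_wq() pushes that release onto a workqueue, apparently so a reference dropped from I/O completion context does not have to run the line-management teardown there. The core pattern as a userspace sketch, without the atomics:

/* Last-reference-runs-release, the shape of the kref_put() calls above. */
#include <stdio.h>

struct line {
	int refs;
	void (*release)(struct line *);
};

static void line_release(struct line *l)
{
	printf("last reference gone: line %p back on the free list\n", (void *)l);
}

static void line_put(struct line *l)
{
	if (--l->refs == 0)	/* kref_put() does this atomically */
		l->release(l);
}

int main(void)
{
	struct line l = { .refs = 2, .release = line_release };

	line_put(&l);	/* an in-flight I/O still holds the line */
	line_put(&l);	/* the final put triggers the release */
	return 0;
}
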
- */ - err = pblk_submit_io(pblk, rqd, NULL); - if (err) { - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - - pblk_err(pblk, "could not async erase line:%d,blk:%d\n", - pblk_ppa_to_line_id(ppa), - pblk_ppa_to_pos(geo, ppa)); - } - - return err; -} - -struct pblk_line *pblk_line_get_data(struct pblk *pblk) -{ - return pblk->l_mg.data_line; -} - -/* For now, always erase next line */ -struct pblk_line *pblk_line_get_erase(struct pblk *pblk) -{ - return pblk->l_mg.data_next; -} - -int pblk_line_is_full(struct pblk_line *line) -{ - return (line->left_msecs == 0); -} - -static void pblk_line_should_sync_meta(struct pblk *pblk) -{ - if (pblk_rl_is_limit(&pblk->rl)) - pblk_line_close_meta_sync(pblk); -} - -void pblk_line_close(struct pblk *pblk, struct pblk_line *line) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct list_head *move_list; - int i; - -#ifdef CONFIG_NVM_PBLK_DEBUG - WARN(!bitmap_full(line->map_bitmap, lm->sec_per_line), - "pblk: corrupt closed line %d\n", line->id); -#endif - - spin_lock(&l_mg->free_lock); - WARN_ON(!test_and_clear_bit(line->meta_line, &l_mg->meta_bitmap)); - spin_unlock(&l_mg->free_lock); - - spin_lock(&l_mg->gc_lock); - spin_lock(&line->lock); - WARN_ON(line->state != PBLK_LINESTATE_OPEN); - line->state = PBLK_LINESTATE_CLOSED; - move_list = pblk_line_gc_list(pblk, line); - list_add_tail(&line->list, move_list); - - mempool_free(line->map_bitmap, l_mg->bitmap_pool); - line->map_bitmap = NULL; - line->smeta = NULL; - line->emeta = NULL; - - for (i = 0; i < lm->blk_per_line; i++) { - struct pblk_lun *rlun = &pblk->luns[i]; - int pos = pblk_ppa_to_pos(geo, rlun->bppa); - int state = line->chks[pos].state; - - if (!(state & NVM_CHK_ST_OFFLINE)) - state = NVM_CHK_ST_CLOSED; - } - - spin_unlock(&line->lock); - spin_unlock(&l_mg->gc_lock); - - trace_pblk_line_state(pblk_disk_name(pblk), line->id, - line->state); -} - -void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_emeta *emeta = line->emeta; - struct line_emeta *emeta_buf = emeta->buf; - struct wa_counters *wa = emeta_to_wa(lm, emeta_buf); - - /* No need for exact vsc value; avoid a big line lock and take approx.
*/ - memcpy(emeta_to_vsc(pblk, emeta_buf), l_mg->vsc_list, lm->vsc_list_len); - memcpy(emeta_to_bb(emeta_buf), line->blk_bitmap, lm->blk_bitmap_len); - - wa->user = cpu_to_le64(atomic64_read(&pblk->user_wa)); - wa->pad = cpu_to_le64(atomic64_read(&pblk->pad_wa)); - wa->gc = cpu_to_le64(atomic64_read(&pblk->gc_wa)); - - if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC) { - emeta_buf->header.identifier = cpu_to_le32(PBLK_MAGIC); - export_guid(emeta_buf->header.uuid, &pblk->instance_uuid); - emeta_buf->header.id = cpu_to_le32(line->id); - emeta_buf->header.type = cpu_to_le16(line->type); - emeta_buf->header.version_major = EMETA_VERSION_MAJOR; - emeta_buf->header.version_minor = EMETA_VERSION_MINOR; - emeta_buf->header.crc = cpu_to_le32( - pblk_calc_meta_header_crc(pblk, &emeta_buf->header)); - } - - emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas); - emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf)); - - spin_lock(&l_mg->close_lock); - spin_lock(&line->lock); - - /* Update the in-memory start address for emeta, in case it has - * shifted due to write errors - */ - if (line->emeta_ssec != line->cur_sec) - line->emeta_ssec = line->cur_sec; - - list_add_tail(&line->list, &l_mg->emeta_list); - spin_unlock(&line->lock); - spin_unlock(&l_mg->close_lock); - - pblk_line_should_sync_meta(pblk); -} - -static void pblk_save_lba_list(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_meta *lm = &pblk->lm; - unsigned int lba_list_size = lm->emeta_len[2]; - struct pblk_w_err_gc *w_err_gc = line->w_err_gc; - struct pblk_emeta *emeta = line->emeta; - - w_err_gc->lba_list = kvmalloc(lba_list_size, GFP_KERNEL); - memcpy(w_err_gc->lba_list, emeta_to_lbas(pblk, emeta->buf), - lba_list_size); -} - -void pblk_line_close_ws(struct work_struct *work) -{ - struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws, - ws); - struct pblk *pblk = line_ws->pblk; - struct pblk_line *line = line_ws->line; - struct pblk_w_err_gc *w_err_gc = line->w_err_gc; - - /* Write errors makes the emeta start address stored in smeta invalid, - * so keep a copy of the lba list until we've gc'd the line - */ - if (w_err_gc->has_write_err) - pblk_save_lba_list(pblk, line); - - pblk_line_close(pblk, line); - mempool_free(line_ws, &pblk->gen_ws_pool); -} - -void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, - void (*work)(struct work_struct *), gfp_t gfp_mask, - struct workqueue_struct *wq) -{ - struct pblk_line_ws *line_ws; - - line_ws = mempool_alloc(&pblk->gen_ws_pool, gfp_mask); - if (!line_ws) { - pblk_err(pblk, "pblk: could not allocate memory\n"); - return; - } - - line_ws->pblk = pblk; - line_ws->line = line; - line_ws->priv = priv; - - INIT_WORK(&line_ws->ws, work); - queue_work(wq, &line_ws->ws); -} - -static void __pblk_down_chunk(struct pblk *pblk, int pos) -{ - struct pblk_lun *rlun = &pblk->luns[pos]; - int ret; - - /* - * Only send one inflight I/O per LUN. 
Since we map at a page - * granularity, all ppas in the I/O will map to the same LUN - */ - - ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(30000)); - if (ret == -ETIME || ret == -EINTR) - pblk_err(pblk, "taking lun semaphore timed out: err %d\n", - -ret); -} - -void pblk_down_chunk(struct pblk *pblk, struct ppa_addr ppa) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - int pos = pblk_ppa_to_pos(geo, ppa); - - __pblk_down_chunk(pblk, pos); -} - -void pblk_down_rq(struct pblk *pblk, struct ppa_addr ppa, - unsigned long *lun_bitmap) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - int pos = pblk_ppa_to_pos(geo, ppa); - - /* If the LUN has been locked for this same request, do not attempt to - * lock it again - */ - if (test_and_set_bit(pos, lun_bitmap)) - return; - - __pblk_down_chunk(pblk, pos); -} - -void pblk_up_chunk(struct pblk *pblk, struct ppa_addr ppa) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_lun *rlun; - int pos = pblk_ppa_to_pos(geo, ppa); - - rlun = &pblk->luns[pos]; - up(&rlun->wr_sem); -} - -void pblk_up_rq(struct pblk *pblk, unsigned long *lun_bitmap) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_lun *rlun; - int num_lun = geo->all_luns; - int bit = -1; - - while ((bit = find_next_bit(lun_bitmap, num_lun, bit + 1)) < num_lun) { - rlun = &pblk->luns[bit]; - up(&rlun->wr_sem); - } -} - -void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa) -{ - struct ppa_addr ppa_l2p; - - /* logic error: lba out-of-bounds. Ignore update */ - if (!(lba < pblk->capacity)) { - WARN(1, "pblk: corrupted L2P map request\n"); - return; - } - - spin_lock(&pblk->trans_lock); - ppa_l2p = pblk_trans_map_get(pblk, lba); - - if (!pblk_addr_in_cache(ppa_l2p) && !pblk_ppa_empty(ppa_l2p)) - pblk_map_invalidate(pblk, ppa_l2p); - - pblk_trans_map_set(pblk, lba, ppa); - spin_unlock(&pblk->trans_lock); -} - -void pblk_update_map_cache(struct pblk *pblk, sector_t lba, struct ppa_addr ppa) -{ - -#ifdef CONFIG_NVM_PBLK_DEBUG - /* Callers must ensure that the ppa points to a cache address */ - BUG_ON(!pblk_addr_in_cache(ppa)); - BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa))); -#endif - - pblk_update_map(pblk, lba, ppa); -} - -int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa_new, - struct pblk_line *gc_line, u64 paddr_gc) -{ - struct ppa_addr ppa_l2p, ppa_gc; - int ret = 1; - -#ifdef CONFIG_NVM_PBLK_DEBUG - /* Callers must ensure that the ppa points to a cache address */ - BUG_ON(!pblk_addr_in_cache(ppa_new)); - BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa_new))); -#endif - - /* logic error: lba out-of-bounds.
Ignore update */ - if (!(lba < pblk->capacity)) { - WARN(1, "pblk: corrupted L2P map request\n"); - return 0; - } - - spin_lock(&pblk->trans_lock); - ppa_l2p = pblk_trans_map_get(pblk, lba); - ppa_gc = addr_to_gen_ppa(pblk, paddr_gc, gc_line->id); - - if (!pblk_ppa_comp(ppa_l2p, ppa_gc)) { - spin_lock(&gc_line->lock); - WARN(!test_bit(paddr_gc, gc_line->invalid_bitmap), - "pblk: corrupted GC update"); - spin_unlock(&gc_line->lock); - - ret = 0; - goto out; - } - - pblk_trans_map_set(pblk, lba, ppa_new); -out: - spin_unlock(&pblk->trans_lock); - return ret; -} - -void pblk_update_map_dev(struct pblk *pblk, sector_t lba, - struct ppa_addr ppa_mapped, struct ppa_addr ppa_cache) -{ - struct ppa_addr ppa_l2p; - -#ifdef CONFIG_NVM_PBLK_DEBUG - /* Callers must ensure that the ppa points to a device address */ - BUG_ON(pblk_addr_in_cache(ppa_mapped)); -#endif - /* Invalidate and discard padded entries */ - if (lba == ADDR_EMPTY) { - atomic64_inc(&pblk->pad_wa); -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_inc(&pblk->padded_wb); -#endif - if (!pblk_ppa_empty(ppa_mapped)) - pblk_map_invalidate(pblk, ppa_mapped); - return; - } - - /* logic error: lba out-of-bounds. Ignore update */ - if (!(lba < pblk->capacity)) { - WARN(1, "pblk: corrupted L2P map request\n"); - return; - } - - spin_lock(&pblk->trans_lock); - ppa_l2p = pblk_trans_map_get(pblk, lba); - - /* Do not update L2P if the cacheline has been updated. In this case, - * the mapped ppa must be invalidated - */ - if (!pblk_ppa_comp(ppa_l2p, ppa_cache)) { - if (!pblk_ppa_empty(ppa_mapped)) - pblk_map_invalidate(pblk, ppa_mapped); - goto out; - } - -#ifdef CONFIG_NVM_PBLK_DEBUG - WARN_ON(!pblk_addr_in_cache(ppa_l2p) && !pblk_ppa_empty(ppa_l2p)); -#endif - - pblk_trans_map_set(pblk, lba, ppa_mapped); -out: - spin_unlock(&pblk->trans_lock); -} - -int pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, - sector_t blba, int nr_secs, bool *from_cache) -{ - int i; - - spin_lock(&pblk->trans_lock); - for (i = 0; i < nr_secs; i++) { - struct ppa_addr ppa; - - ppa = ppas[i] = pblk_trans_map_get(pblk, blba + i); - - /* If the L2P entry maps to a line, the reference is valid */ - if (!pblk_ppa_empty(ppa) && !pblk_addr_in_cache(ppa)) { - struct pblk_line *line = pblk_ppa_to_line(pblk, ppa); - - if (i > 0 && *from_cache) - break; - *from_cache = false; - - kref_get(&line->ref); - } else { - if (i > 0 && !*from_cache) - break; - *from_cache = true; - } - } - spin_unlock(&pblk->trans_lock); - return i; -} - -void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas, - u64 *lba_list, int nr_secs) -{ - u64 lba; - int i; - - spin_lock(&pblk->trans_lock); - for (i = 0; i < nr_secs; i++) { - lba = lba_list[i]; - if (lba != ADDR_EMPTY) { - /* logic error: lba out-of-bounds. 
Ignore update */ - if (!(lba < pblk->capacity)) { - WARN(1, "pblk: corrupted L2P map request\n"); - continue; - } - ppas[i] = pblk_trans_map_get(pblk, lba); - } - } - spin_unlock(&pblk->trans_lock); -} - -void *pblk_get_meta_for_writes(struct pblk *pblk, struct nvm_rq *rqd) -{ - void *buffer; - - if (pblk_is_oob_meta_supported(pblk)) { - /* Just use OOB metadata buffer as always */ - buffer = rqd->meta_list; - } else { - /* We need to reuse last page of request (packed metadata) - * in similar way as traditional oob metadata - */ - buffer = page_to_virt( - rqd->bio->bi_io_vec[rqd->bio->bi_vcnt - 1].bv_page); - } - - return buffer; -} - -void pblk_get_packed_meta(struct pblk *pblk, struct nvm_rq *rqd) -{ - void *meta_list = rqd->meta_list; - void *page; - int i = 0; - - if (pblk_is_oob_meta_supported(pblk)) - return; - - page = page_to_virt(rqd->bio->bi_io_vec[rqd->bio->bi_vcnt - 1].bv_page); - /* We need to fill oob meta buffer with data from packed metadata */ - for (; i < rqd->nr_ppas; i++) - memcpy(pblk_get_meta(pblk, meta_list, i), - page + (i * sizeof(struct pblk_sec_meta)), - sizeof(struct pblk_sec_meta)); -} diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c deleted file mode 100644 index b31658be35a7..000000000000 --- a/drivers/lightnvm/pblk-gc.c +++ /dev/null @@ -1,726 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2016 CNEX Labs - * Initial release: Javier Gonzalez - * Matias Bjorling - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * pblk-gc.c - pblk's garbage collector - */ - -#include "pblk.h" -#include "pblk-trace.h" -#include - - -static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq) -{ - vfree(gc_rq->data); - kfree(gc_rq); -} - -static int pblk_gc_write(struct pblk *pblk) -{ - struct pblk_gc *gc = &pblk->gc; - struct pblk_gc_rq *gc_rq, *tgc_rq; - LIST_HEAD(w_list); - - spin_lock(&gc->w_lock); - if (list_empty(&gc->w_list)) { - spin_unlock(&gc->w_lock); - return 1; - } - - list_cut_position(&w_list, &gc->w_list, gc->w_list.prev); - gc->w_entries = 0; - spin_unlock(&gc->w_lock); - - list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) { - pblk_write_gc_to_cache(pblk, gc_rq); - list_del(&gc_rq->list); - kref_put(&gc_rq->line->ref, pblk_line_put); - pblk_gc_free_gc_rq(gc_rq); - } - - return 0; -} - -static void pblk_gc_writer_kick(struct pblk_gc *gc) -{ - wake_up_process(gc->gc_writer_ts); -} - -void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct list_head *move_list; - - spin_lock(&l_mg->gc_lock); - spin_lock(&line->lock); - WARN_ON(line->state != PBLK_LINESTATE_GC); - line->state = PBLK_LINESTATE_CLOSED; - trace_pblk_line_state(pblk_disk_name(pblk), line->id, - line->state); - - /* We need to reset gc_group in order to ensure that - * pblk_line_gc_list will return proper move_list - * since right now current line is not on any of the - * gc lists. 
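
pblk_gc_write() above empties the shared write list by cutting it onto a private head while holding gc->w_lock, then walks the entries with the lock already dropped. The same drain pattern as a userspace sketch; a hand-rolled singly linked list stands in for list_cut_position():

/* Cut-then-walk drain: O(1) under the lock, processing outside it. */
#include <stdio.h>
#include <stdlib.h>

struct gc_rq { struct gc_rq *next; int id; };

static struct gc_rq *w_list;	/* shared; lock-protected in the driver */

static void gc_write(void)
{
	struct gc_rq *local = w_list;	/* list_cut_position() equivalent */
	w_list = NULL;			/* ...done "under the lock" */

	while (local) {			/* lock-free processing */
		struct gc_rq *rq = local;
		local = local->next;
		printf("writing back gc rq %d\n", rq->id);
		free(rq);
	}
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		struct gc_rq *rq = malloc(sizeof(*rq));
		rq->id = i;
		rq->next = w_list;
		w_list = rq;
	}
	gc_write();
	return 0;
}
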
- */ - line->gc_group = PBLK_LINEGC_NONE; - move_list = pblk_line_gc_list(pblk, line); - spin_unlock(&line->lock); - list_add_tail(&line->list, move_list); - spin_unlock(&l_mg->gc_lock); -} - -static void pblk_gc_line_ws(struct work_struct *work) -{ - struct pblk_line_ws *gc_rq_ws = container_of(work, - struct pblk_line_ws, ws); - struct pblk *pblk = gc_rq_ws->pblk; - struct pblk_gc *gc = &pblk->gc; - struct pblk_line *line = gc_rq_ws->line; - struct pblk_gc_rq *gc_rq = gc_rq_ws->priv; - int ret; - - up(&gc->gc_sem); - - /* Read from GC victim block */ - ret = pblk_submit_read_gc(pblk, gc_rq); - if (ret) { - line->w_err_gc->has_gc_err = 1; - goto out; - } - - if (!gc_rq->secs_to_gc) - goto out; - -retry: - spin_lock(&gc->w_lock); - if (gc->w_entries >= PBLK_GC_RQ_QD) { - spin_unlock(&gc->w_lock); - pblk_gc_writer_kick(&pblk->gc); - usleep_range(128, 256); - goto retry; - } - gc->w_entries++; - list_add_tail(&gc_rq->list, &gc->w_list); - spin_unlock(&gc->w_lock); - - pblk_gc_writer_kick(&pblk->gc); - - kfree(gc_rq_ws); - return; - -out: - pblk_gc_free_gc_rq(gc_rq); - kref_put(&line->ref, pblk_line_put); - kfree(gc_rq_ws); -} - -static __le64 *get_lba_list_from_emeta(struct pblk *pblk, - struct pblk_line *line) -{ - struct line_emeta *emeta_buf; - struct pblk_line_meta *lm = &pblk->lm; - unsigned int lba_list_size = lm->emeta_len[2]; - __le64 *lba_list; - int ret; - - emeta_buf = kvmalloc(lm->emeta_len[0], GFP_KERNEL); - if (!emeta_buf) - return NULL; - - ret = pblk_line_emeta_read(pblk, line, emeta_buf); - if (ret) { - pblk_err(pblk, "line %d read emeta failed (%d)\n", - line->id, ret); - kvfree(emeta_buf); - return NULL; - } - - /* If this read fails, it means that emeta is corrupted. - * For now, leave the line untouched. - * TODO: Implement a recovery routine that scans and moves - * all sectors on the line. 
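
pblk_gc_line_ws() above applies back pressure: a request read from a victim line is only queued for the GC writer once w_entries is below PBLK_GC_RQ_QD. A toy model of that bounded queue; the depth and the consume() stand-in are invented:

/* Bounded producer: wait for the consumer whenever the queue is full. */
#include <stdio.h>

#define QD 2			/* stands in for PBLK_GC_RQ_QD */

static int w_entries;

static void consume(void)	/* stands in for the GC writer thread */
{
	if (w_entries) {
		w_entries--;
		printf("  writer drained one entry\n");
	}
}

static void enqueue(int rq)
{
	while (w_entries >= QD)
		consume();	/* the driver kicks the writer and naps */
	w_entries++;
	printf("queued gc rq %d (depth %d)\n", rq, w_entries);
}

int main(void)
{
	for (int rq = 0; rq < 4; rq++)
		enqueue(rq);
	return 0;
}
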
- */ - - ret = pblk_recov_check_emeta(pblk, emeta_buf); - if (ret) { - pblk_err(pblk, "inconsistent emeta (line %d)\n", - line->id); - kvfree(emeta_buf); - return NULL; - } - - lba_list = kvmalloc(lba_list_size, GFP_KERNEL); - - if (lba_list) - memcpy(lba_list, emeta_to_lbas(pblk, emeta_buf), lba_list_size); - - kvfree(emeta_buf); - - return lba_list; -} - -static void pblk_gc_line_prepare_ws(struct work_struct *work) -{ - struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws, - ws); - struct pblk *pblk = line_ws->pblk; - struct pblk_line *line = line_ws->line; - struct pblk_line_meta *lm = &pblk->lm; - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_gc *gc = &pblk->gc; - struct pblk_line_ws *gc_rq_ws; - struct pblk_gc_rq *gc_rq; - __le64 *lba_list; - unsigned long *invalid_bitmap; - int sec_left, nr_secs, bit; - - invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL); - if (!invalid_bitmap) - goto fail_free_ws; - - if (line->w_err_gc->has_write_err) { - lba_list = line->w_err_gc->lba_list; - line->w_err_gc->lba_list = NULL; - } else { - lba_list = get_lba_list_from_emeta(pblk, line); - if (!lba_list) { - pblk_err(pblk, "could not interpret emeta (line %d)\n", - line->id); - goto fail_free_invalid_bitmap; - } - } - - spin_lock(&line->lock); - bitmap_copy(invalid_bitmap, line->invalid_bitmap, lm->sec_per_line); - sec_left = pblk_line_vsc(line); - spin_unlock(&line->lock); - - if (sec_left < 0) { - pblk_err(pblk, "corrupted GC line (%d)\n", line->id); - goto fail_free_lba_list; - } - - bit = -1; -next_rq: - gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL); - if (!gc_rq) - goto fail_free_lba_list; - - nr_secs = 0; - do { - bit = find_next_zero_bit(invalid_bitmap, lm->sec_per_line, - bit + 1); - if (bit > line->emeta_ssec) - break; - - gc_rq->paddr_list[nr_secs] = bit; - gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]); - } while (nr_secs < pblk->max_write_pgs); - - if (unlikely(!nr_secs)) { - kfree(gc_rq); - goto out; - } - - gc_rq->nr_secs = nr_secs; - gc_rq->line = line; - - gc_rq->data = vmalloc(array_size(gc_rq->nr_secs, geo->csecs)); - if (!gc_rq->data) - goto fail_free_gc_rq; - - gc_rq_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL); - if (!gc_rq_ws) - goto fail_free_gc_data; - - gc_rq_ws->pblk = pblk; - gc_rq_ws->line = line; - gc_rq_ws->priv = gc_rq; - - /* The write GC path can be much slower than the read GC one due to - * the budget imposed by the rate-limiter. Balance in case that we get - * back pressure from the write GC path. - */ - while (down_timeout(&gc->gc_sem, msecs_to_jiffies(30000))) - io_schedule(); - - kref_get(&line->ref); - - INIT_WORK(&gc_rq_ws->ws, pblk_gc_line_ws); - queue_work(gc->gc_line_reader_wq, &gc_rq_ws->ws); - - sec_left -= nr_secs; - if (sec_left > 0) - goto next_rq; - -out: - kvfree(lba_list); - kfree(line_ws); - kfree(invalid_bitmap); - - kref_put(&line->ref, pblk_line_put); - atomic_dec(&gc->read_inflight_gc); - - return; - -fail_free_gc_data: - vfree(gc_rq->data); -fail_free_gc_rq: - kfree(gc_rq); -fail_free_lba_list: - kvfree(lba_list); -fail_free_invalid_bitmap: - kfree(invalid_bitmap); -fail_free_ws: - kfree(line_ws); - - /* Line goes back to closed state, so we cannot release additional - * reference for line, since we do that only when we want to do - * gc to free line state transition. 
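
The next_rq loop above batches the sectors that are still valid, the zero bits in the invalid bitmap, into GC requests of at most max_write_pgs entries each. A sketch of that chunking over an invented 16-sector bitmap:

/* Batch valid sectors into fixed-size GC requests. */
#include <stdio.h>

int main(void)
{
	/* 1 = sector was invalidated, 0 = still valid (assumed pattern) */
	int invalid[16] = { 1,0,0,1,0,1,1,0, 0,0,1,0,1,0,0,1 };
	int max_write_pgs = 4;	/* request size cap, assumed */
	int nr_secs = 0, rq = 0;

	for (int bit = 0; bit < 16; bit++) {
		if (invalid[bit])
			continue;	/* nothing left to salvage here */
		printf("rq %d takes paddr %d\n", rq, bit);
		if (++nr_secs == max_write_pgs) {	/* request full */
			rq++;
			nr_secs = 0;
		}
	}
	return 0;
}
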
- */ - pblk_put_line_back(pblk, line); - atomic_dec(&gc->read_inflight_gc); - - pblk_err(pblk, "failed to GC line %d\n", line->id); -} - -static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_gc *gc = &pblk->gc; - struct pblk_line_ws *line_ws; - - pblk_debug(pblk, "line '%d' being reclaimed for GC\n", line->id); - - line_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL); - if (!line_ws) - return -ENOMEM; - - line_ws->pblk = pblk; - line_ws->line = line; - - atomic_inc(&gc->pipeline_gc); - INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws); - queue_work(gc->gc_reader_wq, &line_ws->ws); - - return 0; -} - -static void pblk_gc_reader_kick(struct pblk_gc *gc) -{ - wake_up_process(gc->gc_reader_ts); -} - -static void pblk_gc_kick(struct pblk *pblk) -{ - struct pblk_gc *gc = &pblk->gc; - - pblk_gc_writer_kick(gc); - pblk_gc_reader_kick(gc); - - /* If we're shutting down GC, let's not start it up again */ - if (gc->gc_enabled) { - wake_up_process(gc->gc_ts); - mod_timer(&gc->gc_timer, - jiffies + msecs_to_jiffies(GC_TIME_MSECS)); - } -} - -static int pblk_gc_read(struct pblk *pblk) -{ - struct pblk_gc *gc = &pblk->gc; - struct pblk_line *line; - - spin_lock(&gc->r_lock); - if (list_empty(&gc->r_list)) { - spin_unlock(&gc->r_lock); - return 1; - } - - line = list_first_entry(&gc->r_list, struct pblk_line, list); - list_del(&line->list); - spin_unlock(&gc->r_lock); - - pblk_gc_kick(pblk); - - if (pblk_gc_line(pblk, line)) { - pblk_err(pblk, "failed to GC line %d\n", line->id); - /* rollback */ - spin_lock(&gc->r_lock); - list_add_tail(&line->list, &gc->r_list); - spin_unlock(&gc->r_lock); - } - - return 0; -} - -static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk, - struct list_head *group_list) -{ - struct pblk_line *line, *victim; - unsigned int line_vsc = ~0x0L, victim_vsc = ~0x0L; - - victim = list_first_entry(group_list, struct pblk_line, list); - - list_for_each_entry(line, group_list, list) { - if (!atomic_read(&line->sec_to_update)) - line_vsc = le32_to_cpu(*line->vsc); - if (line_vsc < victim_vsc) { - victim = line; - victim_vsc = le32_to_cpu(*victim->vsc); - } - } - - if (victim_vsc == ~0x0) - return NULL; - - return victim; -} - -static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl) -{ - unsigned int nr_blocks_free, nr_blocks_need; - unsigned int werr_lines = atomic_read(&rl->werr_lines); - - nr_blocks_need = pblk_rl_high_thrs(rl); - nr_blocks_free = pblk_rl_nr_free_blks(rl); - - /* This is not critical, no need to take lock here */ - return ((werr_lines > 0) || - ((gc->gc_active) && (nr_blocks_need > nr_blocks_free))); -} - -void pblk_gc_free_full_lines(struct pblk *pblk) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_gc *gc = &pblk->gc; - struct pblk_line *line; - - do { - spin_lock(&l_mg->gc_lock); - if (list_empty(&l_mg->gc_full_list)) { - spin_unlock(&l_mg->gc_lock); - return; - } - - line = list_first_entry(&l_mg->gc_full_list, - struct pblk_line, list); - - spin_lock(&line->lock); - WARN_ON(line->state != PBLK_LINESTATE_CLOSED); - line->state = PBLK_LINESTATE_GC; - trace_pblk_line_state(pblk_disk_name(pblk), line->id, - line->state); - spin_unlock(&line->lock); - - list_del(&line->list); - spin_unlock(&l_mg->gc_lock); - - atomic_inc(&gc->pipeline_gc); - kref_put(&line->ref, pblk_line_put); - } while (1); -} - -/* - * Lines with no valid sectors will be returned to the free list immediately. 
If - * GC is activated - either because the free block count is under the determined - * threshold, or because it is being forced from user space - only lines with a - * high count of invalid sectors will be recycled. - */ -static void pblk_gc_run(struct pblk *pblk) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_gc *gc = &pblk->gc; - struct pblk_line *line; - struct list_head *group_list; - bool run_gc; - int read_inflight_gc, gc_group = 0, prev_group = 0; - - pblk_gc_free_full_lines(pblk); - - run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl); - if (!run_gc || (atomic_read(&gc->read_inflight_gc) >= PBLK_GC_L_QD)) - return; - -next_gc_group: - group_list = l_mg->gc_lists[gc_group++]; - - do { - spin_lock(&l_mg->gc_lock); - - line = pblk_gc_get_victim_line(pblk, group_list); - if (!line) { - spin_unlock(&l_mg->gc_lock); - break; - } - - spin_lock(&line->lock); - WARN_ON(line->state != PBLK_LINESTATE_CLOSED); - line->state = PBLK_LINESTATE_GC; - trace_pblk_line_state(pblk_disk_name(pblk), line->id, - line->state); - spin_unlock(&line->lock); - - list_del(&line->list); - spin_unlock(&l_mg->gc_lock); - - spin_lock(&gc->r_lock); - list_add_tail(&line->list, &gc->r_list); - spin_unlock(&gc->r_lock); - - read_inflight_gc = atomic_inc_return(&gc->read_inflight_gc); - pblk_gc_reader_kick(gc); - - prev_group = 1; - - /* No need to queue up more GC lines than we can handle */ - run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl); - if (!run_gc || read_inflight_gc >= PBLK_GC_L_QD) - break; - } while (1); - - if (!prev_group && pblk->rl.rb_state > gc_group && - gc_group < PBLK_GC_NR_LISTS) - goto next_gc_group; -} - -static void pblk_gc_timer(struct timer_list *t) -{ - struct pblk *pblk = from_timer(pblk, t, gc.gc_timer); - - pblk_gc_kick(pblk); -} - -static int pblk_gc_ts(void *data) -{ - struct pblk *pblk = data; - - while (!kthread_should_stop()) { - pblk_gc_run(pblk); - set_current_state(TASK_INTERRUPTIBLE); - io_schedule(); - } - - return 0; -} - -static int pblk_gc_writer_ts(void *data) -{ - struct pblk *pblk = data; - - while (!kthread_should_stop()) { - if (!pblk_gc_write(pblk)) - continue; - set_current_state(TASK_INTERRUPTIBLE); - io_schedule(); - } - - return 0; -} - -static int pblk_gc_reader_ts(void *data) -{ - struct pblk *pblk = data; - struct pblk_gc *gc = &pblk->gc; - - while (!kthread_should_stop()) { - if (!pblk_gc_read(pblk)) - continue; - set_current_state(TASK_INTERRUPTIBLE); - io_schedule(); - } - -#ifdef CONFIG_NVM_PBLK_DEBUG - pblk_info(pblk, "flushing gc pipeline, %d lines left\n", - atomic_read(&gc->pipeline_gc)); -#endif - - do { - if (!atomic_read(&gc->pipeline_gc)) - break; - - schedule(); - } while (1); - - return 0; -} - -static void pblk_gc_start(struct pblk *pblk) -{ - pblk->gc.gc_active = 1; - pblk_debug(pblk, "gc start\n"); -} - -void pblk_gc_should_start(struct pblk *pblk) -{ - struct pblk_gc *gc = &pblk->gc; - - if (gc->gc_enabled && !gc->gc_active) { - pblk_gc_start(pblk); - pblk_gc_kick(pblk); - } -} - -void pblk_gc_should_stop(struct pblk *pblk) -{ - struct pblk_gc *gc = &pblk->gc; - - if (gc->gc_active && !gc->gc_forced) - gc->gc_active = 0; -} - -void pblk_gc_should_kick(struct pblk *pblk) -{ - pblk_rl_update_rates(&pblk->rl); -} - -void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled, - int *gc_active) -{ - struct pblk_gc *gc = &pblk->gc; - - spin_lock(&gc->lock); - *gc_enabled = gc->gc_enabled; - *gc_active = gc->gc_active; - spin_unlock(&gc->lock); -} - -int pblk_gc_sysfs_force(struct pblk *pblk, int force) -{ - struct pblk_gc *gc = 
&pblk->gc; - - if (force < 0 || force > 1) - return -EINVAL; - - spin_lock(&gc->lock); - gc->gc_forced = force; - - if (force) - gc->gc_enabled = 1; - else - gc->gc_enabled = 0; - spin_unlock(&gc->lock); - - pblk_gc_should_start(pblk); - - return 0; -} - -int pblk_gc_init(struct pblk *pblk) -{ - struct pblk_gc *gc = &pblk->gc; - int ret; - - gc->gc_ts = kthread_create(pblk_gc_ts, pblk, "pblk-gc-ts"); - if (IS_ERR(gc->gc_ts)) { - pblk_err(pblk, "could not allocate GC main kthread\n"); - return PTR_ERR(gc->gc_ts); - } - - gc->gc_writer_ts = kthread_create(pblk_gc_writer_ts, pblk, - "pblk-gc-writer-ts"); - if (IS_ERR(gc->gc_writer_ts)) { - pblk_err(pblk, "could not allocate GC writer kthread\n"); - ret = PTR_ERR(gc->gc_writer_ts); - goto fail_free_main_kthread; - } - - gc->gc_reader_ts = kthread_create(pblk_gc_reader_ts, pblk, - "pblk-gc-reader-ts"); - if (IS_ERR(gc->gc_reader_ts)) { - pblk_err(pblk, "could not allocate GC reader kthread\n"); - ret = PTR_ERR(gc->gc_reader_ts); - goto fail_free_writer_kthread; - } - - timer_setup(&gc->gc_timer, pblk_gc_timer, 0); - mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS)); - - gc->gc_active = 0; - gc->gc_forced = 0; - gc->gc_enabled = 1; - gc->w_entries = 0; - atomic_set(&gc->read_inflight_gc, 0); - atomic_set(&gc->pipeline_gc, 0); - - /* Workqueue that reads valid sectors from a line and submits them to the - * GC writer to be recycled. - */ - gc->gc_line_reader_wq = alloc_workqueue("pblk-gc-line-reader-wq", - WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_GC_MAX_READERS); - if (!gc->gc_line_reader_wq) { - pblk_err(pblk, "could not allocate GC line reader workqueue\n"); - ret = -ENOMEM; - goto fail_free_reader_kthread; - } - - /* Workqueue that prepares lines for GC */ - gc->gc_reader_wq = alloc_workqueue("pblk-gc-line_wq", - WQ_MEM_RECLAIM | WQ_UNBOUND, 1); - if (!gc->gc_reader_wq) { - pblk_err(pblk, "could not allocate GC reader workqueue\n"); - ret = -ENOMEM; - goto fail_free_reader_line_wq; - } - - spin_lock_init(&gc->lock); - spin_lock_init(&gc->w_lock); - spin_lock_init(&gc->r_lock); - - sema_init(&gc->gc_sem, PBLK_GC_RQ_QD); - - INIT_LIST_HEAD(&gc->w_list); - INIT_LIST_HEAD(&gc->r_list); - - return 0; - -fail_free_reader_line_wq: - destroy_workqueue(gc->gc_line_reader_wq); -fail_free_reader_kthread: - kthread_stop(gc->gc_reader_ts); -fail_free_writer_kthread: - kthread_stop(gc->gc_writer_ts); -fail_free_main_kthread: - kthread_stop(gc->gc_ts); - - return ret; -} - -void pblk_gc_exit(struct pblk *pblk, bool graceful) -{ - struct pblk_gc *gc = &pblk->gc; - - gc->gc_enabled = 0; - del_timer_sync(&gc->gc_timer); - gc->gc_active = 0; - - if (gc->gc_ts) - kthread_stop(gc->gc_ts); - - if (gc->gc_reader_ts) - kthread_stop(gc->gc_reader_ts); - - if (graceful) { - flush_workqueue(gc->gc_reader_wq); - flush_workqueue(gc->gc_line_reader_wq); - } - - destroy_workqueue(gc->gc_reader_wq); - destroy_workqueue(gc->gc_line_reader_wq); - - if (gc->gc_writer_ts) - kthread_stop(gc->gc_writer_ts); -} diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c deleted file mode 100644 index 5924f09c217b..000000000000 --- a/drivers/lightnvm/pblk-init.c +++ /dev/null @@ -1,1324 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2015 IT University of Copenhagen (rrpc.c) - * Copyright (C) 2016 CNEX Labs - * Initial release: Javier Gonzalez - * Matias Bjorling - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software
Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Implementation of a physical block-device target for Open-channel SSDs. - * - * pblk-init.c - pblk's initialization. - */ - -#include "pblk.h" -#include "pblk-trace.h" - -static unsigned int write_buffer_size; - -module_param(write_buffer_size, uint, 0644); -MODULE_PARM_DESC(write_buffer_size, "number of entries in a write buffer"); - -struct pblk_global_caches { - struct kmem_cache *ws; - struct kmem_cache *rec; - struct kmem_cache *g_rq; - struct kmem_cache *w_rq; - - struct kref kref; - - struct mutex mutex; /* Ensures consistency between - * caches and kref - */ -}; - -static struct pblk_global_caches pblk_caches = { - .mutex = __MUTEX_INITIALIZER(pblk_caches.mutex), - .kref = KREF_INIT(0), -}; - -struct bio_set pblk_bio_set; - -static blk_qc_t pblk_submit_bio(struct bio *bio) -{ - struct pblk *pblk = bio->bi_bdev->bd_disk->queue->queuedata; - - if (bio_op(bio) == REQ_OP_DISCARD) { - pblk_discard(pblk, bio); - if (!(bio->bi_opf & REQ_PREFLUSH)) { - bio_endio(bio); - return BLK_QC_T_NONE; - } - } - - /* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap - * constraint. Writes can be of arbitrary size. - */ - if (bio_data_dir(bio) == READ) { - blk_queue_split(&bio); - pblk_submit_read(pblk, bio); - } else { - /* Prevent deadlock in the case of a modest LUN configuration - * and large user I/Os. Unless stalled, the rate limiter - * leaves at least 256KB available for user I/O. - */ - if (pblk_get_secs(bio) > pblk_rl_max_io(&pblk->rl)) - blk_queue_split(&bio); - - pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER); - } - - return BLK_QC_T_NONE; -} - -static const struct block_device_operations pblk_bops = { - .owner = THIS_MODULE, - .submit_bio = pblk_submit_bio, -}; - - -static size_t pblk_trans_map_size(struct pblk *pblk) -{ - int entry_size = 8; - - if (pblk->addrf_len < 32) - entry_size = 4; - - return entry_size * pblk->capacity; -} - -#ifdef CONFIG_NVM_PBLK_DEBUG -static u32 pblk_l2p_crc(struct pblk *pblk) -{ - size_t map_size; - u32 crc = ~(u32)0; - - map_size = pblk_trans_map_size(pblk); - crc = crc32_le(crc, pblk->trans_map, map_size); - return crc; -} -#endif - -static void pblk_l2p_free(struct pblk *pblk) -{ - vfree(pblk->trans_map); -} - -static int pblk_l2p_recover(struct pblk *pblk, bool factory_init) -{ - struct pblk_line *line = NULL; - - if (factory_init) { - guid_gen(&pblk->instance_uuid); - } else { - line = pblk_recov_l2p(pblk); - if (IS_ERR(line)) { - pblk_err(pblk, "could not recover l2p table\n"); - return -EFAULT; - } - } - -#ifdef CONFIG_NVM_PBLK_DEBUG - pblk_info(pblk, "init: L2P CRC: %x\n", pblk_l2p_crc(pblk)); -#endif - - /* Free full lines directly as GC has not been started yet */ - pblk_gc_free_full_lines(pblk); - - if (!line) { - /* Configure next line for user data */ - line = pblk_line_get_first_data(pblk); - if (!line) - return -EFAULT; - } - - return 0; -} - -static int pblk_l2p_init(struct pblk *pblk, bool factory_init) -{ - sector_t i; - struct ppa_addr ppa; - size_t map_size; - int ret = 0; - - map_size = pblk_trans_map_size(pblk); - pblk->trans_map = __vmalloc(map_size, GFP_KERNEL | __GFP_NOWARN | - __GFP_RETRY_MAYFAIL | __GFP_HIGHMEM); - if (!pblk->trans_map) { - pblk_err(pblk, "failed to allocate L2P (need %zu of memory)\n", - map_size); - return -ENOMEM; - } - - 
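For reference, the table sized by pblk_trans_map_size() above is a flat array with one entry per 4KB logical sector, using 4-byte entries when the packed physical address fits in 32 bits and 8-byte entries otherwise; at 8 bytes per entry a 1TB device needs roughly 2GB of table, which is why the allocation falls back to __vmalloc(). The initialisation loop that follows then marks every sector unmapped. A minimal user-space model of the layout (hypothetical names, not the kernel code):

    #include <stdint.h>
    #include <stdlib.h>

    #define ADDR_EMPTY ((uint64_t)~0ULL)    /* unmapped marker */

    struct l2p_model {
        uint64_t *map;      /* one physical address per logical sector */
        uint64_t  nr_secs;  /* capacity in 4KB sectors */
    };

    /* Mirrors the shape of pblk_l2p_init(): allocate the table, then
     * mark every logical sector as unmapped. */
    static int l2p_model_init(struct l2p_model *m, uint64_t nr_secs)
    {
        m->map = malloc(nr_secs * sizeof(*m->map));
        if (!m->map)
            return -1;
        for (uint64_t i = 0; i < nr_secs; i++)
            m->map[i] = ADDR_EMPTY;
        m->nr_secs = nr_secs;
        return 0;
    }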
pblk_ppa_set_empty(&ppa); - - for (i = 0; i < pblk->capacity; i++) - pblk_trans_map_set(pblk, i, ppa); - - ret = pblk_l2p_recover(pblk, factory_init); - if (ret) - vfree(pblk->trans_map); - - return ret; -} - -static void pblk_rwb_free(struct pblk *pblk) -{ - if (pblk_rb_tear_down_check(&pblk->rwb)) - pblk_err(pblk, "write buffer error on tear down\n"); - - pblk_rb_free(&pblk->rwb); -} - -static int pblk_rwb_init(struct pblk *pblk) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - unsigned long buffer_size; - int pgs_in_buffer, threshold; - - threshold = geo->mw_cunits * geo->all_luns; - pgs_in_buffer = (max(geo->mw_cunits, geo->ws_opt) + geo->ws_opt) - * geo->all_luns; - - if (write_buffer_size && (write_buffer_size > pgs_in_buffer)) - buffer_size = write_buffer_size; - else - buffer_size = pgs_in_buffer; - - return pblk_rb_init(&pblk->rwb, buffer_size, threshold, geo->csecs); -} - -static int pblk_set_addrf_12(struct pblk *pblk, struct nvm_geo *geo, - struct nvm_addrf_12 *dst) -{ - struct nvm_addrf_12 *src = (struct nvm_addrf_12 *)&geo->addrf; - int power_len; - - /* Re-calculate channel and lun format to adapt to configuration */ - power_len = get_count_order(geo->num_ch); - if (1 << power_len != geo->num_ch) { - pblk_err(pblk, "supports only power-of-two channel config.\n"); - return -EINVAL; - } - dst->ch_len = power_len; - - power_len = get_count_order(geo->num_lun); - if (1 << power_len != geo->num_lun) { - pblk_err(pblk, "supports only power-of-two LUN config.\n"); - return -EINVAL; - } - dst->lun_len = power_len; - - dst->blk_len = src->blk_len; - dst->pg_len = src->pg_len; - dst->pln_len = src->pln_len; - dst->sec_len = src->sec_len; - - dst->sec_offset = 0; - dst->pln_offset = dst->sec_len; - dst->ch_offset = dst->pln_offset + dst->pln_len; - dst->lun_offset = dst->ch_offset + dst->ch_len; - dst->pg_offset = dst->lun_offset + dst->lun_len; - dst->blk_offset = dst->pg_offset + dst->pg_len; - - dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset; - dst->pln_mask = ((1ULL << dst->pln_len) - 1) << dst->pln_offset; - dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset; - dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset; - dst->pg_mask = ((1ULL << dst->pg_len) - 1) << dst->pg_offset; - dst->blk_mask = ((1ULL << dst->blk_len) - 1) << dst->blk_offset; - - return dst->blk_offset + src->blk_len; -} - -static int pblk_set_addrf_20(struct nvm_geo *geo, struct nvm_addrf *adst, - struct pblk_addrf *udst) -{ - struct nvm_addrf *src = &geo->addrf; - - adst->ch_len = get_count_order(geo->num_ch); - adst->lun_len = get_count_order(geo->num_lun); - adst->chk_len = src->chk_len; - adst->sec_len = src->sec_len; - - adst->sec_offset = 0; - adst->ch_offset = adst->sec_len; - adst->lun_offset = adst->ch_offset + adst->ch_len; - adst->chk_offset = adst->lun_offset + adst->lun_len; - - adst->sec_mask = ((1ULL << adst->sec_len) - 1) << adst->sec_offset; - adst->chk_mask = ((1ULL << adst->chk_len) - 1) << adst->chk_offset; - adst->lun_mask = ((1ULL << adst->lun_len) - 1) << adst->lun_offset; - adst->ch_mask = ((1ULL << adst->ch_len) - 1) << adst->ch_offset; - - udst->sec_stripe = geo->ws_opt; - udst->ch_stripe = geo->num_ch; - udst->lun_stripe = geo->num_lun; - - udst->sec_lun_stripe = udst->sec_stripe * udst->ch_stripe; - udst->sec_ws_stripe = udst->sec_lun_stripe * udst->lun_stripe; - - return adst->chk_offset + adst->chk_len; -} - -static int pblk_set_addrf(struct pblk *pblk) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo 
= &dev->geo; - int mod; - - switch (geo->version) { - case NVM_OCSSD_SPEC_12: - div_u64_rem(geo->clba, pblk->min_write_pgs, &mod); - if (mod) { - pblk_err(pblk, "bad configuration of sectors/pages\n"); - return -EINVAL; - } - - pblk->addrf_len = pblk_set_addrf_12(pblk, geo, - (void *)&pblk->addrf); - break; - case NVM_OCSSD_SPEC_20: - pblk->addrf_len = pblk_set_addrf_20(geo, (void *)&pblk->addrf, - &pblk->uaddrf); - break; - default: - pblk_err(pblk, "OCSSD revision not supported (%d)\n", - geo->version); - return -EINVAL; - } - - return 0; -} - -static int pblk_create_global_caches(void) -{ - - pblk_caches.ws = kmem_cache_create("pblk_blk_ws", - sizeof(struct pblk_line_ws), 0, 0, NULL); - if (!pblk_caches.ws) - return -ENOMEM; - - pblk_caches.rec = kmem_cache_create("pblk_rec", - sizeof(struct pblk_rec_ctx), 0, 0, NULL); - if (!pblk_caches.rec) - goto fail_destroy_ws; - - pblk_caches.g_rq = kmem_cache_create("pblk_g_rq", pblk_g_rq_size, - 0, 0, NULL); - if (!pblk_caches.g_rq) - goto fail_destroy_rec; - - pblk_caches.w_rq = kmem_cache_create("pblk_w_rq", pblk_w_rq_size, - 0, 0, NULL); - if (!pblk_caches.w_rq) - goto fail_destroy_g_rq; - - return 0; - -fail_destroy_g_rq: - kmem_cache_destroy(pblk_caches.g_rq); -fail_destroy_rec: - kmem_cache_destroy(pblk_caches.rec); -fail_destroy_ws: - kmem_cache_destroy(pblk_caches.ws); - - return -ENOMEM; -} - -static int pblk_get_global_caches(void) -{ - int ret = 0; - - mutex_lock(&pblk_caches.mutex); - - if (kref_get_unless_zero(&pblk_caches.kref)) - goto out; - - ret = pblk_create_global_caches(); - if (!ret) - kref_init(&pblk_caches.kref); - -out: - mutex_unlock(&pblk_caches.mutex); - return ret; -} - -static void pblk_destroy_global_caches(struct kref *ref) -{ - struct pblk_global_caches *c; - - c = container_of(ref, struct pblk_global_caches, kref); - - kmem_cache_destroy(c->ws); - kmem_cache_destroy(c->rec); - kmem_cache_destroy(c->g_rq); - kmem_cache_destroy(c->w_rq); -} - -static void pblk_put_global_caches(void) -{ - mutex_lock(&pblk_caches.mutex); - kref_put(&pblk_caches.kref, pblk_destroy_global_caches); - mutex_unlock(&pblk_caches.mutex); -} - -static int pblk_core_init(struct pblk *pblk) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - int ret, max_write_ppas; - - atomic64_set(&pblk->user_wa, 0); - atomic64_set(&pblk->pad_wa, 0); - atomic64_set(&pblk->gc_wa, 0); - pblk->user_rst_wa = 0; - pblk->pad_rst_wa = 0; - pblk->gc_rst_wa = 0; - - atomic64_set(&pblk->nr_flush, 0); - pblk->nr_flush_rst = 0; - - pblk->min_write_pgs = geo->ws_opt; - pblk->min_write_pgs_data = pblk->min_write_pgs; - max_write_ppas = pblk->min_write_pgs * geo->all_luns; - pblk->max_write_pgs = min_t(int, max_write_ppas, NVM_MAX_VLBA); - pblk->max_write_pgs = min_t(int, pblk->max_write_pgs, - queue_max_hw_sectors(dev->q) / (geo->csecs >> SECTOR_SHIFT)); - pblk_set_sec_per_write(pblk, pblk->min_write_pgs); - - pblk->oob_meta_size = geo->sos; - if (!pblk_is_oob_meta_supported(pblk)) { - /* For drives which do not have the OOB metadata feature, - * in order to support the recovery feature we need to use - * so-called packed metadata. Packed metadata will store - * the same information as OOB metadata (the L2P table mapping), - * but in the form of a single page at the end of - * every write request. - */ - if (pblk->min_write_pgs - * sizeof(struct pblk_sec_meta) > PAGE_SIZE) { - /* We want to keep all the packed metadata on a single - * page per write request. So we need to ensure that - * it will fit. - * - * This is more of a sanity check, since there is - * no device with such a big minimal write size - * (above 1 megabyte). - */ - pblk_err(pblk, "unsupported minimum write size\n"); - return -EINVAL; - } - /* For the packed metadata approach we make a simplification: - * on the read path we always issue requests whose size - * equals max_write_pgs, with all pages filled with - * user payload except for the last page, which will be - * filled with packed metadata. - */ - pblk->max_write_pgs = pblk->min_write_pgs; - pblk->min_write_pgs_data = pblk->min_write_pgs - 1; - } - - pblk->pad_dist = kcalloc(pblk->min_write_pgs - 1, sizeof(atomic64_t), - GFP_KERNEL); - if (!pblk->pad_dist) - return -ENOMEM; - - if (pblk_get_global_caches()) - goto fail_free_pad_dist; - - /* Internal bios can be at most the sectors signaled by the device. */ - ret = mempool_init_page_pool(&pblk->page_bio_pool, NVM_MAX_VLBA, 0); - if (ret) - goto free_global_caches; - - ret = mempool_init_slab_pool(&pblk->gen_ws_pool, PBLK_GEN_WS_POOL_SIZE, - pblk_caches.ws); - if (ret) - goto free_page_bio_pool; - - ret = mempool_init_slab_pool(&pblk->rec_pool, geo->all_luns, - pblk_caches.rec); - if (ret) - goto free_gen_ws_pool; - - ret = mempool_init_slab_pool(&pblk->r_rq_pool, geo->all_luns, - pblk_caches.g_rq); - if (ret) - goto free_rec_pool; - - ret = mempool_init_slab_pool(&pblk->e_rq_pool, geo->all_luns, - pblk_caches.g_rq); - if (ret) - goto free_r_rq_pool; - - ret = mempool_init_slab_pool(&pblk->w_rq_pool, geo->all_luns, - pblk_caches.w_rq); - if (ret) - goto free_e_rq_pool; - - pblk->close_wq = alloc_workqueue("pblk-close-wq", - WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_NR_CLOSE_JOBS); - if (!pblk->close_wq) - goto free_w_rq_pool; - - pblk->bb_wq = alloc_workqueue("pblk-bb-wq", - WQ_MEM_RECLAIM | WQ_UNBOUND, 0); - if (!pblk->bb_wq) - goto free_close_wq; - - pblk->r_end_wq = alloc_workqueue("pblk-read-end-wq", - WQ_MEM_RECLAIM | WQ_UNBOUND, 0); - if (!pblk->r_end_wq) - goto free_bb_wq; - - if (pblk_set_addrf(pblk)) - goto free_r_end_wq; - - INIT_LIST_HEAD(&pblk->compl_list); - INIT_LIST_HEAD(&pblk->resubmit_list); - - return 0; - -free_r_end_wq: - destroy_workqueue(pblk->r_end_wq); -free_bb_wq: - destroy_workqueue(pblk->bb_wq); -free_close_wq: - destroy_workqueue(pblk->close_wq); -free_w_rq_pool: - mempool_exit(&pblk->w_rq_pool); -free_e_rq_pool: - mempool_exit(&pblk->e_rq_pool); -free_r_rq_pool: - mempool_exit(&pblk->r_rq_pool); -free_rec_pool: - mempool_exit(&pblk->rec_pool); -free_gen_ws_pool: - mempool_exit(&pblk->gen_ws_pool); -free_page_bio_pool: - mempool_exit(&pblk->page_bio_pool); -free_global_caches: - pblk_put_global_caches(); -fail_free_pad_dist: - kfree(pblk->pad_dist); - return -ENOMEM; -} - -static void pblk_core_free(struct pblk *pblk) -{ - if (pblk->close_wq) - destroy_workqueue(pblk->close_wq); - - if (pblk->r_end_wq) - destroy_workqueue(pblk->r_end_wq); - - if (pblk->bb_wq) - destroy_workqueue(pblk->bb_wq); - - mempool_exit(&pblk->page_bio_pool); - mempool_exit(&pblk->gen_ws_pool); - mempool_exit(&pblk->rec_pool); - mempool_exit(&pblk->r_rq_pool); - mempool_exit(&pblk->e_rq_pool); - mempool_exit(&pblk->w_rq_pool); - - pblk_put_global_caches(); - kfree(pblk->pad_dist); -} - -static void pblk_line_mg_free(struct pblk *pblk) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - int i; - - kfree(l_mg->bb_template); - kfree(l_mg->bb_aux); - kfree(l_mg->vsc_list); - - for (i = 0; i < PBLK_DATA_LINES; i++) { - kfree(l_mg->sline_meta[i]); - kvfree(l_mg->eline_meta[i]->buf); - kfree(l_mg->eline_meta[i]); - } - -
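The fail_*/free_* label chains above follow the usual kernel error-handling idiom: resources are acquired in a fixed order, and a failure at step N jumps to a label that releases steps N-1 down to 1 in reverse order. A stripped-down user-space illustration of that shape (not the original code):

    #include <stdlib.h>

    static int bring_up(void **a, void **b, void **c)
    {
        *a = malloc(32);
        if (!*a)
            return -1;

        *b = malloc(32);
        if (!*b)
            goto fail_free_a;

        *c = malloc(32);
        if (!*c)
            goto fail_free_b;

        return 0;           /* success: caller owns a, b and c */

    fail_free_b:
        free(*b);
    fail_free_a:
        free(*a);
        return -1;
    }

Falling through the labels releases exactly the resources acquired before the failing step, which is what keeps pblk_core_init() leak-free on every exit path.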
mempool_destroy(l_mg->bitmap_pool); - kmem_cache_destroy(l_mg->bitmap_cache); -} - -static void pblk_line_meta_free(struct pblk_line_mgmt *l_mg, - struct pblk_line *line) -{ - struct pblk_w_err_gc *w_err_gc = line->w_err_gc; - - kfree(line->blk_bitmap); - kfree(line->erase_bitmap); - kfree(line->chks); - - kvfree(w_err_gc->lba_list); - kfree(w_err_gc); -} - -static void pblk_lines_free(struct pblk *pblk) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line *line; - int i; - - for (i = 0; i < l_mg->nr_lines; i++) { - line = &pblk->lines[i]; - - pblk_line_free(line); - pblk_line_meta_free(l_mg, line); - } - - pblk_line_mg_free(pblk); - - kfree(pblk->luns); - kfree(pblk->lines); -} - -static int pblk_luns_init(struct pblk *pblk) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_lun *rlun; - int i; - - /* TODO: Implement unbalanced LUN support */ - if (geo->num_lun < 0) { - pblk_err(pblk, "unbalanced LUN config.\n"); - return -EINVAL; - } - - pblk->luns = kcalloc(geo->all_luns, sizeof(struct pblk_lun), - GFP_KERNEL); - if (!pblk->luns) - return -ENOMEM; - - for (i = 0; i < geo->all_luns; i++) { - /* Stripe across channels */ - int ch = i % geo->num_ch; - int lun_raw = i / geo->num_ch; - int lunid = lun_raw + ch * geo->num_lun; - - rlun = &pblk->luns[i]; - rlun->bppa = dev->luns[lunid]; - - sema_init(&rlun->wr_sem, 1); - } - - return 0; -} - -/* See comment over struct line_emeta definition */ -static unsigned int calc_emeta_len(struct pblk *pblk) -{ - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - - /* Round to sector size so that lba_list starts on its own sector */ - lm->emeta_sec[1] = DIV_ROUND_UP( - sizeof(struct line_emeta) + lm->blk_bitmap_len + - sizeof(struct wa_counters), geo->csecs); - lm->emeta_len[1] = lm->emeta_sec[1] * geo->csecs; - - /* Round to sector size so that vsc_list starts on its own sector */ - lm->dsec_per_line = lm->sec_per_line - lm->emeta_sec[0]; - lm->emeta_sec[2] = DIV_ROUND_UP(lm->dsec_per_line * sizeof(u64), - geo->csecs); - lm->emeta_len[2] = lm->emeta_sec[2] * geo->csecs; - - lm->emeta_sec[3] = DIV_ROUND_UP(l_mg->nr_lines * sizeof(u32), - geo->csecs); - lm->emeta_len[3] = lm->emeta_sec[3] * geo->csecs; - - lm->vsc_list_len = l_mg->nr_lines * sizeof(u32); - - return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]); -} - -static int pblk_set_provision(struct pblk *pblk, int nr_free_chks) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line_meta *lm = &pblk->lm; - struct nvm_geo *geo = &dev->geo; - sector_t provisioned; - int sec_meta, blk_meta, clba; - int minimum; - - if (geo->op == NVM_TARGET_DEFAULT_OP) - pblk->op = PBLK_DEFAULT_OP; - else - pblk->op = geo->op; - - minimum = pblk_get_min_chks(pblk); - provisioned = nr_free_chks; - provisioned *= (100 - pblk->op); - sector_div(provisioned, 100); - - if ((nr_free_chks - provisioned) < minimum) { - if (geo->op != NVM_TARGET_DEFAULT_OP) { - pblk_err(pblk, "OP too small to create a sane instance\n"); - return -EINTR; - } - - /* If the user did not specify an OP value, and PBLK_DEFAULT_OP - * is not enough, calculate and set sane value - */ - - provisioned = nr_free_chks - minimum; - pblk->op = (100 * minimum) / nr_free_chks; - pblk_info(pblk, "Default OP insufficient, adjusting OP to %d\n", - pblk->op); - } - - pblk->op_blks = nr_free_chks - provisioned; - - /* Internally pblk manages all 
free blocks, but all calculations based - * on user capacity consider only provisioned blocks - */ - pblk->rl.total_blocks = nr_free_chks; - - /* Consider sectors used for metadata */ - sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines; - blk_meta = DIV_ROUND_UP(sec_meta, geo->clba); - - clba = (geo->clba / pblk->min_write_pgs) * pblk->min_write_pgs_data; - pblk->capacity = (provisioned - blk_meta) * clba; - - atomic_set(&pblk->rl.free_blocks, nr_free_chks); - atomic_set(&pblk->rl.free_user_blocks, nr_free_chks); - - return 0; -} - -static int pblk_setup_line_meta_chk(struct pblk *pblk, struct pblk_line *line, - struct nvm_chk_meta *meta) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - int i, nr_bad_chks = 0; - - for (i = 0; i < lm->blk_per_line; i++) { - struct pblk_lun *rlun = &pblk->luns[i]; - struct nvm_chk_meta *chunk; - struct nvm_chk_meta *chunk_meta; - struct ppa_addr ppa; - int pos; - - ppa = rlun->bppa; - pos = pblk_ppa_to_pos(geo, ppa); - chunk = &line->chks[pos]; - - ppa.m.chk = line->id; - chunk_meta = pblk_chunk_get_off(pblk, meta, ppa); - - chunk->state = chunk_meta->state; - chunk->type = chunk_meta->type; - chunk->wi = chunk_meta->wi; - chunk->slba = chunk_meta->slba; - chunk->cnlb = chunk_meta->cnlb; - chunk->wp = chunk_meta->wp; - - trace_pblk_chunk_state(pblk_disk_name(pblk), &ppa, - chunk->state); - - if (chunk->type & NVM_CHK_TP_SZ_SPEC) { - WARN_ONCE(1, "pblk: custom-sized chunks unsupported\n"); - continue; - } - - if (!(chunk->state & NVM_CHK_ST_OFFLINE)) - continue; - - set_bit(pos, line->blk_bitmap); - nr_bad_chks++; - } - - return nr_bad_chks; -} - -static long pblk_setup_line_meta(struct pblk *pblk, struct pblk_line *line, - void *chunk_meta, int line_id) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line_meta *lm = &pblk->lm; - long nr_bad_chks, chk_in_line; - - line->pblk = pblk; - line->id = line_id; - line->type = PBLK_LINETYPE_FREE; - line->state = PBLK_LINESTATE_NEW; - line->gc_group = PBLK_LINEGC_NONE; - line->vsc = &l_mg->vsc_list[line_id]; - spin_lock_init(&line->lock); - - nr_bad_chks = pblk_setup_line_meta_chk(pblk, line, chunk_meta); - - chk_in_line = lm->blk_per_line - nr_bad_chks; - if (nr_bad_chks < 0 || nr_bad_chks > lm->blk_per_line || - chk_in_line < lm->min_blk_line) { - line->state = PBLK_LINESTATE_BAD; - list_add_tail(&line->list, &l_mg->bad_list); - return 0; - } - - atomic_set(&line->blk_in_line, chk_in_line); - list_add_tail(&line->list, &l_mg->free_list); - l_mg->nr_free_lines++; - - return chk_in_line; -} - -static int pblk_alloc_line_meta(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_meta *lm = &pblk->lm; - - line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL); - if (!line->blk_bitmap) - return -ENOMEM; - - line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL); - if (!line->erase_bitmap) - goto free_blk_bitmap; - - - line->chks = kmalloc_array(lm->blk_per_line, - sizeof(struct nvm_chk_meta), GFP_KERNEL); - if (!line->chks) - goto free_erase_bitmap; - - line->w_err_gc = kzalloc(sizeof(struct pblk_w_err_gc), GFP_KERNEL); - if (!line->w_err_gc) - goto free_chks; - - return 0; - -free_chks: - kfree(line->chks); -free_erase_bitmap: - kfree(line->erase_bitmap); -free_blk_bitmap: - kfree(line->blk_bitmap); - return -ENOMEM; -} - -static int pblk_line_mg_init(struct pblk *pblk) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct 
pblk_line_meta *lm = &pblk->lm; - int i, bb_distance; - - l_mg->nr_lines = geo->num_chk; - l_mg->log_line = l_mg->data_line = NULL; - l_mg->l_seq_nr = l_mg->d_seq_nr = 0; - l_mg->nr_free_lines = 0; - bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES); - - INIT_LIST_HEAD(&l_mg->free_list); - INIT_LIST_HEAD(&l_mg->corrupt_list); - INIT_LIST_HEAD(&l_mg->bad_list); - INIT_LIST_HEAD(&l_mg->gc_full_list); - INIT_LIST_HEAD(&l_mg->gc_high_list); - INIT_LIST_HEAD(&l_mg->gc_mid_list); - INIT_LIST_HEAD(&l_mg->gc_low_list); - INIT_LIST_HEAD(&l_mg->gc_empty_list); - INIT_LIST_HEAD(&l_mg->gc_werr_list); - - INIT_LIST_HEAD(&l_mg->emeta_list); - - l_mg->gc_lists[0] = &l_mg->gc_werr_list; - l_mg->gc_lists[1] = &l_mg->gc_high_list; - l_mg->gc_lists[2] = &l_mg->gc_mid_list; - l_mg->gc_lists[3] = &l_mg->gc_low_list; - - spin_lock_init(&l_mg->free_lock); - spin_lock_init(&l_mg->close_lock); - spin_lock_init(&l_mg->gc_lock); - - l_mg->vsc_list = kcalloc(l_mg->nr_lines, sizeof(__le32), GFP_KERNEL); - if (!l_mg->vsc_list) - goto fail; - - l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL); - if (!l_mg->bb_template) - goto fail_free_vsc_list; - - l_mg->bb_aux = kzalloc(lm->sec_bitmap_len, GFP_KERNEL); - if (!l_mg->bb_aux) - goto fail_free_bb_template; - - /* smeta is always small enough to fit on a kmalloc memory allocation, - * emeta depends on the number of LUNs allocated to the pblk instance - */ - for (i = 0; i < PBLK_DATA_LINES; i++) { - l_mg->sline_meta[i] = kmalloc(lm->smeta_len, GFP_KERNEL); - if (!l_mg->sline_meta[i]) - goto fail_free_smeta; - } - - l_mg->bitmap_cache = kmem_cache_create("pblk_lm_bitmap", - lm->sec_bitmap_len, 0, 0, NULL); - if (!l_mg->bitmap_cache) - goto fail_free_smeta; - - /* the bitmap pool is used for both valid and map bitmaps */ - l_mg->bitmap_pool = mempool_create_slab_pool(PBLK_DATA_LINES * 2, - l_mg->bitmap_cache); - if (!l_mg->bitmap_pool) - goto fail_destroy_bitmap_cache; - - /* emeta allocates three different buffers for managing metadata with - * in-memory and in-media layouts - */ - for (i = 0; i < PBLK_DATA_LINES; i++) { - struct pblk_emeta *emeta; - - emeta = kmalloc(sizeof(struct pblk_emeta), GFP_KERNEL); - if (!emeta) - goto fail_free_emeta; - - emeta->buf = kvmalloc(lm->emeta_len[0], GFP_KERNEL); - if (!emeta->buf) { - kfree(emeta); - goto fail_free_emeta; - } - - emeta->nr_entries = lm->emeta_sec[0]; - l_mg->eline_meta[i] = emeta; - } - - for (i = 0; i < l_mg->nr_lines; i++) - l_mg->vsc_list[i] = cpu_to_le32(EMPTY_ENTRY); - - bb_distance = (geo->all_luns) * geo->ws_opt; - for (i = 0; i < lm->sec_per_line; i += bb_distance) - bitmap_set(l_mg->bb_template, i, geo->ws_opt); - - return 0; - -fail_free_emeta: - while (--i >= 0) { - kvfree(l_mg->eline_meta[i]->buf); - kfree(l_mg->eline_meta[i]); - } - - mempool_destroy(l_mg->bitmap_pool); -fail_destroy_bitmap_cache: - kmem_cache_destroy(l_mg->bitmap_cache); -fail_free_smeta: - for (i = 0; i < PBLK_DATA_LINES; i++) - kfree(l_mg->sline_meta[i]); - kfree(l_mg->bb_aux); -fail_free_bb_template: - kfree(l_mg->bb_template); -fail_free_vsc_list: - kfree(l_mg->vsc_list); -fail: - return -ENOMEM; -} - -static int pblk_line_meta_init(struct pblk *pblk) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - unsigned int smeta_len, emeta_len; - int i; - - lm->sec_per_line = geo->clba * geo->all_luns; - lm->blk_per_line = geo->all_luns; - lm->blk_bitmap_len = BITS_TO_LONGS(geo->all_luns) * sizeof(long); - lm->sec_bitmap_len = BITS_TO_LONGS(lm->sec_per_line) * 
sizeof(long); - lm->lun_bitmap_len = BITS_TO_LONGS(geo->all_luns) * sizeof(long); - lm->mid_thrs = lm->sec_per_line / 2; - lm->high_thrs = lm->sec_per_line / 4; - lm->meta_distance = (geo->all_luns / 2) * pblk->min_write_pgs; - - /* Calculate necessary pages for smeta. See comment over struct - * line_smeta definition - */ - i = 1; -add_smeta_page: - lm->smeta_sec = i * geo->ws_opt; - lm->smeta_len = lm->smeta_sec * geo->csecs; - - smeta_len = sizeof(struct line_smeta) + lm->lun_bitmap_len; - if (smeta_len > lm->smeta_len) { - i++; - goto add_smeta_page; - } - - /* Calculate necessary pages for emeta. See comment over struct - * line_emeta definition - */ - i = 1; -add_emeta_page: - lm->emeta_sec[0] = i * geo->ws_opt; - lm->emeta_len[0] = lm->emeta_sec[0] * geo->csecs; - - emeta_len = calc_emeta_len(pblk); - if (emeta_len > lm->emeta_len[0]) { - i++; - goto add_emeta_page; - } - - lm->emeta_bb = geo->all_luns > i ? geo->all_luns - i : 0; - - lm->min_blk_line = 1; - if (geo->all_luns > 1) - lm->min_blk_line += DIV_ROUND_UP(lm->smeta_sec + - lm->emeta_sec[0], geo->clba); - - if (lm->min_blk_line > lm->blk_per_line) { - pblk_err(pblk, "config. not supported. Min. LUN in line:%d\n", - lm->blk_per_line); - return -EINVAL; - } - - return 0; -} - -static int pblk_lines_init(struct pblk *pblk) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line *line; - void *chunk_meta; - int nr_free_chks = 0; - int i, ret; - - ret = pblk_line_meta_init(pblk); - if (ret) - return ret; - - ret = pblk_line_mg_init(pblk); - if (ret) - return ret; - - ret = pblk_luns_init(pblk); - if (ret) - goto fail_free_meta; - - chunk_meta = pblk_get_chunk_meta(pblk); - if (IS_ERR(chunk_meta)) { - ret = PTR_ERR(chunk_meta); - goto fail_free_luns; - } - - pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line), - GFP_KERNEL); - if (!pblk->lines) { - ret = -ENOMEM; - goto fail_free_chunk_meta; - } - - for (i = 0; i < l_mg->nr_lines; i++) { - line = &pblk->lines[i]; - - ret = pblk_alloc_line_meta(pblk, line); - if (ret) - goto fail_free_lines; - - nr_free_chks += pblk_setup_line_meta(pblk, line, chunk_meta, i); - - trace_pblk_line_state(pblk_disk_name(pblk), line->id, - line->state); - } - - if (!nr_free_chks) { - pblk_err(pblk, "too many bad blocks prevent for sane instance\n"); - ret = -EINTR; - goto fail_free_lines; - } - - ret = pblk_set_provision(pblk, nr_free_chks); - if (ret) - goto fail_free_lines; - - vfree(chunk_meta); - return 0; - -fail_free_lines: - while (--i >= 0) - pblk_line_meta_free(l_mg, &pblk->lines[i]); - kfree(pblk->lines); -fail_free_chunk_meta: - vfree(chunk_meta); -fail_free_luns: - kfree(pblk->luns); -fail_free_meta: - pblk_line_mg_free(pblk); - - return ret; -} - -static int pblk_writer_init(struct pblk *pblk) -{ - pblk->writer_ts = kthread_create(pblk_write_ts, pblk, "pblk-writer-t"); - if (IS_ERR(pblk->writer_ts)) { - int err = PTR_ERR(pblk->writer_ts); - - if (err != -EINTR) - pblk_err(pblk, "could not allocate writer kthread (%d)\n", - err); - return err; - } - - timer_setup(&pblk->wtimer, pblk_write_timer_fn, 0); - mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(100)); - - return 0; -} - -static void pblk_writer_stop(struct pblk *pblk) -{ - /* The pipeline must be stopped and the write buffer emptied before the - * write thread is stopped - */ - WARN(pblk_rb_read_count(&pblk->rwb), - "Stopping not fully persisted write buffer\n"); - - WARN(pblk_rb_sync_count(&pblk->rwb), - "Stopping not fully synced write buffer\n"); - - del_timer_sync(&pblk->wtimer); - if (pblk->writer_ts) - 
kthread_stop(pblk->writer_ts); -} - -static void pblk_free(struct pblk *pblk) -{ - pblk_lines_free(pblk); - pblk_l2p_free(pblk); - pblk_rwb_free(pblk); - pblk_core_free(pblk); - - kfree(pblk); -} - -static void pblk_tear_down(struct pblk *pblk, bool graceful) -{ - if (graceful) - __pblk_pipeline_flush(pblk); - __pblk_pipeline_stop(pblk); - pblk_writer_stop(pblk); - pblk_rb_sync_l2p(&pblk->rwb); - pblk_rl_free(&pblk->rl); - - pblk_debug(pblk, "consistent tear down (graceful:%d)\n", graceful); -} - -static void pblk_exit(void *private, bool graceful) -{ - struct pblk *pblk = private; - - pblk_gc_exit(pblk, graceful); - pblk_tear_down(pblk, graceful); - -#ifdef CONFIG_NVM_PBLK_DEBUG - pblk_info(pblk, "exit: L2P CRC: %x\n", pblk_l2p_crc(pblk)); -#endif - - pblk_free(pblk); -} - -static sector_t pblk_capacity(void *private) -{ - struct pblk *pblk = private; - - return pblk->capacity * NR_PHY_IN_LOG; -} - -static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, - int flags) -{ - struct nvm_geo *geo = &dev->geo; - struct request_queue *bqueue = dev->q; - struct request_queue *tqueue = tdisk->queue; - struct pblk *pblk; - int ret; - - pblk = kzalloc(sizeof(struct pblk), GFP_KERNEL); - if (!pblk) - return ERR_PTR(-ENOMEM); - - pblk->dev = dev; - pblk->disk = tdisk; - pblk->state = PBLK_STATE_RUNNING; - trace_pblk_state(pblk_disk_name(pblk), pblk->state); - pblk->gc.gc_enabled = 0; - - if (!(geo->version == NVM_OCSSD_SPEC_12 || - geo->version == NVM_OCSSD_SPEC_20)) { - pblk_err(pblk, "OCSSD version not supported (%u)\n", - geo->version); - kfree(pblk); - return ERR_PTR(-EINVAL); - } - - if (geo->ext) { - pblk_err(pblk, "extended metadata not supported\n"); - kfree(pblk); - return ERR_PTR(-EINVAL); - } - - spin_lock_init(&pblk->resubmit_lock); - spin_lock_init(&pblk->trans_lock); - spin_lock_init(&pblk->lock); - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_set(&pblk->inflight_writes, 0); - atomic_long_set(&pblk->padded_writes, 0); - atomic_long_set(&pblk->padded_wb, 0); - atomic_long_set(&pblk->req_writes, 0); - atomic_long_set(&pblk->sub_writes, 0); - atomic_long_set(&pblk->sync_writes, 0); - atomic_long_set(&pblk->inflight_reads, 0); - atomic_long_set(&pblk->cache_reads, 0); - atomic_long_set(&pblk->sync_reads, 0); - atomic_long_set(&pblk->recov_writes, 0); - atomic_long_set(&pblk->recov_writes, 0); - atomic_long_set(&pblk->recov_gc_writes, 0); - atomic_long_set(&pblk->recov_gc_reads, 0); -#endif - - atomic_long_set(&pblk->read_failed, 0); - atomic_long_set(&pblk->read_empty, 0); - atomic_long_set(&pblk->read_high_ecc, 0); - atomic_long_set(&pblk->read_failed_gc, 0); - atomic_long_set(&pblk->write_failed, 0); - atomic_long_set(&pblk->erase_failed, 0); - - ret = pblk_core_init(pblk); - if (ret) { - pblk_err(pblk, "could not initialize core\n"); - goto fail; - } - - ret = pblk_lines_init(pblk); - if (ret) { - pblk_err(pblk, "could not initialize lines\n"); - goto fail_free_core; - } - - ret = pblk_rwb_init(pblk); - if (ret) { - pblk_err(pblk, "could not initialize write buffer\n"); - goto fail_free_lines; - } - - ret = pblk_l2p_init(pblk, flags & NVM_TARGET_FACTORY); - if (ret) { - pblk_err(pblk, "could not initialize maps\n"); - goto fail_free_rwb; - } - - ret = pblk_writer_init(pblk); - if (ret) { - if (ret != -EINTR) - pblk_err(pblk, "could not initialize write thread\n"); - goto fail_free_l2p; - } - - ret = pblk_gc_init(pblk); - if (ret) { - pblk_err(pblk, "could not initialize gc\n"); - goto fail_stop_writer; - } - - /* inherit the size from the underlying device */ - 
blk_queue_logical_block_size(tqueue, queue_physical_block_size(bqueue)); - blk_queue_max_hw_sectors(tqueue, queue_max_hw_sectors(bqueue)); - - blk_queue_write_cache(tqueue, true, false); - - tqueue->limits.discard_granularity = geo->clba * geo->csecs; - tqueue->limits.discard_alignment = 0; - blk_queue_max_discard_sectors(tqueue, UINT_MAX >> 9); - blk_queue_flag_set(QUEUE_FLAG_DISCARD, tqueue); - - pblk_info(pblk, "luns:%u, lines:%d, secs:%llu, buf entries:%u\n", - geo->all_luns, pblk->l_mg.nr_lines, - (unsigned long long)pblk->capacity, - pblk->rwb.nr_entries); - - wake_up_process(pblk->writer_ts); - - /* Check if we need to start GC */ - pblk_gc_should_kick(pblk); - - return pblk; - -fail_stop_writer: - pblk_writer_stop(pblk); -fail_free_l2p: - pblk_l2p_free(pblk); -fail_free_rwb: - pblk_rwb_free(pblk); -fail_free_lines: - pblk_lines_free(pblk); -fail_free_core: - pblk_core_free(pblk); -fail: - kfree(pblk); - return ERR_PTR(ret); -} - -/* physical block device target */ -static struct nvm_tgt_type tt_pblk = { - .name = "pblk", - .version = {1, 0, 0}, - - .bops = &pblk_bops, - .capacity = pblk_capacity, - - .init = pblk_init, - .exit = pblk_exit, - - .sysfs_init = pblk_sysfs_init, - .sysfs_exit = pblk_sysfs_exit, - .owner = THIS_MODULE, -}; - -static int __init pblk_module_init(void) -{ - int ret; - - ret = bioset_init(&pblk_bio_set, BIO_POOL_SIZE, 0, 0); - if (ret) - return ret; - ret = nvm_register_tgt_type(&tt_pblk); - if (ret) - bioset_exit(&pblk_bio_set); - return ret; -} - -static void pblk_module_exit(void) -{ - bioset_exit(&pblk_bio_set); - nvm_unregister_tgt_type(&tt_pblk); -} - -module_init(pblk_module_init); -module_exit(pblk_module_exit); -MODULE_AUTHOR("Javier Gonzalez "); -MODULE_AUTHOR("Matias Bjorling "); -MODULE_LICENSE("GPL v2"); -MODULE_DESCRIPTION("Physical Block-Device for Open-Channel SSDs"); diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c deleted file mode 100644 index 5408e32b2f13..000000000000 --- a/drivers/lightnvm/pblk-map.c +++ /dev/null @@ -1,210 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2016 CNEX Labs - * Initial release: Javier Gonzalez - * Matias Bjorling - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
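pblk_module_init() above shows the usual pairing for a two-step module bring-up: if registering the target type fails, the earlier bioset initialisation is rolled back before the error is returned, and module exit releases both. A hedged skeleton of that shape (resource_a/resource_b are hypothetical stand-ins for the bio set and the target type, not kernel APIs):

    static int resource_a_init(void)  { return 0; }
    static void resource_a_exit(void) { }
    static int resource_b_init(void)  { return 0; }
    static void resource_b_exit(void) { }

    /* Mirrors the shape of pblk_module_init(): a failure in the
     * second step rolls back the first before returning the error. */
    static int subsystem_init(void)
    {
        int ret;

        ret = resource_a_init();
        if (ret)
            return ret;

        ret = resource_b_init();
        if (ret)
            resource_a_exit();
        return ret;
    }

    static void subsystem_exit(void)
    {
        resource_b_exit();
        resource_a_exit();
    }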
- * - * pblk-map.c - pblk's lba-ppa mapping strategy - * - */ - -#include "pblk.h" - -static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry, - struct ppa_addr *ppa_list, - unsigned long *lun_bitmap, - void *meta_list, - unsigned int valid_secs) -{ - struct pblk_line *line = pblk_line_get_data(pblk); - struct pblk_emeta *emeta; - struct pblk_w_ctx *w_ctx; - __le64 *lba_list; - u64 paddr; - int nr_secs = pblk->min_write_pgs; - int i; - - if (!line) - return -ENOSPC; - - if (pblk_line_is_full(line)) { - struct pblk_line *prev_line = line; - - /* If we cannot allocate a new line, make sure to store metadata - * on current line and then fail - */ - line = pblk_line_replace_data(pblk); - pblk_line_close_meta(pblk, prev_line); - - if (!line) { - pblk_pipeline_stop(pblk); - return -ENOSPC; - } - - } - - emeta = line->emeta; - lba_list = emeta_to_lbas(pblk, emeta->buf); - - paddr = pblk_alloc_page(pblk, line, nr_secs); - - for (i = 0; i < nr_secs; i++, paddr++) { - struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i); - __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); - - /* ppa to be sent to the device */ - ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); - - /* Write context for target bio completion on write buffer. Note - * that the write buffer is protected by the sync backpointer, - * and a single writer thread have access to each specific entry - * at a time. Thus, it is safe to modify the context for the - * entry we are setting up for submission without taking any - * lock or memory barrier. - */ - if (i < valid_secs) { - kref_get(&line->ref); - atomic_inc(&line->sec_to_update); - w_ctx = pblk_rb_w_ctx(&pblk->rwb, sentry + i); - w_ctx->ppa = ppa_list[i]; - meta->lba = cpu_to_le64(w_ctx->lba); - lba_list[paddr] = cpu_to_le64(w_ctx->lba); - if (lba_list[paddr] != addr_empty) - line->nr_valid_lbas++; - else - atomic64_inc(&pblk->pad_wa); - } else { - lba_list[paddr] = addr_empty; - meta->lba = addr_empty; - __pblk_map_invalidate(pblk, line, paddr); - } - } - - pblk_down_rq(pblk, ppa_list[0], lun_bitmap); - return 0; -} - -int pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry, - unsigned long *lun_bitmap, unsigned int valid_secs, - unsigned int off) -{ - void *meta_list = pblk_get_meta_for_writes(pblk, rqd); - void *meta_buffer; - struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - unsigned int map_secs; - int min = pblk->min_write_pgs; - int i; - int ret; - - for (i = off; i < rqd->nr_ppas; i += min) { - map_secs = (i + min > valid_secs) ? (valid_secs % min) : min; - meta_buffer = pblk_get_meta(pblk, meta_list, i); - - ret = pblk_map_page_data(pblk, sentry + i, &ppa_list[i], - lun_bitmap, meta_buffer, map_secs); - if (ret) - return ret; - } - - return 0; -} - -/* only if erase_ppa is set, acquire erase semaphore */ -int pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, - unsigned int sentry, unsigned long *lun_bitmap, - unsigned int valid_secs, struct ppa_addr *erase_ppa) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - void *meta_list = pblk_get_meta_for_writes(pblk, rqd); - void *meta_buffer; - struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - struct pblk_line *e_line, *d_line; - unsigned int map_secs; - int min = pblk->min_write_pgs; - int i, erase_lun; - int ret; - - - for (i = 0; i < rqd->nr_ppas; i += min) { - map_secs = (i + min > valid_secs) ? 
(valid_secs % min) : min; - meta_buffer = pblk_get_meta(pblk, meta_list, i); - - ret = pblk_map_page_data(pblk, sentry + i, &ppa_list[i], - lun_bitmap, meta_buffer, map_secs); - if (ret) - return ret; - - erase_lun = pblk_ppa_to_pos(geo, ppa_list[i]); - - /* line can change after page map. We might also be writing the - * last line. - */ - e_line = pblk_line_get_erase(pblk); - if (!e_line) - return pblk_map_rq(pblk, rqd, sentry, lun_bitmap, - valid_secs, i + min); - - spin_lock(&e_line->lock); - if (!test_bit(erase_lun, e_line->erase_bitmap)) { - set_bit(erase_lun, e_line->erase_bitmap); - atomic_dec(&e_line->left_eblks); - - *erase_ppa = ppa_list[i]; - erase_ppa->a.blk = e_line->id; - erase_ppa->a.reserved = 0; - - spin_unlock(&e_line->lock); - - /* Avoid evaluating e_line->left_eblks */ - return pblk_map_rq(pblk, rqd, sentry, lun_bitmap, - valid_secs, i + min); - } - spin_unlock(&e_line->lock); - } - - d_line = pblk_line_get_data(pblk); - - /* line can change after page map. We might also be writing the - * last line. - */ - e_line = pblk_line_get_erase(pblk); - if (!e_line) - return -ENOSPC; - - /* Erase blocks that are bad in this line but might not be in next */ - if (unlikely(pblk_ppa_empty(*erase_ppa)) && - bitmap_weight(d_line->blk_bitmap, lm->blk_per_line)) { - int bit = -1; - -retry: - bit = find_next_bit(d_line->blk_bitmap, - lm->blk_per_line, bit + 1); - if (bit >= lm->blk_per_line) - return 0; - - spin_lock(&e_line->lock); - if (test_bit(bit, e_line->erase_bitmap)) { - spin_unlock(&e_line->lock); - goto retry; - } - spin_unlock(&e_line->lock); - - set_bit(bit, e_line->erase_bitmap); - atomic_dec(&e_line->left_eblks); - *erase_ppa = pblk->luns[bit].bppa; /* set ch and lun */ - erase_ppa->a.blk = e_line->id; - } - - return 0; -} diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c deleted file mode 100644 index 5abb1705b039..000000000000 --- a/drivers/lightnvm/pblk-rb.c +++ /dev/null @@ -1,858 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2016 CNEX Labs - * Initial release: Javier Gonzalez - * - * Based upon the circular ringbuffer. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
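The retry loop in pblk_map_erase_rq() above walks d_line->blk_bitmap with find_next_bit() to pick a bad block that has not yet been queued for erase. A portable stand-in for that primitive (simplified, single-threaded, no locking):

    #include <limits.h>

    #define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

    /* Return the index of the first set bit at or after 'start',
     * or 'nbits' if none is found -- the find_next_bit() contract. */
    static unsigned long find_next_set(const unsigned long *bm,
                                       unsigned long nbits,
                                       unsigned long start)
    {
        for (unsigned long i = start; i < nbits; i++)
            if (bm[i / BITS_PER_LONG] & (1UL << (i % BITS_PER_LONG)))
                return i;
        return nbits;
    }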
- * - * pblk-rb.c - pblk's write buffer - */ - -#include - -#include "pblk.h" - -static DECLARE_RWSEM(pblk_rb_lock); - -static void pblk_rb_data_free(struct pblk_rb *rb) -{ - struct pblk_rb_pages *p, *t; - - down_write(&pblk_rb_lock); - list_for_each_entry_safe(p, t, &rb->pages, list) { - free_pages((unsigned long)page_address(p->pages), p->order); - list_del(&p->list); - kfree(p); - } - up_write(&pblk_rb_lock); -} - -void pblk_rb_free(struct pblk_rb *rb) -{ - pblk_rb_data_free(rb); - vfree(rb->entries); -} - -/* - * pblk_rb_calculate_size -- calculate the size of the write buffer - */ -static unsigned int pblk_rb_calculate_size(unsigned int nr_entries, - unsigned int threshold) -{ - unsigned int thr_sz = 1 << (get_count_order(threshold + NVM_MAX_VLBA)); - unsigned int max_sz = max(thr_sz, nr_entries); - unsigned int max_io; - - /* Alloc a write buffer that can (i) fit at least two split bios - * (considering max I/O size NVM_MAX_VLBA, and (ii) guarantee that the - * threshold will be respected - */ - max_io = (1 << max((int)(get_count_order(max_sz)), - (int)(get_count_order(NVM_MAX_VLBA << 1)))); - if ((threshold + NVM_MAX_VLBA) >= max_io) - max_io <<= 1; - - return max_io; -} - -/* - * Initialize ring buffer. The data and metadata buffers must be previously - * allocated and their size must be a power of two - * (Documentation/core-api/circular-buffers.rst) - */ -int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int threshold, - unsigned int seg_size) -{ - struct pblk *pblk = container_of(rb, struct pblk, rwb); - struct pblk_rb_entry *entries; - unsigned int init_entry = 0; - unsigned int max_order = MAX_ORDER - 1; - unsigned int power_size, power_seg_sz; - unsigned int alloc_order, order, iter; - unsigned int nr_entries; - - nr_entries = pblk_rb_calculate_size(size, threshold); - entries = vzalloc(array_size(nr_entries, sizeof(struct pblk_rb_entry))); - if (!entries) - return -ENOMEM; - - power_size = get_count_order(nr_entries); - power_seg_sz = get_count_order(seg_size); - - down_write(&pblk_rb_lock); - rb->entries = entries; - rb->seg_size = (1 << power_seg_sz); - rb->nr_entries = (1 << power_size); - rb->mem = rb->subm = rb->sync = rb->l2p_update = 0; - rb->back_thres = threshold; - rb->flush_point = EMPTY_ENTRY; - - spin_lock_init(&rb->w_lock); - spin_lock_init(&rb->s_lock); - - INIT_LIST_HEAD(&rb->pages); - - alloc_order = power_size; - if (alloc_order >= max_order) { - order = max_order; - iter = (1 << (alloc_order - max_order)); - } else { - order = alloc_order; - iter = 1; - } - - do { - struct pblk_rb_entry *entry; - struct pblk_rb_pages *page_set; - void *kaddr; - unsigned long set_size; - int i; - - page_set = kmalloc(sizeof(struct pblk_rb_pages), GFP_KERNEL); - if (!page_set) { - up_write(&pblk_rb_lock); - vfree(entries); - return -ENOMEM; - } - - page_set->order = order; - page_set->pages = alloc_pages(GFP_KERNEL, order); - if (!page_set->pages) { - kfree(page_set); - pblk_rb_data_free(rb); - up_write(&pblk_rb_lock); - vfree(entries); - return -ENOMEM; - } - kaddr = page_address(page_set->pages); - - entry = &rb->entries[init_entry]; - entry->data = kaddr; - entry->cacheline = pblk_cacheline_to_addr(init_entry++); - entry->w_ctx.flags = PBLK_WRITABLE_ENTRY; - - set_size = (1 << order); - for (i = 1; i < set_size; i++) { - entry = &rb->entries[init_entry]; - entry->cacheline = pblk_cacheline_to_addr(init_entry++); - entry->data = kaddr + (i * rb->seg_size); - entry->w_ctx.flags = PBLK_WRITABLE_ENTRY; - bio_list_init(&entry->w_ctx.bios); - } - - 
list_add_tail(&page_set->list, &rb->pages); - iter--; - } while (iter > 0); - up_write(&pblk_rb_lock); - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_set(&rb->inflight_flush_point, 0); -#endif - - /* - * Initialize rate-limiter, which controls access to the write buffer - * by user and GC I/O - */ - pblk_rl_init(&pblk->rl, rb->nr_entries, threshold); - - return 0; -} - -static void clean_wctx(struct pblk_w_ctx *w_ctx) -{ - int flags; - - flags = READ_ONCE(w_ctx->flags); - WARN_ONCE(!(flags & PBLK_SUBMITTED_ENTRY), - "pblk: overwriting unsubmitted data\n"); - - /* Release flags on context. Protect from writes and reads */ - smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY); - pblk_ppa_set_empty(&w_ctx->ppa); - w_ctx->lba = ADDR_EMPTY; -} - -#define pblk_rb_ring_count(head, tail, size) CIRC_CNT(head, tail, size) -#define pblk_rb_ring_space(rb, head, tail, size) \ - (CIRC_SPACE(head, tail, size)) - -/* - * Buffer space is calculated with respect to the back pointer signaling - * synchronized entries to the media. - */ -static unsigned int pblk_rb_space(struct pblk_rb *rb) -{ - unsigned int mem = READ_ONCE(rb->mem); - unsigned int sync = READ_ONCE(rb->sync); - - return pblk_rb_ring_space(rb, mem, sync, rb->nr_entries); -} - -unsigned int pblk_rb_ptr_wrap(struct pblk_rb *rb, unsigned int p, - unsigned int nr_entries) -{ - return (p + nr_entries) & (rb->nr_entries - 1); -} - -/* - * Buffer count is calculated with respect to the submission entry signaling the - * entries that are available to send to the media - */ -unsigned int pblk_rb_read_count(struct pblk_rb *rb) -{ - unsigned int mem = READ_ONCE(rb->mem); - unsigned int subm = READ_ONCE(rb->subm); - - return pblk_rb_ring_count(mem, subm, rb->nr_entries); -} - -unsigned int pblk_rb_sync_count(struct pblk_rb *rb) -{ - unsigned int mem = READ_ONCE(rb->mem); - unsigned int sync = READ_ONCE(rb->sync); - - return pblk_rb_ring_count(mem, sync, rb->nr_entries); -} - -unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries) -{ - unsigned int subm; - - subm = READ_ONCE(rb->subm); - /* Commit read means updating submission pointer */ - smp_store_release(&rb->subm, pblk_rb_ptr_wrap(rb, subm, nr_entries)); - - return subm; -} - -static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int to_update) -{ - struct pblk *pblk = container_of(rb, struct pblk, rwb); - struct pblk_line *line; - struct pblk_rb_entry *entry; - struct pblk_w_ctx *w_ctx; - unsigned int user_io = 0, gc_io = 0; - unsigned int i; - int flags; - - for (i = 0; i < to_update; i++) { - entry = &rb->entries[rb->l2p_update]; - w_ctx = &entry->w_ctx; - - flags = READ_ONCE(entry->w_ctx.flags); - if (flags & PBLK_IOTYPE_USER) - user_io++; - else if (flags & PBLK_IOTYPE_GC) - gc_io++; - else - WARN(1, "pblk: unknown IO type\n"); - - pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa, - entry->cacheline); - - line = pblk_ppa_to_line(pblk, w_ctx->ppa); - atomic_dec(&line->sec_to_update); - kref_put(&line->ref, pblk_line_put); - clean_wctx(w_ctx); - rb->l2p_update = pblk_rb_ptr_wrap(rb, rb->l2p_update, 1); - } - - pblk_rl_out(&pblk->rl, user_io, gc_io); - - return 0; -} - -/* - * When we move the l2p_update pointer, we update the l2p table - lookups will - * point to the physical address instead of to the cacheline in the write buffer - * from this moment on. 
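The mem/subm/sync/l2p_update pointers above are plain circular-buffer indices over a power-of-two array, so count, space and wrap-around reduce to masked arithmetic, matching CIRC_CNT()/CIRC_SPACE() from include/linux/circ_buf.h:

    /* size must be a power of two; one slot stays unused so that a
     * full buffer and an empty buffer remain distinguishable. */
    static unsigned int circ_cnt(unsigned int head, unsigned int tail,
                                 unsigned int size)
    {
        return (head - tail) & (size - 1);
    }

    static unsigned int circ_space(unsigned int head, unsigned int tail,
                                   unsigned int size)
    {
        return circ_cnt(tail, head + 1, size);
    }

    /* Equivalent of pblk_rb_ptr_wrap(): advance and wrap a pointer. */
    static unsigned int ptr_wrap(unsigned int p, unsigned int n,
                                 unsigned int size)
    {
        return (p + n) & (size - 1);
    }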
- */ -static int pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int nr_entries, - unsigned int mem, unsigned int sync) -{ - unsigned int space, count; - int ret = 0; - - lockdep_assert_held(&rb->w_lock); - - /* Update l2p only as buffer entries are being overwritten */ - space = pblk_rb_ring_space(rb, mem, rb->l2p_update, rb->nr_entries); - if (space > nr_entries) - goto out; - - count = nr_entries - space; - /* l2p_update used exclusively under rb->w_lock */ - ret = __pblk_rb_update_l2p(rb, count); - -out: - return ret; -} - -/* - * Update the l2p entry for all sectors stored on the write buffer. This means - * that all future lookups to the l2p table will point to a device address, not - * to the cacheline in the write buffer. - */ -void pblk_rb_sync_l2p(struct pblk_rb *rb) -{ - unsigned int sync; - unsigned int to_update; - - spin_lock(&rb->w_lock); - - /* Protect from reads and writes */ - sync = smp_load_acquire(&rb->sync); - - to_update = pblk_rb_ring_count(sync, rb->l2p_update, rb->nr_entries); - __pblk_rb_update_l2p(rb, to_update); - - spin_unlock(&rb->w_lock); -} - -/* - * Write @nr_entries to ring buffer from @data buffer if there is enough space. - * Typically, 4KB data chunks coming from a bio will be copied to the ring - * buffer, thus the write will fail if not all incoming data can be copied. - * - */ -static void __pblk_rb_write_entry(struct pblk_rb *rb, void *data, - struct pblk_w_ctx w_ctx, - struct pblk_rb_entry *entry) -{ - memcpy(entry->data, data, rb->seg_size); - - entry->w_ctx.lba = w_ctx.lba; - entry->w_ctx.ppa = w_ctx.ppa; -} - -void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data, - struct pblk_w_ctx w_ctx, unsigned int ring_pos) -{ - struct pblk *pblk = container_of(rb, struct pblk, rwb); - struct pblk_rb_entry *entry; - int flags; - - entry = &rb->entries[ring_pos]; - flags = READ_ONCE(entry->w_ctx.flags); -#ifdef CONFIG_NVM_PBLK_DEBUG - /* Caller must guarantee that the entry is free */ - BUG_ON(!(flags & PBLK_WRITABLE_ENTRY)); -#endif - - __pblk_rb_write_entry(rb, data, w_ctx, entry); - - pblk_update_map_cache(pblk, w_ctx.lba, entry->cacheline); - flags = w_ctx.flags | PBLK_WRITTEN_DATA; - - /* Release flags on write context. Protect from writes */ - smp_store_release(&entry->w_ctx.flags, flags); -} - -void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data, - struct pblk_w_ctx w_ctx, struct pblk_line *line, - u64 paddr, unsigned int ring_pos) -{ - struct pblk *pblk = container_of(rb, struct pblk, rwb); - struct pblk_rb_entry *entry; - int flags; - - entry = &rb->entries[ring_pos]; - flags = READ_ONCE(entry->w_ctx.flags); -#ifdef CONFIG_NVM_PBLK_DEBUG - /* Caller must guarantee that the entry is free */ - BUG_ON(!(flags & PBLK_WRITABLE_ENTRY)); -#endif - - __pblk_rb_write_entry(rb, data, w_ctx, entry); - - if (!pblk_update_map_gc(pblk, w_ctx.lba, entry->cacheline, line, paddr)) - entry->w_ctx.lba = ADDR_EMPTY; - - flags = w_ctx.flags | PBLK_WRITTEN_DATA; - - /* Release flags on write context. Protect from writes */ - smp_store_release(&entry->w_ctx.flags, flags); -} - -static int pblk_rb_flush_point_set(struct pblk_rb *rb, struct bio *bio, - unsigned int pos) -{ - struct pblk_rb_entry *entry; - unsigned int sync, flush_point; - - pblk_rb_sync_init(rb, NULL); - sync = READ_ONCE(rb->sync); - - if (pos == sync) { - pblk_rb_sync_end(rb, NULL); - return 0; - } - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_inc(&rb->inflight_flush_point); -#endif - - flush_point = (pos == 0) ? 
(rb->nr_entries - 1) : (pos - 1); - entry = &rb->entries[flush_point]; - - /* Protect flush points */ - smp_store_release(&rb->flush_point, flush_point); - - if (bio) - bio_list_add(&entry->w_ctx.bios, bio); - - pblk_rb_sync_end(rb, NULL); - - return bio ? 1 : 0; -} - -static int __pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries, - unsigned int *pos) -{ - unsigned int mem; - unsigned int sync; - unsigned int threshold; - - sync = READ_ONCE(rb->sync); - mem = READ_ONCE(rb->mem); - - threshold = nr_entries + rb->back_thres; - - if (pblk_rb_ring_space(rb, mem, sync, rb->nr_entries) < threshold) - return 0; - - if (pblk_rb_update_l2p(rb, nr_entries, mem, sync)) - return 0; - - *pos = mem; - - return 1; -} - -static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries, - unsigned int *pos) -{ - if (!__pblk_rb_may_write(rb, nr_entries, pos)) - return 0; - - /* Protect from read count */ - smp_store_release(&rb->mem, pblk_rb_ptr_wrap(rb, *pos, nr_entries)); - return 1; -} - -void pblk_rb_flush(struct pblk_rb *rb) -{ - struct pblk *pblk = container_of(rb, struct pblk, rwb); - unsigned int mem = READ_ONCE(rb->mem); - - if (pblk_rb_flush_point_set(rb, NULL, mem)) - return; - - pblk_write_kick(pblk); -} - -static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries, - unsigned int *pos, struct bio *bio, - int *io_ret) -{ - unsigned int mem; - - if (!__pblk_rb_may_write(rb, nr_entries, pos)) - return 0; - - mem = pblk_rb_ptr_wrap(rb, *pos, nr_entries); - *io_ret = NVM_IO_DONE; - - if (bio->bi_opf & REQ_PREFLUSH) { - struct pblk *pblk = container_of(rb, struct pblk, rwb); - - atomic64_inc(&pblk->nr_flush); - if (pblk_rb_flush_point_set(&pblk->rwb, bio, mem)) - *io_ret = NVM_IO_OK; - } - - /* Protect from read count */ - smp_store_release(&rb->mem, mem); - - return 1; -} - -/* - * Atomically check that (i) there is space on the write buffer for the - * incoming I/O, and (ii) the current I/O type has enough budget in the write - * buffer (rate-limiter). - */ -int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio, - unsigned int nr_entries, unsigned int *pos) -{ - struct pblk *pblk = container_of(rb, struct pblk, rwb); - int io_ret; - - spin_lock(&rb->w_lock); - io_ret = pblk_rl_user_may_insert(&pblk->rl, nr_entries); - if (io_ret) { - spin_unlock(&rb->w_lock); - return io_ret; - } - - if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &io_ret)) { - spin_unlock(&rb->w_lock); - return NVM_IO_REQUEUE; - } - - pblk_rl_user_in(&pblk->rl, nr_entries); - spin_unlock(&rb->w_lock); - - return io_ret; -} - -/* - * Look at pblk_rb_may_write_user comment - */ -int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries, - unsigned int *pos) -{ - struct pblk *pblk = container_of(rb, struct pblk, rwb); - - spin_lock(&rb->w_lock); - if (!pblk_rl_gc_may_insert(&pblk->rl, nr_entries)) { - spin_unlock(&rb->w_lock); - return 0; - } - - if (!pblk_rb_may_write(rb, nr_entries, pos)) { - spin_unlock(&rb->w_lock); - return 0; - } - - pblk_rl_gc_in(&pblk->rl, nr_entries); - spin_unlock(&rb->w_lock); - - return 1; -} - -/* - * Read available entries on rb and add them to the given bio. To avoid a memory - * copy, a page reference to the write buffer is used to be added to the bio. - * - * This function is used by the write thread to form the write bio that will - * persist data on the write buffer to the media. 
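__pblk_rb_may_write() above admits a write only when the free space covers the request plus rb->back_thres, a reserve the buffer keeps free. Using the circ_space() helper sketched earlier, the admission test is just:

    /* 'mem' is the producer head, 'sync' the slowest consumer tail.
     * Admit nr_entries only while the reserve also stays free. */
    static int may_write(unsigned int mem, unsigned int sync,
                         unsigned int size, unsigned int nr_entries,
                         unsigned int back_thres)
    {
        return circ_space(mem, sync, size) >= nr_entries + back_thres;
    }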
- */ -unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd, - unsigned int pos, unsigned int nr_entries, - unsigned int count) -{ - struct pblk *pblk = container_of(rb, struct pblk, rwb); - struct request_queue *q = pblk->dev->q; - struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); - struct bio *bio = rqd->bio; - struct pblk_rb_entry *entry; - struct page *page; - unsigned int pad = 0, to_read = nr_entries; - unsigned int i; - int flags; - - if (count < nr_entries) { - pad = nr_entries - count; - to_read = count; - } - - /* Add space for packed metadata if in use*/ - pad += (pblk->min_write_pgs - pblk->min_write_pgs_data); - - c_ctx->sentry = pos; - c_ctx->nr_valid = to_read; - c_ctx->nr_padded = pad; - - for (i = 0; i < to_read; i++) { - entry = &rb->entries[pos]; - - /* A write has been allowed into the buffer, but data is still - * being copied to it. It is ok to busy wait. - */ -try: - flags = READ_ONCE(entry->w_ctx.flags); - if (!(flags & PBLK_WRITTEN_DATA)) { - io_schedule(); - goto try; - } - - page = virt_to_page(entry->data); - if (!page) { - pblk_err(pblk, "could not allocate write bio page\n"); - flags &= ~PBLK_WRITTEN_DATA; - flags |= PBLK_SUBMITTED_ENTRY; - /* Release flags on context. Protect from writes */ - smp_store_release(&entry->w_ctx.flags, flags); - return NVM_IO_ERR; - } - - if (bio_add_pc_page(q, bio, page, rb->seg_size, 0) != - rb->seg_size) { - pblk_err(pblk, "could not add page to write bio\n"); - flags &= ~PBLK_WRITTEN_DATA; - flags |= PBLK_SUBMITTED_ENTRY; - /* Release flags on context. Protect from writes */ - smp_store_release(&entry->w_ctx.flags, flags); - return NVM_IO_ERR; - } - - flags &= ~PBLK_WRITTEN_DATA; - flags |= PBLK_SUBMITTED_ENTRY; - - /* Release flags on context. Protect from writes */ - smp_store_release(&entry->w_ctx.flags, flags); - - pos = pblk_rb_ptr_wrap(rb, pos, 1); - } - - if (pad) { - if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, pad)) { - pblk_err(pblk, "could not pad page in write bio\n"); - return NVM_IO_ERR; - } - - if (pad < pblk->min_write_pgs) - atomic64_inc(&pblk->pad_dist[pad - 1]); - else - pblk_warn(pblk, "padding more than min. sectors\n"); - - atomic64_add(pad, &pblk->pad_wa); - } - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_add(pad, &pblk->padded_writes); -#endif - - return NVM_IO_OK; -} - -/* - * Copy to bio only if the lba matches the one on the given cache entry. - * Otherwise, it means that the entry has been overwritten, and the bio should - * be directed to disk. 
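The flag protocol in pblk_rb_read_to_bio() above — poll with READ_ONCE() until PBLK_WRITTEN_DATA appears, publish with smp_store_release() once the copy is complete — corresponds to a release/acquire handoff. A minimal C11 sketch of the same pattern (the kernel read side uses READ_ONCE() rather than the explicit acquire load shown here):

    #include <stdatomic.h>
    #include <string.h>

    enum { ENTRY_WRITABLE = 1, ENTRY_WRITTEN = 2 };

    struct rb_slot {
        char data[4096];
        _Atomic int flags;
    };

    static void slot_publish(struct rb_slot *s, const char *src)
    {
        memcpy(s->data, src, sizeof(s->data));
        /* release: the data copy is visible before the flag flips */
        atomic_store_explicit(&s->flags, ENTRY_WRITTEN,
                              memory_order_release);
    }

    static int slot_ready(struct rb_slot *s)
    {
        /* acquire: observing ENTRY_WRITTEN implies the data is valid */
        return atomic_load_explicit(&s->flags, memory_order_acquire)
                == ENTRY_WRITTEN;
    }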
- */ -int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba, - struct ppa_addr ppa) -{ - struct pblk *pblk = container_of(rb, struct pblk, rwb); - struct pblk_rb_entry *entry; - struct pblk_w_ctx *w_ctx; - struct ppa_addr l2p_ppa; - u64 pos = pblk_addr_to_cacheline(ppa); - void *data; - int flags; - int ret = 1; - - -#ifdef CONFIG_NVM_PBLK_DEBUG - /* Caller must ensure that the access will not cause an overflow */ - BUG_ON(pos >= rb->nr_entries); -#endif - entry = &rb->entries[pos]; - w_ctx = &entry->w_ctx; - flags = READ_ONCE(w_ctx->flags); - - spin_lock(&rb->w_lock); - spin_lock(&pblk->trans_lock); - l2p_ppa = pblk_trans_map_get(pblk, lba); - spin_unlock(&pblk->trans_lock); - - /* Check if the entry has been overwritten or is scheduled to be */ - if (!pblk_ppa_comp(l2p_ppa, ppa) || w_ctx->lba != lba || - flags & PBLK_WRITABLE_ENTRY) { - ret = 0; - goto out; - } - data = bio_data(bio); - memcpy(data, entry->data, rb->seg_size); - -out: - spin_unlock(&rb->w_lock); - return ret; -} - -struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos) -{ - unsigned int entry = pblk_rb_ptr_wrap(rb, pos, 0); - - return &rb->entries[entry].w_ctx; -} - -unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags) - __acquires(&rb->s_lock) -{ - if (flags) - spin_lock_irqsave(&rb->s_lock, *flags); - else - spin_lock_irq(&rb->s_lock); - - return rb->sync; -} - -void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags) - __releases(&rb->s_lock) -{ - lockdep_assert_held(&rb->s_lock); - - if (flags) - spin_unlock_irqrestore(&rb->s_lock, *flags); - else - spin_unlock_irq(&rb->s_lock); -} - -unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries) -{ - unsigned int sync, flush_point; - lockdep_assert_held(&rb->s_lock); - - sync = READ_ONCE(rb->sync); - flush_point = READ_ONCE(rb->flush_point); - - if (flush_point != EMPTY_ENTRY) { - unsigned int secs_to_flush; - - secs_to_flush = pblk_rb_ring_count(flush_point, sync, - rb->nr_entries); - if (secs_to_flush < nr_entries) { - /* Protect flush points */ - smp_store_release(&rb->flush_point, EMPTY_ENTRY); - } - } - - sync = pblk_rb_ptr_wrap(rb, sync, nr_entries); - - /* Protect from counts */ - smp_store_release(&rb->sync, sync); - - return sync; -} - -/* Calculate how many sectors to submit up to the current flush point. */ -unsigned int pblk_rb_flush_point_count(struct pblk_rb *rb) -{ - unsigned int subm, sync, flush_point; - unsigned int submitted, to_flush; - - /* Protect flush points */ - flush_point = smp_load_acquire(&rb->flush_point); - if (flush_point == EMPTY_ENTRY) - return 0; - - /* Protect syncs */ - sync = smp_load_acquire(&rb->sync); - - subm = READ_ONCE(rb->subm); - submitted = pblk_rb_ring_count(subm, sync, rb->nr_entries); - - /* The sync point itself counts as a sector to sync */ - to_flush = pblk_rb_ring_count(flush_point, sync, rb->nr_entries) + 1; - - return (submitted < to_flush) ? 
(to_flush - submitted) : 0;
-}
-
-int pblk_rb_tear_down_check(struct pblk_rb *rb)
-{
-	struct pblk_rb_entry *entry;
-	int i;
-	int ret = 0;
-
-	spin_lock(&rb->w_lock);
-	spin_lock_irq(&rb->s_lock);
-
-	if ((rb->mem == rb->subm) && (rb->subm == rb->sync) &&
-			(rb->sync == rb->l2p_update) &&
-			(rb->flush_point == EMPTY_ENTRY)) {
-		goto out;
-	}
-
-	if (!rb->entries) {
-		ret = 1;
-		goto out;
-	}
-
-	for (i = 0; i < rb->nr_entries; i++) {
-		entry = &rb->entries[i];
-
-		if (!entry->data) {
-			ret = 1;
-			goto out;
-		}
-	}
-
-out:
-	spin_unlock_irq(&rb->s_lock);
-	spin_unlock(&rb->w_lock);
-
-	return ret;
-}
-
-unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos)
-{
-	return (pos & (rb->nr_entries - 1));
-}
-
-int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos)
-{
-	return (pos >= rb->nr_entries);
-}
-
-ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf)
-{
-	struct pblk *pblk = container_of(rb, struct pblk, rwb);
-	struct pblk_c_ctx *c;
-	ssize_t offset;
-	int queued_entries = 0;
-
-	spin_lock_irq(&rb->s_lock);
-	list_for_each_entry(c, &pblk->compl_list, list)
-		queued_entries++;
-	spin_unlock_irq(&rb->s_lock);
-
-	if (rb->flush_point != EMPTY_ENTRY)
-		offset = scnprintf(buf, PAGE_SIZE,
-			"%u\t%u\t%u\t%u\t%u\t%u\t%u - %u/%u/%u - %d\n",
-			rb->nr_entries,
-			rb->mem,
-			rb->subm,
-			rb->sync,
-			rb->l2p_update,
-#ifdef CONFIG_NVM_PBLK_DEBUG
-			atomic_read(&rb->inflight_flush_point),
-#else
-			0,
-#endif
-			rb->flush_point,
-			pblk_rb_read_count(rb),
-			pblk_rb_space(rb),
-			pblk_rb_flush_point_count(rb),
-			queued_entries);
-	else
-		offset = scnprintf(buf, PAGE_SIZE,
-			"%u\t%u\t%u\t%u\t%u\t%u\tNULL - %u/%u/%u - %d\n",
-			rb->nr_entries,
-			rb->mem,
-			rb->subm,
-			rb->sync,
-			rb->l2p_update,
-#ifdef CONFIG_NVM_PBLK_DEBUG
-			atomic_read(&rb->inflight_flush_point),
-#else
-			0,
-#endif
-			pblk_rb_read_count(rb),
-			pblk_rb_space(rb),
-			pblk_rb_flush_point_count(rb),
-			queued_entries);
-
-	return offset;
-}
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
deleted file mode 100644
index c28537a489bc..000000000000
--- a/drivers/lightnvm/pblk-read.c
+++ /dev/null
@@ -1,474 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2016 CNEX Labs
- * Initial release: Javier Gonzalez
- *                  Matias Bjorling
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * pblk-read.c - pblk's read path
- */
-
-#include "pblk.h"
-
-/*
- * There is no guarantee that the value read from cache has not been updated and
- * resides at another location in the cache. We guarantee though that if the
- * value is read from the cache, it belongs to the mapped lba. In order to
- * guarantee that writes and reads are ordered, a flush must be issued.
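
For reference, the ring accounting used by pblk_rb_wrap_pos() and pblk_rb_flush_point_count() above reduces to modular arithmetic on a power-of-two ring, where unsigned subtraction handles wrap-around for free. A minimal sketch with illustrative names (not the kernel implementation):

/* Ring positions grow monotonically and are reduced modulo the ring
 * size, which must be a power of two. */
static inline unsigned int ring_wrap(unsigned int pos, unsigned int size)
{
	return pos & (size - 1);
}

/* Entries between tail and head, correct across wrap-around thanks to
 * unsigned arithmetic: with size = 8, head = 2 and tail = 6 (head has
 * wrapped), this yields (2 - 6) & 7 = 4. */
static inline unsigned int ring_count(unsigned int head, unsigned int tail,
				      unsigned int size)
{
	return (head - tail) & (size - 1);
}
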
- */
-static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
-				sector_t lba, struct ppa_addr ppa)
-{
-#ifdef CONFIG_NVM_PBLK_DEBUG
-	/* Callers must ensure that the ppa points to a cache address */
-	BUG_ON(pblk_ppa_empty(ppa));
-	BUG_ON(!pblk_addr_in_cache(ppa));
-#endif
-
-	return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa);
-}
-
-static int pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
-				struct bio *bio, sector_t blba,
-				bool *from_cache)
-{
-	void *meta_list = rqd->meta_list;
-	int nr_secs, i;
-
-retry:
-	nr_secs = pblk_lookup_l2p_seq(pblk, rqd->ppa_list, blba, rqd->nr_ppas,
-					from_cache);
-
-	if (!*from_cache)
-		goto end;
-
-	for (i = 0; i < nr_secs; i++) {
-		struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
-		sector_t lba = blba + i;
-
-		if (pblk_ppa_empty(rqd->ppa_list[i])) {
-			__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
-
-			meta->lba = addr_empty;
-		} else if (pblk_addr_in_cache(rqd->ppa_list[i])) {
-			/*
-			 * Try to read from write buffer. The address is later
-			 * checked on the write buffer to prevent retrieving
-			 * overwritten data.
-			 */
-			if (!pblk_read_from_cache(pblk, bio, lba,
-							rqd->ppa_list[i])) {
-				if (i == 0) {
-					/*
-					 * We have not called bio_advance()
-					 * yet, so we can simply retry.
-					 */
-					goto retry;
-				} else {
-					/*
-					 * We have already called
-					 * bio_advance(), so we cannot retry;
-					 * return from this function so that
-					 * the caller can handle the bio
-					 * splitting at the current sector
-					 * position.
-					 */
-					nr_secs = i;
-					goto end;
-				}
-			}
-			meta->lba = cpu_to_le64(lba);
-#ifdef CONFIG_NVM_PBLK_DEBUG
-			atomic_long_inc(&pblk->cache_reads);
-#endif
-		}
-		bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
-	}
-
-end:
-	if (pblk_io_aligned(pblk, nr_secs))
-		rqd->is_seq = 1;
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
-	atomic_long_add(nr_secs, &pblk->inflight_reads);
-#endif
-
-	return nr_secs;
-}
-
-static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd,
-				sector_t blba)
-{
-	void *meta_list = rqd->meta_list;
-	int nr_lbas = rqd->nr_ppas;
-	int i;
-
-	if (!pblk_is_oob_meta_supported(pblk))
-		return;
-
-	for (i = 0; i < nr_lbas; i++) {
-		struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
-		u64 lba = le64_to_cpu(meta->lba);
-
-		if (lba == ADDR_EMPTY)
-			continue;
-
-		if (lba != blba + i) {
-#ifdef CONFIG_NVM_PBLK_DEBUG
-			struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
-
-			print_ppa(pblk, &ppa_list[i], "seq", i);
-#endif
-			pblk_err(pblk, "corrupted read LBA (%llu/%llu)\n",
-				 lba, (u64)blba + i);
-			WARN_ON(1);
-		}
-	}
-}
-
-/*
- * There can be holes in the lba list.
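
Both pblk_read_check_seq() above and pblk_read_check_rand() below follow the same verification pattern: compare each out-of-band lba against the expected one, skipping empty (padded or hole) slots. A compact sketch, assuming a plain array of lbas stands in for the per-sector metadata:

#include <stdint.h>
#include <stdio.h>

#define ADDR_EMPTY ((uint64_t)-1)

/* Return the number of mismatches between the OOB-recorded LBAs and the
 * LBAs the caller expected; empty (padded) slots are skipped. */
static int check_oob_lbas(const uint64_t *oob_lba, uint64_t start_lba,
			  int nr_secs)
{
	int i, bad = 0;

	for (i = 0; i < nr_secs; i++) {
		if (oob_lba[i] == ADDR_EMPTY)
			continue;	/* padded sector, nothing to check */
		if (oob_lba[i] != start_lba + i) {
			fprintf(stderr, "corrupted LBA (%llu/%llu)\n",
				(unsigned long long)oob_lba[i],
				(unsigned long long)(start_lba + i));
			bad++;
		}
	}
	return bad;
}
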
- */ -static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd, - u64 *lba_list, int nr_lbas) -{ - void *meta_lba_list = rqd->meta_list; - int i, j; - - if (!pblk_is_oob_meta_supported(pblk)) - return; - - for (i = 0, j = 0; i < nr_lbas; i++) { - struct pblk_sec_meta *meta = pblk_get_meta(pblk, - meta_lba_list, j); - u64 lba = lba_list[i]; - u64 meta_lba; - - if (lba == ADDR_EMPTY) - continue; - - meta_lba = le64_to_cpu(meta->lba); - - if (lba != meta_lba) { -#ifdef CONFIG_NVM_PBLK_DEBUG - struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - - print_ppa(pblk, &ppa_list[j], "rnd", j); -#endif - pblk_err(pblk, "corrupted read LBA (%llu/%llu)\n", - meta_lba, lba); - WARN_ON(1); - } - - j++; - } - - WARN_ONCE(j != rqd->nr_ppas, "pblk: corrupted random request\n"); -} - -static void pblk_end_user_read(struct bio *bio, int error) -{ - if (error && error != NVM_RSP_WARN_HIGHECC) - bio_io_error(bio); - else - bio_endio(bio); -} - -static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd, - bool put_line) -{ - struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd); - struct bio *int_bio = rqd->bio; - unsigned long start_time = r_ctx->start_time; - - bio_end_io_acct(int_bio, start_time); - - if (rqd->error) - pblk_log_read_err(pblk, rqd); - - pblk_read_check_seq(pblk, rqd, r_ctx->lba); - bio_put(int_bio); - - if (put_line) - pblk_rq_to_line_put(pblk, rqd); - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_add(rqd->nr_ppas, &pblk->sync_reads); - atomic_long_sub(rqd->nr_ppas, &pblk->inflight_reads); -#endif - - pblk_free_rqd(pblk, rqd, PBLK_READ); - atomic_dec(&pblk->inflight_io); -} - -static void pblk_end_io_read(struct nvm_rq *rqd) -{ - struct pblk *pblk = rqd->private; - struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd); - struct bio *bio = (struct bio *)r_ctx->private; - - pblk_end_user_read(bio, rqd->error); - __pblk_end_io_read(pblk, rqd, true); -} - -static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio, - sector_t lba, bool *from_cache) -{ - struct pblk_sec_meta *meta = pblk_get_meta(pblk, rqd->meta_list, 0); - struct ppa_addr ppa; - - pblk_lookup_l2p_seq(pblk, &ppa, lba, 1, from_cache); - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_inc(&pblk->inflight_reads); -#endif - -retry: - if (pblk_ppa_empty(ppa)) { - __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); - - meta->lba = addr_empty; - return; - } - - /* Try to read from write buffer. The address is later checked on the - * write buffer to prevent retrieving overwritten data. 
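
The retry logic around pblk_read_from_cache() amounts to a lookup/copy/retry cycle: a failed cache copy means the entry was recycled underneath us, so the L2P lookup is redone and may now resolve to the device. A schematic version of the cycle; the helpers are hypothetical stand-ins, stubbed only to keep the sketch self-contained:

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical stand-ins for the pblk helpers. */
static uint64_t lookup_l2p(uint64_t lba)	{ return lba; }
static bool addr_in_cache(uint64_t ppa)		{ return ppa & 1; }
static void read_from_media(uint64_t ppa)	{ (void)ppa; }
static bool copy_from_cache(uint64_t lba, uint64_t ppa)
{
	(void)lba; (void)ppa;
	return true;
}

/* A failed cache copy means the entry was overwritten in the meantime;
 * redo the lookup, which may now point at the media. */
static void read_sector(uint64_t lba)
{
	for (;;) {
		uint64_t ppa = lookup_l2p(lba);

		if (!addr_in_cache(ppa)) {
			read_from_media(ppa);
			break;
		}
		if (copy_from_cache(lba, ppa))
			break;
	}
}
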
- */
-	if (pblk_addr_in_cache(ppa)) {
-		if (!pblk_read_from_cache(pblk, bio, lba, ppa)) {
-			pblk_lookup_l2p_seq(pblk, &ppa, lba, 1, from_cache);
-			goto retry;
-		}
-
-		meta->lba = cpu_to_le64(lba);
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
-		atomic_long_inc(&pblk->cache_reads);
-#endif
-	} else {
-		rqd->ppa_addr = ppa;
-	}
-}
-
-void pblk_submit_read(struct pblk *pblk, struct bio *bio)
-{
-	sector_t blba = pblk_get_lba(bio);
-	unsigned int nr_secs = pblk_get_secs(bio);
-	bool from_cache;
-	struct pblk_g_ctx *r_ctx;
-	struct nvm_rq *rqd;
-	struct bio *int_bio, *split_bio;
-	unsigned long start_time;
-
-	start_time = bio_start_io_acct(bio);
-
-	rqd = pblk_alloc_rqd(pblk, PBLK_READ);
-
-	rqd->opcode = NVM_OP_PREAD;
-	rqd->nr_ppas = nr_secs;
-	rqd->private = pblk;
-	rqd->end_io = pblk_end_io_read;
-
-	r_ctx = nvm_rq_to_pdu(rqd);
-	r_ctx->start_time = start_time;
-	r_ctx->lba = blba;
-
-	if (pblk_alloc_rqd_meta(pblk, rqd)) {
-		bio_io_error(bio);
-		pblk_free_rqd(pblk, rqd, PBLK_READ);
-		return;
-	}
-
-	/* Clone read bio to deal internally with:
-	 * - read errors when reading from drive
-	 * - bio_advance() calls during cache reads
-	 */
-	int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set);
-
-	if (nr_secs > 1)
-		nr_secs = pblk_read_ppalist_rq(pblk, rqd, int_bio, blba,
-						&from_cache);
-	else
-		pblk_read_rq(pblk, rqd, int_bio, blba, &from_cache);
-
-split_retry:
-	r_ctx->private = bio; /* original bio */
-	rqd->bio = int_bio; /* internal bio */
-
-	if (from_cache && nr_secs == rqd->nr_ppas) {
-		/* All data was read from cache, we can complete the IO. */
-		pblk_end_user_read(bio, 0);
-		atomic_inc(&pblk->inflight_io);
-		__pblk_end_io_read(pblk, rqd, false);
-	} else if (nr_secs != rqd->nr_ppas) {
-		/* The read bio request could be partially filled by the write
-		 * buffer, but there are some holes that need to be read from
-		 * the drive. To handle this, we use the block layer mechanism
-		 * to split this request into smaller ones and chain them.
-		 */
-		split_bio = bio_split(bio, nr_secs * NR_PHY_IN_LOG, GFP_KERNEL,
-					&pblk_bio_set);
-		bio_chain(split_bio, bio);
-		submit_bio_noacct(bio);
-
-		/* The new bio contains the first N sectors of the previous
-		 * one, so we can continue to use the existing rqd, but we
-		 * need to shrink the number of PPAs in it. The new bio is
-		 * also guaranteed to contain either data from the cache or
-		 * from the drive, never a mix of them.
-		 */
-		bio = split_bio;
-		rqd->nr_ppas = nr_secs;
-		if (rqd->nr_ppas == 1)
-			rqd->ppa_addr = rqd->ppa_list[0];
-
-		/* Recreate int_bio - the existing one might already have had
-		 * some internal fields modified.
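
The split_retry path above follows the generic block-layer split-and-chain pattern. A condensed sketch using the real bio helpers (bio_split(), bio_chain(), submit_bio_noacct()); serve_prefix() itself is an illustrative name, not a pblk function:

#include <linux/bio.h>

/* Carve off the 'sectors' that can be served now, chain the remainder to
 * the original bio so its completion is held until both halves finish,
 * and resubmit the remainder to the block stack. */
static struct bio *serve_prefix(struct bio *bio, unsigned int sectors,
				struct bio_set *bs)
{
	struct bio *split = bio_split(bio, sectors, GFP_KERNEL, bs);

	bio_chain(split, bio);	/* parent completes only after split does */
	submit_bio_noacct(bio);	/* remainder goes back to the stack */

	return split;		/* caller services the first part */
}
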
- */ - bio_put(int_bio); - int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set); - goto split_retry; - } else if (pblk_submit_io(pblk, rqd, NULL)) { - /* Submitting IO to drive failed, let's report an error */ - rqd->error = -ENODEV; - pblk_end_io_read(rqd); - } -} - -static int read_ppalist_rq_gc(struct pblk *pblk, struct nvm_rq *rqd, - struct pblk_line *line, u64 *lba_list, - u64 *paddr_list_gc, unsigned int nr_secs) -{ - struct ppa_addr ppa_list_l2p[NVM_MAX_VLBA]; - struct ppa_addr ppa_gc; - int valid_secs = 0; - int i; - - pblk_lookup_l2p_rand(pblk, ppa_list_l2p, lba_list, nr_secs); - - for (i = 0; i < nr_secs; i++) { - if (lba_list[i] == ADDR_EMPTY) - continue; - - ppa_gc = addr_to_gen_ppa(pblk, paddr_list_gc[i], line->id); - if (!pblk_ppa_comp(ppa_list_l2p[i], ppa_gc)) { - paddr_list_gc[i] = lba_list[i] = ADDR_EMPTY; - continue; - } - - rqd->ppa_list[valid_secs++] = ppa_list_l2p[i]; - } - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_add(valid_secs, &pblk->inflight_reads); -#endif - - return valid_secs; -} - -static int read_rq_gc(struct pblk *pblk, struct nvm_rq *rqd, - struct pblk_line *line, sector_t lba, - u64 paddr_gc) -{ - struct ppa_addr ppa_l2p, ppa_gc; - int valid_secs = 0; - - if (lba == ADDR_EMPTY) - goto out; - - /* logic error: lba out-of-bounds */ - if (lba >= pblk->capacity) { - WARN(1, "pblk: read lba out of bounds\n"); - goto out; - } - - spin_lock(&pblk->trans_lock); - ppa_l2p = pblk_trans_map_get(pblk, lba); - spin_unlock(&pblk->trans_lock); - - ppa_gc = addr_to_gen_ppa(pblk, paddr_gc, line->id); - if (!pblk_ppa_comp(ppa_l2p, ppa_gc)) - goto out; - - rqd->ppa_addr = ppa_l2p; - valid_secs = 1; - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_inc(&pblk->inflight_reads); -#endif - -out: - return valid_secs; -} - -int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq) -{ - struct nvm_rq rqd; - int ret = NVM_IO_OK; - - memset(&rqd, 0, sizeof(struct nvm_rq)); - - ret = pblk_alloc_rqd_meta(pblk, &rqd); - if (ret) - return ret; - - if (gc_rq->nr_secs > 1) { - gc_rq->secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, gc_rq->line, - gc_rq->lba_list, - gc_rq->paddr_list, - gc_rq->nr_secs); - if (gc_rq->secs_to_gc == 1) - rqd.ppa_addr = rqd.ppa_list[0]; - } else { - gc_rq->secs_to_gc = read_rq_gc(pblk, &rqd, gc_rq->line, - gc_rq->lba_list[0], - gc_rq->paddr_list[0]); - } - - if (!(gc_rq->secs_to_gc)) - goto out; - - rqd.opcode = NVM_OP_PREAD; - rqd.nr_ppas = gc_rq->secs_to_gc; - - if (pblk_submit_io_sync(pblk, &rqd, gc_rq->data)) { - ret = -EIO; - goto err_free_dma; - } - - pblk_read_check_rand(pblk, &rqd, gc_rq->lba_list, gc_rq->nr_secs); - - atomic_dec(&pblk->inflight_io); - - if (rqd.error) { - atomic_long_inc(&pblk->read_failed_gc); -#ifdef CONFIG_NVM_PBLK_DEBUG - pblk_print_failed_rqd(pblk, &rqd, rqd.error); -#endif - } - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_add(gc_rq->secs_to_gc, &pblk->sync_reads); - atomic_long_add(gc_rq->secs_to_gc, &pblk->recov_gc_reads); - atomic_long_sub(gc_rq->secs_to_gc, &pblk->inflight_reads); -#endif - -out: - pblk_free_rqd_meta(pblk, &rqd); - return ret; - -err_free_dma: - pblk_free_rqd_meta(pblk, &rqd); - return ret; -} diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c deleted file mode 100644 index 0e6f0c76e930..000000000000 --- a/drivers/lightnvm/pblk-recovery.c +++ /dev/null @@ -1,874 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2016 CNEX Labs - * Initial: Javier Gonzalez - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of 
the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * pblk-recovery.c - pblk's recovery path - * - * The L2P recovery path is single threaded as the L2P table is updated in order - * following the line sequence ID. - */ - -#include "pblk.h" -#include "pblk-trace.h" - -int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta_buf) -{ - u32 crc; - - crc = pblk_calc_emeta_crc(pblk, emeta_buf); - if (le32_to_cpu(emeta_buf->crc) != crc) - return 1; - - if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC) - return 1; - - return 0; -} - -static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_emeta *emeta = line->emeta; - struct line_emeta *emeta_buf = emeta->buf; - __le64 *lba_list; - u64 data_start, data_end; - u64 nr_valid_lbas, nr_lbas = 0; - u64 i; - - lba_list = emeta_to_lbas(pblk, emeta_buf); - if (!lba_list) - return 1; - - data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec; - data_end = line->emeta_ssec; - nr_valid_lbas = le64_to_cpu(emeta_buf->nr_valid_lbas); - - for (i = data_start; i < data_end; i++) { - struct ppa_addr ppa; - int pos; - - ppa = addr_to_gen_ppa(pblk, i, line->id); - pos = pblk_ppa_to_pos(geo, ppa); - - /* Do not update bad blocks */ - if (test_bit(pos, line->blk_bitmap)) - continue; - - if (le64_to_cpu(lba_list[i]) == ADDR_EMPTY) { - spin_lock(&line->lock); - if (test_and_set_bit(i, line->invalid_bitmap)) - WARN_ONCE(1, "pblk: rec. double invalidate:\n"); - else - le32_add_cpu(line->vsc, -1); - spin_unlock(&line->lock); - - continue; - } - - pblk_update_map(pblk, le64_to_cpu(lba_list[i]), ppa); - nr_lbas++; - } - - if (nr_valid_lbas != nr_lbas) - pblk_err(pblk, "line %d - inconsistent lba list(%llu/%llu)\n", - line->id, nr_valid_lbas, nr_lbas); - - line->left_msecs = 0; - - return 0; -} - -static void pblk_update_line_wp(struct pblk *pblk, struct pblk_line *line, - u64 written_secs) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - int i; - - for (i = 0; i < written_secs; i += pblk->min_write_pgs) - __pblk_alloc_page(pblk, line, pblk->min_write_pgs); - - spin_lock(&l_mg->free_lock); - if (written_secs > line->left_msecs) { - /* - * We have all data sectors written - * and some emeta sectors written too. - */ - line->left_msecs = 0; - } else { - /* We have only some data sectors written. 
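
The accounting below reduces to a saturating subtraction. A worked example, with illustrative numbers in the comment:

#include <stdint.h>

/* With left_msecs = 100 data sectors outstanding, recovering
 * written_secs = 120 means the line spilled into emeta, so 0 data
 * sectors remain; recovering 80 leaves 20. */
static uint64_t data_secs_left(uint64_t left_msecs, uint64_t written_secs)
{
	return written_secs > left_msecs ? 0 : left_msecs - written_secs;
}
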
*/ - line->left_msecs -= written_secs; - } - spin_unlock(&l_mg->free_lock); -} - -static u64 pblk_sec_in_open_line(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_meta *lm = &pblk->lm; - int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line); - u64 written_secs = 0; - int valid_chunks = 0; - int i; - - for (i = 0; i < lm->blk_per_line; i++) { - struct nvm_chk_meta *chunk = &line->chks[i]; - - if (chunk->state & NVM_CHK_ST_OFFLINE) - continue; - - written_secs += chunk->wp; - valid_chunks++; - } - - if (lm->blk_per_line - nr_bb != valid_chunks) - pblk_err(pblk, "recovery line %d is bad\n", line->id); - - pblk_update_line_wp(pblk, line, written_secs - lm->smeta_sec); - - return written_secs; -} - -struct pblk_recov_alloc { - struct ppa_addr *ppa_list; - void *meta_list; - struct nvm_rq *rqd; - void *data; - dma_addr_t dma_ppa_list; - dma_addr_t dma_meta_list; -}; - -static void pblk_recov_complete(struct kref *ref) -{ - struct pblk_pad_rq *pad_rq = container_of(ref, struct pblk_pad_rq, ref); - - complete(&pad_rq->wait); -} - -static void pblk_end_io_recov(struct nvm_rq *rqd) -{ - struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - struct pblk_pad_rq *pad_rq = rqd->private; - struct pblk *pblk = pad_rq->pblk; - - pblk_up_chunk(pblk, ppa_list[0]); - - pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); - - atomic_dec(&pblk->inflight_io); - kref_put(&pad_rq->ref, pblk_recov_complete); -} - -/* pad line using line bitmap. */ -static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line, - int left_ppas) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - void *meta_list; - struct pblk_pad_rq *pad_rq; - struct nvm_rq *rqd; - struct ppa_addr *ppa_list; - void *data; - __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf); - u64 w_ptr = line->cur_sec; - int left_line_ppas, rq_ppas; - int i, j; - int ret = 0; - - spin_lock(&line->lock); - left_line_ppas = line->left_msecs; - spin_unlock(&line->lock); - - pad_rq = kmalloc(sizeof(struct pblk_pad_rq), GFP_KERNEL); - if (!pad_rq) - return -ENOMEM; - - data = vzalloc(array_size(pblk->max_write_pgs, geo->csecs)); - if (!data) { - ret = -ENOMEM; - goto free_rq; - } - - pad_rq->pblk = pblk; - init_completion(&pad_rq->wait); - kref_init(&pad_rq->ref); - -next_pad_rq: - rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false); - if (rq_ppas < pblk->min_write_pgs) { - pblk_err(pblk, "corrupted pad line %d\n", line->id); - goto fail_complete; - } - - rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT); - - ret = pblk_alloc_rqd_meta(pblk, rqd); - if (ret) { - pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); - goto fail_complete; - } - - rqd->bio = NULL; - rqd->opcode = NVM_OP_PWRITE; - rqd->is_seq = 1; - rqd->nr_ppas = rq_ppas; - rqd->end_io = pblk_end_io_recov; - rqd->private = pad_rq; - - ppa_list = nvm_rq_to_ppa_list(rqd); - meta_list = rqd->meta_list; - - for (i = 0; i < rqd->nr_ppas; ) { - struct ppa_addr ppa; - int pos; - - w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs); - ppa = addr_to_gen_ppa(pblk, w_ptr, line->id); - pos = pblk_ppa_to_pos(geo, ppa); - - while (test_bit(pos, line->blk_bitmap)) { - w_ptr += pblk->min_write_pgs; - ppa = addr_to_gen_ppa(pblk, w_ptr, line->id); - pos = pblk_ppa_to_pos(geo, ppa); - } - - for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) { - struct ppa_addr dev_ppa; - struct pblk_sec_meta *meta; - __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); - - dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id); - - pblk_map_invalidate(pblk, dev_ppa); - lba_list[w_ptr] = addr_empty; - meta = 
pblk_get_meta(pblk, meta_list, i); - meta->lba = addr_empty; - ppa_list[i] = dev_ppa; - } - } - - kref_get(&pad_rq->ref); - pblk_down_chunk(pblk, ppa_list[0]); - - ret = pblk_submit_io(pblk, rqd, data); - if (ret) { - pblk_err(pblk, "I/O submission failed: %d\n", ret); - pblk_up_chunk(pblk, ppa_list[0]); - kref_put(&pad_rq->ref, pblk_recov_complete); - pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); - goto fail_complete; - } - - left_line_ppas -= rq_ppas; - left_ppas -= rq_ppas; - if (left_ppas && left_line_ppas) - goto next_pad_rq; - -fail_complete: - kref_put(&pad_rq->ref, pblk_recov_complete); - wait_for_completion(&pad_rq->wait); - - if (!pblk_line_is_full(line)) - pblk_err(pblk, "corrupted padded line: %d\n", line->id); - - vfree(data); -free_rq: - kfree(pad_rq); - return ret; -} - -static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - int distance = geo->mw_cunits * geo->all_luns * geo->ws_opt; - - return (distance > line->left_msecs) ? line->left_msecs : distance; -} - -/* Return a chunk belonging to a line by stripe(write order) index */ -static struct nvm_chk_meta *pblk_get_stripe_chunk(struct pblk *pblk, - struct pblk_line *line, - int index) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_lun *rlun; - struct ppa_addr ppa; - int pos; - - rlun = &pblk->luns[index]; - ppa = rlun->bppa; - pos = pblk_ppa_to_pos(geo, ppa); - - return &line->chks[pos]; -} - -static int pblk_line_wps_are_unbalanced(struct pblk *pblk, - struct pblk_line *line) -{ - struct pblk_line_meta *lm = &pblk->lm; - int blk_in_line = lm->blk_per_line; - struct nvm_chk_meta *chunk; - u64 max_wp, min_wp; - int i; - - i = find_first_zero_bit(line->blk_bitmap, blk_in_line); - - /* If there is one or zero good chunks in the line, - * the write pointers can't be unbalanced. 
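
The check that follows can be read as: take the first good chunk's write pointer as a reference, allow a window of one maximal write below it, and flag any other good chunk whose write pointer falls outside that window. A simplified standalone sketch (flat arrays instead of the kernel's bad-block bitmap; names are illustrative):

#include <stdbool.h>
#include <stdint.h>

static bool wps_unbalanced(const uint64_t *wp, const bool *bad,
			   int nr_chunks, uint64_t max_write_pgs)
{
	uint64_t max_wp, min_wp;
	int i, first = -1;

	for (i = 0; i < nr_chunks; i++) {
		if (!bad[i]) {
			first = i;
			break;
		}
	}
	/* One or zero good chunks: trivially balanced. */
	if (first < 0 || first >= nr_chunks - 1)
		return false;

	max_wp = wp[first];
	min_wp = max_wp > max_write_pgs ? max_wp - max_write_pgs : 0;

	for (i = first + 1; i < nr_chunks; i++)
		if (!bad[i] && (wp[i] > max_wp || wp[i] < min_wp))
			return true;

	return false;
}
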
- */ - if (i >= (blk_in_line - 1)) - return 0; - - chunk = pblk_get_stripe_chunk(pblk, line, i); - max_wp = chunk->wp; - if (max_wp > pblk->max_write_pgs) - min_wp = max_wp - pblk->max_write_pgs; - else - min_wp = 0; - - i = find_next_zero_bit(line->blk_bitmap, blk_in_line, i + 1); - while (i < blk_in_line) { - chunk = pblk_get_stripe_chunk(pblk, line, i); - if (chunk->wp > max_wp || chunk->wp < min_wp) - return 1; - - i = find_next_zero_bit(line->blk_bitmap, blk_in_line, i + 1); - } - - return 0; -} - -static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line, - struct pblk_recov_alloc p) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct pblk_line_meta *lm = &pblk->lm; - struct nvm_geo *geo = &dev->geo; - struct ppa_addr *ppa_list; - void *meta_list; - struct nvm_rq *rqd; - void *data; - dma_addr_t dma_ppa_list, dma_meta_list; - __le64 *lba_list; - u64 paddr = pblk_line_smeta_start(pblk, line) + lm->smeta_sec; - bool padded = false; - int rq_ppas; - int i, j; - int ret; - u64 left_ppas = pblk_sec_in_open_line(pblk, line) - lm->smeta_sec; - - if (pblk_line_wps_are_unbalanced(pblk, line)) - pblk_warn(pblk, "recovering unbalanced line (%d)\n", line->id); - - ppa_list = p.ppa_list; - meta_list = p.meta_list; - rqd = p.rqd; - data = p.data; - dma_ppa_list = p.dma_ppa_list; - dma_meta_list = p.dma_meta_list; - - lba_list = emeta_to_lbas(pblk, line->emeta->buf); - -next_rq: - memset(rqd, 0, pblk_g_rq_size); - - rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false); - if (!rq_ppas) - rq_ppas = pblk->min_write_pgs; - -retry_rq: - rqd->bio = NULL; - rqd->opcode = NVM_OP_PREAD; - rqd->meta_list = meta_list; - rqd->nr_ppas = rq_ppas; - rqd->ppa_list = ppa_list; - rqd->dma_ppa_list = dma_ppa_list; - rqd->dma_meta_list = dma_meta_list; - ppa_list = nvm_rq_to_ppa_list(rqd); - - if (pblk_io_aligned(pblk, rq_ppas)) - rqd->is_seq = 1; - - for (i = 0; i < rqd->nr_ppas; ) { - struct ppa_addr ppa; - int pos; - - ppa = addr_to_gen_ppa(pblk, paddr, line->id); - pos = pblk_ppa_to_pos(geo, ppa); - - while (test_bit(pos, line->blk_bitmap)) { - paddr += pblk->min_write_pgs; - ppa = addr_to_gen_ppa(pblk, paddr, line->id); - pos = pblk_ppa_to_pos(geo, ppa); - } - - for (j = 0; j < pblk->min_write_pgs; j++, i++) - ppa_list[i] = - addr_to_gen_ppa(pblk, paddr + j, line->id); - } - - ret = pblk_submit_io_sync(pblk, rqd, data); - if (ret) { - pblk_err(pblk, "I/O submission failed: %d\n", ret); - return ret; - } - - atomic_dec(&pblk->inflight_io); - - /* If a read fails, do a best effort by padding the line and retrying */ - if (rqd->error && rqd->error != NVM_RSP_WARN_HIGHECC) { - int pad_distance, ret; - - if (padded) { - pblk_log_read_err(pblk, rqd); - return -EINTR; - } - - pad_distance = pblk_pad_distance(pblk, line); - ret = pblk_recov_pad_line(pblk, line, pad_distance); - if (ret) { - return ret; - } - - padded = true; - goto retry_rq; - } - - pblk_get_packed_meta(pblk, rqd); - - for (i = 0; i < rqd->nr_ppas; i++) { - struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i); - u64 lba = le64_to_cpu(meta->lba); - - lba_list[paddr++] = cpu_to_le64(lba); - - if (lba == ADDR_EMPTY || lba >= pblk->capacity) - continue; - - line->nr_valid_lbas++; - pblk_update_map(pblk, lba, ppa_list[i]); - } - - left_ppas -= rq_ppas; - if (left_ppas > 0) - goto next_rq; - -#ifdef CONFIG_NVM_PBLK_DEBUG - WARN_ON(padded && !pblk_line_is_full(line)); -#endif - - return 0; -} - -/* Scan line for lbas on out of bound area */ -static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line) -{ - struct 
nvm_tgt_dev *dev = pblk->dev;
-	struct nvm_geo *geo = &dev->geo;
-	struct nvm_rq *rqd;
-	struct ppa_addr *ppa_list;
-	void *meta_list;
-	struct pblk_recov_alloc p;
-	void *data;
-	dma_addr_t dma_ppa_list, dma_meta_list;
-	int ret = 0;
-
-	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
-	if (!meta_list)
-		return -ENOMEM;
-
-	ppa_list = (void *)(meta_list) + pblk_dma_meta_size(pblk);
-	dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk);
-
-	data = kcalloc(pblk->max_write_pgs, geo->csecs, GFP_KERNEL);
-	if (!data) {
-		ret = -ENOMEM;
-		goto free_meta_list;
-	}
-
-	rqd = mempool_alloc(&pblk->r_rq_pool, GFP_KERNEL);
-	memset(rqd, 0, pblk_g_rq_size);
-
-	p.ppa_list = ppa_list;
-	p.meta_list = meta_list;
-	p.rqd = rqd;
-	p.data = data;
-	p.dma_ppa_list = dma_ppa_list;
-	p.dma_meta_list = dma_meta_list;
-
-	ret = pblk_recov_scan_oob(pblk, line, p);
-	if (ret) {
-		pblk_err(pblk, "could not recover L2P from OOB\n");
-		goto out;
-	}
-
-	if (pblk_line_is_full(line))
-		pblk_line_recov_close(pblk, line);
-
-out:
-	mempool_free(rqd, &pblk->r_rq_pool);
-	kfree(data);
-free_meta_list:
-	nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
-
-	return ret;
-}
-
-/* Insert lines ordered by sequence number (seq_num) on list */
-static void pblk_recov_line_add_ordered(struct list_head *head,
-					struct pblk_line *line)
-{
-	struct pblk_line *t = NULL;
-
-	list_for_each_entry(t, head, list)
-		if (t->seq_nr > line->seq_nr)
-			break;
-
-	__list_add(&line->list, t->list.prev, &t->list);
-}
-
-static u64 pblk_line_emeta_start(struct pblk *pblk, struct pblk_line *line)
-{
-	struct nvm_tgt_dev *dev = pblk->dev;
-	struct nvm_geo *geo = &dev->geo;
-	struct pblk_line_meta *lm = &pblk->lm;
-	unsigned int emeta_secs;
-	u64 emeta_start;
-	struct ppa_addr ppa;
-	int pos;
-
-	emeta_secs = lm->emeta_sec[0];
-	emeta_start = lm->sec_per_line;
-
-	while (emeta_secs) {
-		emeta_start--;
-		ppa = addr_to_gen_ppa(pblk, emeta_start, line->id);
-		pos = pblk_ppa_to_pos(geo, ppa);
-		if (!test_bit(pos, line->blk_bitmap))
-			emeta_secs--;
-	}
-
-	return emeta_start;
-}
-
-static int pblk_recov_check_line_version(struct pblk *pblk,
-					 struct line_emeta *emeta)
-{
-	struct line_header *header = &emeta->header;
-
-	if (header->version_major != EMETA_VERSION_MAJOR) {
-		pblk_err(pblk, "line major version mismatch: %d, expected: %d\n",
-			 header->version_major, EMETA_VERSION_MAJOR);
-		return 1;
-	}
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
-	if (header->version_minor > EMETA_VERSION_MINOR)
-		pblk_info(pblk, "newer line minor version found: %d\n",
-			  header->version_minor);
-#endif
-
-	return 0;
-}
-
-static void pblk_recov_wa_counters(struct pblk *pblk,
-				   struct line_emeta *emeta)
-{
-	struct pblk_line_meta *lm = &pblk->lm;
-	struct line_header *header = &emeta->header;
-	struct wa_counters *wa = emeta_to_wa(lm, emeta);
-
-	/* WA counters were introduced in emeta version 0.2 */
-	if (header->version_major > 0 || header->version_minor >= 2) {
-		u64 user = le64_to_cpu(wa->user);
-		u64 pad = le64_to_cpu(wa->pad);
-		u64 gc = le64_to_cpu(wa->gc);
-
-		atomic64_set(&pblk->user_wa, user);
-		atomic64_set(&pblk->pad_wa, pad);
-		atomic64_set(&pblk->gc_wa, gc);
-
-		pblk->user_rst_wa = user;
-		pblk->pad_rst_wa = pad;
-		pblk->gc_rst_wa = gc;
-	}
-}
-
-static int pblk_line_was_written(struct pblk_line *line,
-				 struct pblk *pblk)
-{
-	struct pblk_line_meta *lm = &pblk->lm;
-	struct nvm_tgt_dev *dev = pblk->dev;
-	struct nvm_geo *geo = &dev->geo;
-	struct nvm_chk_meta *chunk;
-	struct ppa_addr bppa;
-	int smeta_blk;
-
-	if (line->state ==
PBLK_LINESTATE_BAD) - return 0; - - smeta_blk = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line); - if (smeta_blk >= lm->blk_per_line) - return 0; - - bppa = pblk->luns[smeta_blk].bppa; - chunk = &line->chks[pblk_ppa_to_pos(geo, bppa)]; - - if (chunk->state & NVM_CHK_ST_CLOSED || - (chunk->state & NVM_CHK_ST_OPEN - && chunk->wp >= lm->smeta_sec)) - return 1; - - return 0; -} - -static bool pblk_line_is_open(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_meta *lm = &pblk->lm; - int i; - - for (i = 0; i < lm->blk_per_line; i++) - if (line->chks[i].state & NVM_CHK_ST_OPEN) - return true; - - return false; -} - -struct pblk_line *pblk_recov_l2p(struct pblk *pblk) -{ - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line *line, *tline, *data_line = NULL; - struct pblk_smeta *smeta; - struct pblk_emeta *emeta; - struct line_smeta *smeta_buf; - int found_lines = 0, recovered_lines = 0, open_lines = 0; - int is_next = 0; - int meta_line; - int i, valid_uuid = 0; - LIST_HEAD(recov_list); - - /* TODO: Implement FTL snapshot */ - - /* Scan recovery - takes place when FTL snapshot fails */ - spin_lock(&l_mg->free_lock); - meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES); - set_bit(meta_line, &l_mg->meta_bitmap); - smeta = l_mg->sline_meta[meta_line]; - emeta = l_mg->eline_meta[meta_line]; - smeta_buf = (struct line_smeta *)smeta; - spin_unlock(&l_mg->free_lock); - - /* Order data lines using their sequence number */ - for (i = 0; i < l_mg->nr_lines; i++) { - u32 crc; - - line = &pblk->lines[i]; - - memset(smeta, 0, lm->smeta_len); - line->smeta = smeta; - line->lun_bitmap = ((void *)(smeta_buf)) + - sizeof(struct line_smeta); - - if (!pblk_line_was_written(line, pblk)) - continue; - - /* Lines that cannot be read are assumed as not written here */ - if (pblk_line_smeta_read(pblk, line)) - continue; - - crc = pblk_calc_smeta_crc(pblk, smeta_buf); - if (le32_to_cpu(smeta_buf->crc) != crc) - continue; - - if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC) - continue; - - if (smeta_buf->header.version_major != SMETA_VERSION_MAJOR) { - pblk_err(pblk, "found incompatible line version %u\n", - smeta_buf->header.version_major); - return ERR_PTR(-EINVAL); - } - - /* The first valid instance uuid is used for initialization */ - if (!valid_uuid) { - import_guid(&pblk->instance_uuid, smeta_buf->header.uuid); - valid_uuid = 1; - } - - if (!guid_equal(&pblk->instance_uuid, - (guid_t *)&smeta_buf->header.uuid)) { - pblk_debug(pblk, "ignore line %u due to uuid mismatch\n", - i); - continue; - } - - /* Update line metadata */ - spin_lock(&line->lock); - line->id = le32_to_cpu(smeta_buf->header.id); - line->type = le16_to_cpu(smeta_buf->header.type); - line->seq_nr = le64_to_cpu(smeta_buf->seq_nr); - spin_unlock(&line->lock); - - /* Update general metadata */ - spin_lock(&l_mg->free_lock); - if (line->seq_nr >= l_mg->d_seq_nr) - l_mg->d_seq_nr = line->seq_nr + 1; - l_mg->nr_free_lines--; - spin_unlock(&l_mg->free_lock); - - if (pblk_line_recov_alloc(pblk, line)) - goto out; - - pblk_recov_line_add_ordered(&recov_list, line); - found_lines++; - pblk_debug(pblk, "recovering data line %d, seq:%llu\n", - line->id, smeta_buf->seq_nr); - } - - if (!found_lines) { - guid_gen(&pblk->instance_uuid); - - spin_lock(&l_mg->free_lock); - WARN_ON_ONCE(!test_and_clear_bit(meta_line, - &l_mg->meta_bitmap)); - spin_unlock(&l_mg->free_lock); - - goto out; - } - - /* Verify closed blocks and recover this portion of L2P table*/ - 
list_for_each_entry_safe(line, tline, &recov_list, list) { - recovered_lines++; - - line->emeta_ssec = pblk_line_emeta_start(pblk, line); - line->emeta = emeta; - memset(line->emeta->buf, 0, lm->emeta_len[0]); - - if (pblk_line_is_open(pblk, line)) { - pblk_recov_l2p_from_oob(pblk, line); - goto next; - } - - if (pblk_line_emeta_read(pblk, line, line->emeta->buf)) { - pblk_recov_l2p_from_oob(pblk, line); - goto next; - } - - if (pblk_recov_check_emeta(pblk, line->emeta->buf)) { - pblk_recov_l2p_from_oob(pblk, line); - goto next; - } - - if (pblk_recov_check_line_version(pblk, line->emeta->buf)) - return ERR_PTR(-EINVAL); - - pblk_recov_wa_counters(pblk, line->emeta->buf); - - if (pblk_recov_l2p_from_emeta(pblk, line)) - pblk_recov_l2p_from_oob(pblk, line); - -next: - if (pblk_line_is_full(line)) { - struct list_head *move_list; - - spin_lock(&line->lock); - line->state = PBLK_LINESTATE_CLOSED; - trace_pblk_line_state(pblk_disk_name(pblk), line->id, - line->state); - move_list = pblk_line_gc_list(pblk, line); - spin_unlock(&line->lock); - - spin_lock(&l_mg->gc_lock); - list_move_tail(&line->list, move_list); - spin_unlock(&l_mg->gc_lock); - - mempool_free(line->map_bitmap, l_mg->bitmap_pool); - line->map_bitmap = NULL; - line->smeta = NULL; - line->emeta = NULL; - } else { - spin_lock(&line->lock); - line->state = PBLK_LINESTATE_OPEN; - spin_unlock(&line->lock); - - line->emeta->mem = 0; - atomic_set(&line->emeta->sync, 0); - - trace_pblk_line_state(pblk_disk_name(pblk), line->id, - line->state); - - data_line = line; - line->meta_line = meta_line; - - open_lines++; - } - } - - if (!open_lines) { - spin_lock(&l_mg->free_lock); - WARN_ON_ONCE(!test_and_clear_bit(meta_line, - &l_mg->meta_bitmap)); - spin_unlock(&l_mg->free_lock); - } else { - spin_lock(&l_mg->free_lock); - l_mg->data_line = data_line; - /* Allocate next line for preparation */ - l_mg->data_next = pblk_line_get(pblk); - if (l_mg->data_next) { - l_mg->data_next->seq_nr = l_mg->d_seq_nr++; - l_mg->data_next->type = PBLK_LINETYPE_DATA; - is_next = 1; - } - spin_unlock(&l_mg->free_lock); - } - - if (is_next) - pblk_line_erase(pblk, l_mg->data_next); - -out: - if (found_lines != recovered_lines) - pblk_err(pblk, "failed to recover all found lines %d/%d\n", - found_lines, recovered_lines); - - return data_line; -} - -/* - * Pad current line - */ -int pblk_recov_pad(struct pblk *pblk) -{ - struct pblk_line *line; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - int left_msecs; - int ret = 0; - - spin_lock(&l_mg->free_lock); - line = l_mg->data_line; - left_msecs = line->left_msecs; - spin_unlock(&l_mg->free_lock); - - ret = pblk_recov_pad_line(pblk, line, left_msecs); - if (ret) { - pblk_err(pblk, "tear down padding failed (%d)\n", ret); - return ret; - } - - pblk_line_close_meta(pblk, line); - return ret; -} diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c deleted file mode 100644 index a5f8bc2defbc..000000000000 --- a/drivers/lightnvm/pblk-rl.c +++ /dev/null @@ -1,254 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2016 CNEX Labs - * Initial release: Javier Gonzalez - * Matias Bjorling - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License for more details. - * - * pblk-rl.c - pblk's rate limiter for user I/O - * - */ - -#include "pblk.h" - -static void pblk_rl_kick_u_timer(struct pblk_rl *rl) -{ - mod_timer(&rl->u_timer, jiffies + msecs_to_jiffies(5000)); -} - -int pblk_rl_is_limit(struct pblk_rl *rl) -{ - int rb_space; - - rb_space = atomic_read(&rl->rb_space); - - return (rb_space == 0); -} - -int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries) -{ - int rb_user_cnt = atomic_read(&rl->rb_user_cnt); - int rb_space = atomic_read(&rl->rb_space); - - if (unlikely(rb_space >= 0) && (rb_space - nr_entries < 0)) - return NVM_IO_ERR; - - if (rb_user_cnt >= rl->rb_user_max) - return NVM_IO_REQUEUE; - - return NVM_IO_OK; -} - -void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries) -{ - int rb_space = atomic_read(&rl->rb_space); - - if (unlikely(rb_space >= 0)) - atomic_sub(nr_entries, &rl->rb_space); -} - -int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries) -{ - int rb_gc_cnt = atomic_read(&rl->rb_gc_cnt); - int rb_user_active; - - /* If there is no user I/O let GC take over space on the write buffer */ - rb_user_active = READ_ONCE(rl->rb_user_active); - return (!(rb_gc_cnt >= rl->rb_gc_max && rb_user_active)); -} - -void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries) -{ - atomic_add(nr_entries, &rl->rb_user_cnt); - - /* Release user I/O state. Protect from GC */ - smp_store_release(&rl->rb_user_active, 1); - pblk_rl_kick_u_timer(rl); -} - -void pblk_rl_werr_line_in(struct pblk_rl *rl) -{ - atomic_inc(&rl->werr_lines); -} - -void pblk_rl_werr_line_out(struct pblk_rl *rl) -{ - atomic_dec(&rl->werr_lines); -} - -void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries) -{ - atomic_add(nr_entries, &rl->rb_gc_cnt); -} - -void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc) -{ - atomic_sub(nr_user, &rl->rb_user_cnt); - atomic_sub(nr_gc, &rl->rb_gc_cnt); -} - -unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl) -{ - return atomic_read(&rl->free_blocks); -} - -unsigned long pblk_rl_nr_user_free_blks(struct pblk_rl *rl) -{ - return atomic_read(&rl->free_user_blocks); -} - -static void __pblk_rl_update_rates(struct pblk_rl *rl, - unsigned long free_blocks) -{ - struct pblk *pblk = container_of(rl, struct pblk, rl); - int max = rl->rb_budget; - int werr_gc_needed = atomic_read(&rl->werr_lines); - - if (free_blocks >= rl->high) { - if (werr_gc_needed) { - /* Allocate a small budget for recovering - * lines with write errors - */ - rl->rb_gc_max = 1 << rl->rb_windows_pw; - rl->rb_user_max = max - rl->rb_gc_max; - rl->rb_state = PBLK_RL_WERR; - } else { - rl->rb_user_max = max; - rl->rb_gc_max = 0; - rl->rb_state = PBLK_RL_OFF; - } - } else if (free_blocks < rl->high) { - int shift = rl->high_pw - rl->rb_windows_pw; - int user_windows = free_blocks >> shift; - int user_max = user_windows << ilog2(NVM_MAX_VLBA); - - rl->rb_user_max = user_max; - rl->rb_gc_max = max - user_max; - - if (free_blocks <= rl->rsv_blocks) { - rl->rb_user_max = 0; - rl->rb_gc_max = max; - } - - /* In the worst case, we will need to GC lines in the low list - * (high valid sector count). 
If there are lines to GC on high
-		 * or mid lists, these will be prioritized.
-		 */
-		rl->rb_state = PBLK_RL_LOW;
-	}
-
-	if (rl->rb_state != PBLK_RL_OFF)
-		pblk_gc_should_start(pblk);
-	else
-		pblk_gc_should_stop(pblk);
-}
-
-void pblk_rl_update_rates(struct pblk_rl *rl)
-{
-	__pblk_rl_update_rates(rl, pblk_rl_nr_user_free_blks(rl));
-}
-
-void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
-{
-	int blk_in_line = atomic_read(&line->blk_in_line);
-	int free_blocks;
-
-	atomic_add(blk_in_line, &rl->free_blocks);
-	free_blocks = atomic_add_return(blk_in_line, &rl->free_user_blocks);
-
-	__pblk_rl_update_rates(rl, free_blocks);
-}
-
-void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line,
-			    bool used)
-{
-	int blk_in_line = atomic_read(&line->blk_in_line);
-	int free_blocks;
-
-	atomic_sub(blk_in_line, &rl->free_blocks);
-
-	if (used)
-		free_blocks = atomic_sub_return(blk_in_line,
-						&rl->free_user_blocks);
-	else
-		free_blocks = atomic_read(&rl->free_user_blocks);
-
-	__pblk_rl_update_rates(rl, free_blocks);
-}
-
-int pblk_rl_high_thrs(struct pblk_rl *rl)
-{
-	return rl->high;
-}
-
-int pblk_rl_max_io(struct pblk_rl *rl)
-{
-	return rl->rb_max_io;
-}
-
-static void pblk_rl_u_timer(struct timer_list *t)
-{
-	struct pblk_rl *rl = from_timer(rl, t, u_timer);
-
-	/* Release user I/O state. Protect from GC */
-	smp_store_release(&rl->rb_user_active, 0);
-}
-
-void pblk_rl_free(struct pblk_rl *rl)
-{
-	del_timer(&rl->u_timer);
-}
-
-void pblk_rl_init(struct pblk_rl *rl, int budget, int threshold)
-{
-	struct pblk *pblk = container_of(rl, struct pblk, rl);
-	struct nvm_tgt_dev *dev = pblk->dev;
-	struct nvm_geo *geo = &dev->geo;
-	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
-	struct pblk_line_meta *lm = &pblk->lm;
-	int sec_meta, blk_meta;
-	unsigned int rb_windows;
-
-	/* Consider sectors used for metadata */
-	sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
-	blk_meta = DIV_ROUND_UP(sec_meta, geo->clba);
-
-	rl->high = pblk->op_blks - blk_meta - lm->blk_per_line;
-	rl->high_pw = get_count_order(rl->high);
-
-	rl->rsv_blocks = pblk_get_min_chks(pblk);
-
-	/* This will always be a power-of-2 */
-	rb_windows = budget / NVM_MAX_VLBA;
-	rl->rb_windows_pw = get_count_order(rb_windows);
-
-	/* To start with, all buffer is available to user I/O writers */
-	rl->rb_budget = budget;
-	rl->rb_user_max = budget;
-	rl->rb_gc_max = 0;
-	rl->rb_state = PBLK_RL_HIGH;
-
-	/* Maximize I/O size and ensure that the back threshold is respected */
-	if (threshold)
-		rl->rb_max_io = budget - pblk->min_write_pgs_data - threshold;
-	else
-		rl->rb_max_io = budget - pblk->min_write_pgs_data - 1;
-
-	atomic_set(&rl->rb_user_cnt, 0);
-	atomic_set(&rl->rb_gc_cnt, 0);
-	atomic_set(&rl->rb_space, -1);
-	atomic_set(&rl->werr_lines, 0);
-
-	timer_setup(&rl->u_timer, pblk_rl_u_timer, 0);
-
-	rl->rb_user_active = 0;
-	rl->rb_gc_active = 0;
-}
diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c
deleted file mode 100644
index 6387302b03f2..000000000000
--- a/drivers/lightnvm/pblk-sysfs.c
+++ /dev/null
@@ -1,728 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2016 CNEX Labs
- * Initial release: Javier Gonzalez
- *                  Matias Bjorling
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Implementation of a physical block-device target for Open-channel SSDs. - * - * pblk-sysfs.c - pblk's sysfs - * - */ - -#include "pblk.h" - -static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_lun *rlun; - ssize_t sz = 0; - int i; - - for (i = 0; i < geo->all_luns; i++) { - int active = 1; - - rlun = &pblk->luns[i]; - if (!down_trylock(&rlun->wr_sem)) { - active = 0; - up(&rlun->wr_sem); - } - sz += scnprintf(page + sz, PAGE_SIZE - sz, - "pblk: pos:%d, ch:%d, lun:%d - %d\n", - i, - rlun->bppa.a.ch, - rlun->bppa.a.lun, - active); - } - - return sz; -} - -static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page) -{ - int free_blocks, free_user_blocks, total_blocks; - int rb_user_max, rb_user_cnt; - int rb_gc_max, rb_gc_cnt, rb_budget, rb_state; - - free_blocks = pblk_rl_nr_free_blks(&pblk->rl); - free_user_blocks = pblk_rl_nr_user_free_blks(&pblk->rl); - rb_user_max = pblk->rl.rb_user_max; - rb_user_cnt = atomic_read(&pblk->rl.rb_user_cnt); - rb_gc_max = pblk->rl.rb_gc_max; - rb_gc_cnt = atomic_read(&pblk->rl.rb_gc_cnt); - rb_budget = pblk->rl.rb_budget; - rb_state = pblk->rl.rb_state; - - total_blocks = pblk->rl.total_blocks; - - return snprintf(page, PAGE_SIZE, - "u:%u/%u,gc:%u/%u(%u)(stop:<%u,full:>%u,free:%d/%d/%d)-%d\n", - rb_user_cnt, - rb_user_max, - rb_gc_cnt, - rb_gc_max, - rb_state, - rb_budget, - pblk->rl.high, - free_blocks, - free_user_blocks, - total_blocks, - READ_ONCE(pblk->rl.rb_user_active)); -} - -static ssize_t pblk_sysfs_gc_state_show(struct pblk *pblk, char *page) -{ - int gc_enabled, gc_active; - - pblk_gc_sysfs_state_show(pblk, &gc_enabled, &gc_active); - return snprintf(page, PAGE_SIZE, "gc_enabled=%d, gc_active=%d\n", - gc_enabled, gc_active); -} - -static ssize_t pblk_sysfs_stats(struct pblk *pblk, char *page) -{ - ssize_t sz; - - sz = snprintf(page, PAGE_SIZE, - "read_failed=%lu, read_high_ecc=%lu, read_empty=%lu, read_failed_gc=%lu, write_failed=%lu, erase_failed=%lu\n", - atomic_long_read(&pblk->read_failed), - atomic_long_read(&pblk->read_high_ecc), - atomic_long_read(&pblk->read_empty), - atomic_long_read(&pblk->read_failed_gc), - atomic_long_read(&pblk->write_failed), - atomic_long_read(&pblk->erase_failed)); - - return sz; -} - -static ssize_t pblk_sysfs_write_buffer(struct pblk *pblk, char *page) -{ - return pblk_rb_sysfs(&pblk->rwb, page); -} - -static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - ssize_t sz = 0; - - if (geo->version == NVM_OCSSD_SPEC_12) { - struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf; - struct nvm_addrf_12 *gppaf = (struct nvm_addrf_12 *)&geo->addrf; - - sz = scnprintf(page, PAGE_SIZE, - "g:(b:%d)blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n", - pblk->addrf_len, - ppaf->blk_offset, ppaf->blk_len, - ppaf->pg_offset, ppaf->pg_len, - ppaf->lun_offset, ppaf->lun_len, - ppaf->ch_offset, ppaf->ch_len, - ppaf->pln_offset, ppaf->pln_len, - ppaf->sec_offset, ppaf->sec_len); - - sz += scnprintf(page + sz, PAGE_SIZE - sz, - "d:blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n", - gppaf->blk_offset, gppaf->blk_len, - gppaf->pg_offset, gppaf->pg_len, 
- gppaf->lun_offset, gppaf->lun_len, - gppaf->ch_offset, gppaf->ch_len, - gppaf->pln_offset, gppaf->pln_len, - gppaf->sec_offset, gppaf->sec_len); - } else { - struct nvm_addrf *ppaf = &pblk->addrf; - struct nvm_addrf *gppaf = &geo->addrf; - - sz = scnprintf(page, PAGE_SIZE, - "pblk:(s:%d)ch:%d/%d,lun:%d/%d,chk:%d/%d/sec:%d/%d\n", - pblk->addrf_len, - ppaf->ch_offset, ppaf->ch_len, - ppaf->lun_offset, ppaf->lun_len, - ppaf->chk_offset, ppaf->chk_len, - ppaf->sec_offset, ppaf->sec_len); - - sz += scnprintf(page + sz, PAGE_SIZE - sz, - "device:ch:%d/%d,lun:%d/%d,chk:%d/%d,sec:%d/%d\n", - gppaf->ch_offset, gppaf->ch_len, - gppaf->lun_offset, gppaf->lun_len, - gppaf->chk_offset, gppaf->chk_len, - gppaf->sec_offset, gppaf->sec_len); - } - - return sz; -} - -static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line *line; - ssize_t sz = 0; - int nr_free_lines; - int cur_data, cur_log; - int free_line_cnt = 0, closed_line_cnt = 0, emeta_line_cnt = 0; - int d_line_cnt = 0, l_line_cnt = 0; - int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0; - int gc_werr = 0; - - int bad = 0, cor = 0; - int msecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0; - int map_weight = 0, meta_weight = 0; - - spin_lock(&l_mg->free_lock); - cur_data = (l_mg->data_line) ? l_mg->data_line->id : -1; - cur_log = (l_mg->log_line) ? l_mg->log_line->id : -1; - nr_free_lines = l_mg->nr_free_lines; - - list_for_each_entry(line, &l_mg->free_list, list) - free_line_cnt++; - spin_unlock(&l_mg->free_lock); - - spin_lock(&l_mg->close_lock); - list_for_each_entry(line, &l_mg->emeta_list, list) - emeta_line_cnt++; - spin_unlock(&l_mg->close_lock); - - spin_lock(&l_mg->gc_lock); - list_for_each_entry(line, &l_mg->gc_full_list, list) { - if (line->type == PBLK_LINETYPE_DATA) - d_line_cnt++; - else if (line->type == PBLK_LINETYPE_LOG) - l_line_cnt++; - closed_line_cnt++; - gc_full++; - } - - list_for_each_entry(line, &l_mg->gc_high_list, list) { - if (line->type == PBLK_LINETYPE_DATA) - d_line_cnt++; - else if (line->type == PBLK_LINETYPE_LOG) - l_line_cnt++; - closed_line_cnt++; - gc_high++; - } - - list_for_each_entry(line, &l_mg->gc_mid_list, list) { - if (line->type == PBLK_LINETYPE_DATA) - d_line_cnt++; - else if (line->type == PBLK_LINETYPE_LOG) - l_line_cnt++; - closed_line_cnt++; - gc_mid++; - } - - list_for_each_entry(line, &l_mg->gc_low_list, list) { - if (line->type == PBLK_LINETYPE_DATA) - d_line_cnt++; - else if (line->type == PBLK_LINETYPE_LOG) - l_line_cnt++; - closed_line_cnt++; - gc_low++; - } - - list_for_each_entry(line, &l_mg->gc_empty_list, list) { - if (line->type == PBLK_LINETYPE_DATA) - d_line_cnt++; - else if (line->type == PBLK_LINETYPE_LOG) - l_line_cnt++; - closed_line_cnt++; - gc_empty++; - } - - list_for_each_entry(line, &l_mg->gc_werr_list, list) { - if (line->type == PBLK_LINETYPE_DATA) - d_line_cnt++; - else if (line->type == PBLK_LINETYPE_LOG) - l_line_cnt++; - closed_line_cnt++; - gc_werr++; - } - - list_for_each_entry(line, &l_mg->bad_list, list) - bad++; - list_for_each_entry(line, &l_mg->corrupt_list, list) - cor++; - spin_unlock(&l_mg->gc_lock); - - spin_lock(&l_mg->free_lock); - if (l_mg->data_line) { - cur_sec = l_mg->data_line->cur_sec; - msecs = l_mg->data_line->left_msecs; - vsc = le32_to_cpu(*l_mg->data_line->vsc); - sec_in_line = l_mg->data_line->sec_in_line; - meta_weight = 
bitmap_weight(&l_mg->meta_bitmap, - PBLK_DATA_LINES); - - spin_lock(&l_mg->data_line->lock); - if (l_mg->data_line->map_bitmap) - map_weight = bitmap_weight(l_mg->data_line->map_bitmap, - lm->sec_per_line); - else - map_weight = 0; - spin_unlock(&l_mg->data_line->lock); - } - spin_unlock(&l_mg->free_lock); - - if (nr_free_lines != free_line_cnt) - pblk_err(pblk, "corrupted free line list:%d/%d\n", - nr_free_lines, free_line_cnt); - - sz = scnprintf(page, PAGE_SIZE - sz, - "line: nluns:%d, nblks:%d, nsecs:%d\n", - geo->all_luns, lm->blk_per_line, lm->sec_per_line); - - sz += scnprintf(page + sz, PAGE_SIZE - sz, - "lines:d:%d,l:%d-f:%d,m:%d/%d,c:%d,b:%d,co:%d(d:%d,l:%d)t:%d\n", - cur_data, cur_log, - nr_free_lines, - emeta_line_cnt, meta_weight, - closed_line_cnt, - bad, cor, - d_line_cnt, l_line_cnt, - l_mg->nr_lines); - - sz += scnprintf(page + sz, PAGE_SIZE - sz, - "GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, werr: %d, queue:%d\n", - gc_full, gc_high, gc_mid, gc_low, gc_empty, gc_werr, - atomic_read(&pblk->gc.read_inflight_gc)); - - sz += scnprintf(page + sz, PAGE_SIZE - sz, - "data (%d) cur:%d, left:%d, vsc:%d, s:%d, map:%d/%d (%d)\n", - cur_data, cur_sec, msecs, vsc, sec_in_line, - map_weight, lm->sec_per_line, - atomic_read(&pblk->inflight_io)); - - return sz; -} - -static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - ssize_t sz = 0; - - sz = scnprintf(page, PAGE_SIZE - sz, - "smeta - len:%d, secs:%d\n", - lm->smeta_len, lm->smeta_sec); - sz += scnprintf(page + sz, PAGE_SIZE - sz, - "emeta - len:%d, sec:%d, bb_start:%d\n", - lm->emeta_len[0], lm->emeta_sec[0], - lm->emeta_bb); - sz += scnprintf(page + sz, PAGE_SIZE - sz, - "bitmap lengths: sec:%d, blk:%d, lun:%d\n", - lm->sec_bitmap_len, - lm->blk_bitmap_len, - lm->lun_bitmap_len); - sz += scnprintf(page + sz, PAGE_SIZE - sz, - "blk_line:%d, sec_line:%d, sec_blk:%d\n", - lm->blk_per_line, - lm->sec_per_line, - geo->clba); - - return sz; -} - -static ssize_t pblk_sysfs_get_sec_per_write(struct pblk *pblk, char *page) -{ - return snprintf(page, PAGE_SIZE, "%d\n", pblk->sec_per_write); -} - -static ssize_t pblk_get_write_amp(u64 user, u64 gc, u64 pad, - char *page) -{ - int sz; - - sz = scnprintf(page, PAGE_SIZE, - "user:%lld gc:%lld pad:%lld WA:", - user, gc, pad); - - if (!user) { - sz += scnprintf(page + sz, PAGE_SIZE - sz, "NaN\n"); - } else { - u64 wa_int; - u32 wa_frac; - - wa_int = (user + gc + pad) * 100000; - wa_int = div64_u64(wa_int, user); - wa_int = div_u64_rem(wa_int, 100000, &wa_frac); - - sz += scnprintf(page + sz, PAGE_SIZE - sz, "%llu.%05u\n", - wa_int, wa_frac); - } - - return sz; -} - -static ssize_t pblk_sysfs_get_write_amp_mileage(struct pblk *pblk, char *page) -{ - return pblk_get_write_amp(atomic64_read(&pblk->user_wa), - atomic64_read(&pblk->gc_wa), atomic64_read(&pblk->pad_wa), - page); -} - -static ssize_t pblk_sysfs_get_write_amp_trip(struct pblk *pblk, char *page) -{ - return pblk_get_write_amp( - atomic64_read(&pblk->user_wa) - pblk->user_rst_wa, - atomic64_read(&pblk->gc_wa) - pblk->gc_rst_wa, - atomic64_read(&pblk->pad_wa) - pblk->pad_rst_wa, page); -} - -static long long bucket_percentage(unsigned long long bucket, - unsigned long long total) -{ - int p = bucket * 100; - - p = div_u64(p, total); - - return p; -} - -static ssize_t pblk_sysfs_get_padding_dist(struct pblk *pblk, char *page) -{ - int sz = 0; - unsigned long long total; - unsigned long long total_buckets = 
0; - int buckets = pblk->min_write_pgs - 1; - int i; - - total = atomic64_read(&pblk->nr_flush) - pblk->nr_flush_rst; - if (!total) { - for (i = 0; i < (buckets + 1); i++) - sz += scnprintf(page + sz, PAGE_SIZE - sz, - "%d:0 ", i); - sz += scnprintf(page + sz, PAGE_SIZE - sz, "\n"); - - return sz; - } - - for (i = 0; i < buckets; i++) - total_buckets += atomic64_read(&pblk->pad_dist[i]); - - sz += scnprintf(page + sz, PAGE_SIZE - sz, "0:%lld%% ", - bucket_percentage(total - total_buckets, total)); - - for (i = 0; i < buckets; i++) { - unsigned long long p; - - p = bucket_percentage(atomic64_read(&pblk->pad_dist[i]), - total); - sz += scnprintf(page + sz, PAGE_SIZE - sz, "%d:%lld%% ", - i + 1, p); - } - sz += scnprintf(page + sz, PAGE_SIZE - sz, "\n"); - - return sz; -} - -#ifdef CONFIG_NVM_PBLK_DEBUG -static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page) -{ - return snprintf(page, PAGE_SIZE, - "%lu\t%lu\t%ld\t%llu\t%ld\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\n", - atomic_long_read(&pblk->inflight_writes), - atomic_long_read(&pblk->inflight_reads), - atomic_long_read(&pblk->req_writes), - (u64)atomic64_read(&pblk->nr_flush), - atomic_long_read(&pblk->padded_writes), - atomic_long_read(&pblk->padded_wb), - atomic_long_read(&pblk->sub_writes), - atomic_long_read(&pblk->sync_writes), - atomic_long_read(&pblk->recov_writes), - atomic_long_read(&pblk->recov_gc_writes), - atomic_long_read(&pblk->recov_gc_reads), - atomic_long_read(&pblk->cache_reads), - atomic_long_read(&pblk->sync_reads)); -} -#endif - -static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page, - size_t len) -{ - size_t c_len; - int force; - - c_len = strcspn(page, "\n"); - if (c_len >= len) - return -EINVAL; - - if (kstrtouint(page, 0, &force)) - return -EINVAL; - - pblk_gc_sysfs_force(pblk, force); - - return len; -} - -static ssize_t pblk_sysfs_set_sec_per_write(struct pblk *pblk, - const char *page, size_t len) -{ - size_t c_len; - int sec_per_write; - - c_len = strcspn(page, "\n"); - if (c_len >= len) - return -EINVAL; - - if (kstrtouint(page, 0, &sec_per_write)) - return -EINVAL; - - if (!pblk_is_oob_meta_supported(pblk)) { - /* For packed metadata case it is - * not allowed to change sec_per_write. 
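
The store handler below accepts only write sizes that are whole multiples of the minimum write size and that stay inside the device window. A condensed form of the check; with the illustrative values min_write_pgs = 4 and max_write_pgs = 16, the only accepted values are 4, 8, 12 and 16:

/* sec_per_write must be a multiple of min_write_pgs in [min, max]. */
static int valid_sec_per_write(int v, int min_write_pgs, int max_write_pgs)
{
	return v >= min_write_pgs && v <= max_write_pgs &&
	       v % min_write_pgs == 0;
}
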
- */ - return -EINVAL; - } - - if (sec_per_write < pblk->min_write_pgs - || sec_per_write > pblk->max_write_pgs - || sec_per_write % pblk->min_write_pgs != 0) - return -EINVAL; - - pblk_set_sec_per_write(pblk, sec_per_write); - - return len; -} - -static ssize_t pblk_sysfs_set_write_amp_trip(struct pblk *pblk, - const char *page, size_t len) -{ - size_t c_len; - int reset_value; - - c_len = strcspn(page, "\n"); - if (c_len >= len) - return -EINVAL; - - if (kstrtouint(page, 0, &reset_value)) - return -EINVAL; - - if (reset_value != 0) - return -EINVAL; - - pblk->user_rst_wa = atomic64_read(&pblk->user_wa); - pblk->pad_rst_wa = atomic64_read(&pblk->pad_wa); - pblk->gc_rst_wa = atomic64_read(&pblk->gc_wa); - - return len; -} - - -static ssize_t pblk_sysfs_set_padding_dist(struct pblk *pblk, - const char *page, size_t len) -{ - size_t c_len; - int reset_value; - int buckets = pblk->min_write_pgs - 1; - int i; - - c_len = strcspn(page, "\n"); - if (c_len >= len) - return -EINVAL; - - if (kstrtouint(page, 0, &reset_value)) - return -EINVAL; - - if (reset_value != 0) - return -EINVAL; - - for (i = 0; i < buckets; i++) - atomic64_set(&pblk->pad_dist[i], 0); - - pblk->nr_flush_rst = atomic64_read(&pblk->nr_flush); - - return len; -} - -static struct attribute sys_write_luns = { - .name = "write_luns", - .mode = 0444, -}; - -static struct attribute sys_rate_limiter_attr = { - .name = "rate_limiter", - .mode = 0444, -}; - -static struct attribute sys_gc_state = { - .name = "gc_state", - .mode = 0444, -}; - -static struct attribute sys_errors_attr = { - .name = "errors", - .mode = 0444, -}; - -static struct attribute sys_rb_attr = { - .name = "write_buffer", - .mode = 0444, -}; - -static struct attribute sys_stats_ppaf_attr = { - .name = "ppa_format", - .mode = 0444, -}; - -static struct attribute sys_lines_attr = { - .name = "lines", - .mode = 0444, -}; - -static struct attribute sys_lines_info_attr = { - .name = "lines_info", - .mode = 0444, -}; - -static struct attribute sys_gc_force = { - .name = "gc_force", - .mode = 0200, -}; - -static struct attribute sys_max_sec_per_write = { - .name = "max_sec_per_write", - .mode = 0644, -}; - -static struct attribute sys_write_amp_mileage = { - .name = "write_amp_mileage", - .mode = 0444, -}; - -static struct attribute sys_write_amp_trip = { - .name = "write_amp_trip", - .mode = 0644, -}; - -static struct attribute sys_padding_dist = { - .name = "padding_dist", - .mode = 0644, -}; - -#ifdef CONFIG_NVM_PBLK_DEBUG -static struct attribute sys_stats_debug_attr = { - .name = "stats", - .mode = 0444, -}; -#endif - -static struct attribute *pblk_attrs[] = { - &sys_write_luns, - &sys_rate_limiter_attr, - &sys_errors_attr, - &sys_gc_state, - &sys_gc_force, - &sys_max_sec_per_write, - &sys_rb_attr, - &sys_stats_ppaf_attr, - &sys_lines_attr, - &sys_lines_info_attr, - &sys_write_amp_mileage, - &sys_write_amp_trip, - &sys_padding_dist, -#ifdef CONFIG_NVM_PBLK_DEBUG - &sys_stats_debug_attr, -#endif - NULL, -}; - -static ssize_t pblk_sysfs_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct pblk *pblk = container_of(kobj, struct pblk, kobj); - - if (strcmp(attr->name, "rate_limiter") == 0) - return pblk_sysfs_rate_limiter(pblk, buf); - else if (strcmp(attr->name, "write_luns") == 0) - return pblk_sysfs_luns_show(pblk, buf); - else if (strcmp(attr->name, "gc_state") == 0) - return pblk_sysfs_gc_state_show(pblk, buf); - else if (strcmp(attr->name, "errors") == 0) - return pblk_sysfs_stats(pblk, buf); - else if (strcmp(attr->name, "write_buffer") == 
0)
-		return pblk_sysfs_write_buffer(pblk, buf);
-	else if (strcmp(attr->name, "ppa_format") == 0)
-		return pblk_sysfs_ppaf(pblk, buf);
-	else if (strcmp(attr->name, "lines") == 0)
-		return pblk_sysfs_lines(pblk, buf);
-	else if (strcmp(attr->name, "lines_info") == 0)
-		return pblk_sysfs_lines_info(pblk, buf);
-	else if (strcmp(attr->name, "max_sec_per_write") == 0)
-		return pblk_sysfs_get_sec_per_write(pblk, buf);
-	else if (strcmp(attr->name, "write_amp_mileage") == 0)
-		return pblk_sysfs_get_write_amp_mileage(pblk, buf);
-	else if (strcmp(attr->name, "write_amp_trip") == 0)
-		return pblk_sysfs_get_write_amp_trip(pblk, buf);
-	else if (strcmp(attr->name, "padding_dist") == 0)
-		return pblk_sysfs_get_padding_dist(pblk, buf);
-#ifdef CONFIG_NVM_PBLK_DEBUG
-	else if (strcmp(attr->name, "stats") == 0)
-		return pblk_sysfs_stats_debug(pblk, buf);
-#endif
-	return 0;
-}
-
-static ssize_t pblk_sysfs_store(struct kobject *kobj, struct attribute *attr,
-				const char *buf, size_t len)
-{
-	struct pblk *pblk = container_of(kobj, struct pblk, kobj);
-
-	if (strcmp(attr->name, "gc_force") == 0)
-		return pblk_sysfs_gc_force(pblk, buf, len);
-	else if (strcmp(attr->name, "max_sec_per_write") == 0)
-		return pblk_sysfs_set_sec_per_write(pblk, buf, len);
-	else if (strcmp(attr->name, "write_amp_trip") == 0)
-		return pblk_sysfs_set_write_amp_trip(pblk, buf, len);
-	else if (strcmp(attr->name, "padding_dist") == 0)
-		return pblk_sysfs_set_padding_dist(pblk, buf, len);
-	return 0;
-}
-
-static const struct sysfs_ops pblk_sysfs_ops = {
-	.show = pblk_sysfs_show,
-	.store = pblk_sysfs_store,
-};
-
-static struct kobj_type pblk_ktype = {
-	.sysfs_ops = &pblk_sysfs_ops,
-	.default_attrs = pblk_attrs,
-};
-
-int pblk_sysfs_init(struct gendisk *tdisk)
-{
-	struct pblk *pblk = tdisk->private_data;
-	struct device *parent_dev = disk_to_dev(pblk->disk);
-	int ret;
-
-	ret = kobject_init_and_add(&pblk->kobj, &pblk_ktype,
-				   kobject_get(&parent_dev->kobj),
-				   "%s", "pblk");
-	if (ret) {
-		pblk_err(pblk, "could not register\n");
-		return ret;
-	}
-
-	kobject_uevent(&pblk->kobj, KOBJ_ADD);
-	return 0;
-}
-
-void pblk_sysfs_exit(struct gendisk *tdisk)
-{
-	struct pblk *pblk = tdisk->private_data;
-
-	kobject_uevent(&pblk->kobj, KOBJ_REMOVE);
-	kobject_del(&pblk->kobj);
-	kobject_put(&pblk->kobj);
-}
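The pblk-sysfs.c code deleted above exposes all of its counters through a single raw kobject whose sysfs_ops dispatch on attr->name, rather than through one show/store callback per attribute. A minimal, self-contained sketch of that same pattern against the kobject API of this patch's era (all names here are illustrative and not part of the deleted driver):

#include <linux/kobject.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/sysfs.h>

static struct kobject demo_kobj;

static struct attribute demo_state_attr = {
	.name = "state",
	.mode = 0444,
};

static struct attribute *demo_attrs[] = {
	&demo_state_attr,
	NULL,
};

/* One shared ->show(); attributes are told apart by name, as in pblk. */
static ssize_t demo_show(struct kobject *kobj, struct attribute *attr,
			 char *buf)
{
	if (strcmp(attr->name, "state") == 0)
		return sysfs_emit(buf, "running\n");
	return 0;
}

static const struct sysfs_ops demo_sysfs_ops = {
	.show = demo_show,
};

static struct kobj_type demo_ktype = {
	.sysfs_ops = &demo_sysfs_ops,
	.default_attrs = demo_attrs,	/* pre-5.18 API, as used above */
};

static int __init demo_init(void)
{
	/* Appears as /sys/kernel/pblk_demo/state */
	int ret = kobject_init_and_add(&demo_kobj, &demo_ktype, kernel_kobj,
				       "%s", "pblk_demo");
	if (ret)
		kobject_put(&demo_kobj);	/* required even on failure */
	return ret;
}

static void __exit demo_exit(void)
{
	kobject_del(&demo_kobj);
	kobject_put(&demo_kobj);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

Dispatching on attr->name funnels every attribute through two entry points at the cost of a strcmp() chain; the deleted pblk_sysfs_show()/pblk_sysfs_store() above follow exactly this shape.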
diff --git a/drivers/lightnvm/pblk-trace.h b/drivers/lightnvm/pblk-trace.h
deleted file mode 100644
index 47b67c6bff7a..000000000000
--- a/drivers/lightnvm/pblk-trace.h
+++ /dev/null
@@ -1,145 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM pblk
-
-#if !defined(_TRACE_PBLK_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_PBLK_H
-
-#include <linux/tracepoint.h>
-
-struct ppa_addr;
-
-#define show_chunk_flags(state) __print_flags(state, "", \
-	{ NVM_CHK_ST_FREE, "FREE", }, \
-	{ NVM_CHK_ST_CLOSED, "CLOSED", }, \
-	{ NVM_CHK_ST_OPEN, "OPEN", }, \
-	{ NVM_CHK_ST_OFFLINE, "OFFLINE", })
-
-#define show_line_state(state) __print_symbolic(state, \
-	{ PBLK_LINESTATE_NEW, "NEW", }, \
-	{ PBLK_LINESTATE_FREE, "FREE", }, \
-	{ PBLK_LINESTATE_OPEN, "OPEN", }, \
-	{ PBLK_LINESTATE_CLOSED, "CLOSED", }, \
-	{ PBLK_LINESTATE_GC, "GC", }, \
-	{ PBLK_LINESTATE_BAD, "BAD", }, \
-	{ PBLK_LINESTATE_CORRUPT, "CORRUPT" })
-
-
-#define show_pblk_state(state) __print_symbolic(state, \
-	{ PBLK_STATE_RUNNING, "RUNNING", }, \
-	{ PBLK_STATE_STOPPING, "STOPPING", }, \
-	{ PBLK_STATE_RECOVERING, "RECOVERING", }, \
-	{ PBLK_STATE_STOPPED, "STOPPED" })
-
-#define show_chunk_erase_state(state) __print_symbolic(state, \
-	{ PBLK_CHUNK_RESET_START, "START", }, \
-	{ PBLK_CHUNK_RESET_DONE, "OK", }, \
-	{ PBLK_CHUNK_RESET_FAILED, "FAILED" })
-
-
-TRACE_EVENT(pblk_chunk_reset,
-
-	TP_PROTO(const char *name, struct ppa_addr *ppa, int state),
-
-	TP_ARGS(name, ppa, state),
-
-	TP_STRUCT__entry(
-		__string(name, name)
-		__field(u64, ppa)
-		__field(int, state)
-	),
-
-	TP_fast_assign(
-		__assign_str(name, name);
-		__entry->ppa = ppa->ppa;
-		__entry->state = state;
-	),
-
-	TP_printk("dev=%s grp=%llu pu=%llu chk=%llu state=%s", __get_str(name),
-			(u64)(((struct ppa_addr *)(&__entry->ppa))->m.grp),
-			(u64)(((struct ppa_addr *)(&__entry->ppa))->m.pu),
-			(u64)(((struct ppa_addr *)(&__entry->ppa))->m.chk),
-			show_chunk_erase_state((int)__entry->state))
-
-);
-
-TRACE_EVENT(pblk_chunk_state,
-
-	TP_PROTO(const char *name, struct ppa_addr *ppa, int state),
-
-	TP_ARGS(name, ppa, state),
-
-	TP_STRUCT__entry(
-		__string(name, name)
-		__field(u64, ppa)
-		__field(int, state)
-	),
-
-	TP_fast_assign(
-		__assign_str(name, name);
-		__entry->ppa = ppa->ppa;
-		__entry->state = state;
-	),
-
-	TP_printk("dev=%s grp=%llu pu=%llu chk=%llu state=%s", __get_str(name),
-			(u64)(((struct ppa_addr *)(&__entry->ppa))->m.grp),
-			(u64)(((struct ppa_addr *)(&__entry->ppa))->m.pu),
-			(u64)(((struct ppa_addr *)(&__entry->ppa))->m.chk),
-			show_chunk_flags((int)__entry->state))
-
-);
-
-TRACE_EVENT(pblk_line_state,
-
-	TP_PROTO(const char *name, int line, int state),
-
-	TP_ARGS(name, line, state),
-
-	TP_STRUCT__entry(
-		__string(name, name)
-		__field(int, line)
-		__field(int, state)
-	),
-
-	TP_fast_assign(
-		__assign_str(name, name);
-		__entry->line = line;
-		__entry->state = state;
-	),
-
-	TP_printk("dev=%s line=%d state=%s", __get_str(name),
-			(int)__entry->line,
-			show_line_state((int)__entry->state))
-
-);
-
-TRACE_EVENT(pblk_state,
-
-	TP_PROTO(const char *name, int state),
-
-	TP_ARGS(name, state),
-
-	TP_STRUCT__entry(
-		__string(name, name)
-		__field(int, state)
-	),
-
-	TP_fast_assign(
-		__assign_str(name, name);
-		__entry->state = state;
-	),
-
-	TP_printk("dev=%s state=%s", __get_str(name),
-			show_pblk_state((int)__entry->state))
-
-);
-
-#endif /* !defined(_TRACE_PBLK_H) || defined(TRACE_HEADER_MULTI_READ) */
-
-/* This part must be outside protection */
-
-#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH ../../drivers/lightnvm
-#undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_FILE pblk-trace
-#include <trace/define_trace.h>
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
deleted file mode 100644
index b9a2aeba95ab..000000000000
--- a/drivers/lightnvm/pblk-write.c
+++ /dev/null
@@ -1,665 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2016 CNEX Labs
- * Initial release: Javier Gonzalez
- *                  Matias Bjorling
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- * - * pblk-write.c - pblk's write path from write buffer to media - */ - -#include "pblk.h" -#include "pblk-trace.h" - -static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd, - struct pblk_c_ctx *c_ctx) -{ - struct bio *original_bio; - struct pblk_rb *rwb = &pblk->rwb; - unsigned long ret; - int i; - - for (i = 0; i < c_ctx->nr_valid; i++) { - struct pblk_w_ctx *w_ctx; - int pos = c_ctx->sentry + i; - int flags; - - w_ctx = pblk_rb_w_ctx(rwb, pos); - flags = READ_ONCE(w_ctx->flags); - - if (flags & PBLK_FLUSH_ENTRY) { - flags &= ~PBLK_FLUSH_ENTRY; - /* Release flags on context. Protect from writes */ - smp_store_release(&w_ctx->flags, flags); - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_dec(&rwb->inflight_flush_point); -#endif - } - - while ((original_bio = bio_list_pop(&w_ctx->bios))) - bio_endio(original_bio); - } - - if (c_ctx->nr_padded) - pblk_bio_free_pages(pblk, rqd->bio, c_ctx->nr_valid, - c_ctx->nr_padded); - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_add(rqd->nr_ppas, &pblk->sync_writes); -#endif - - ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid); - - bio_put(rqd->bio); - pblk_free_rqd(pblk, rqd, PBLK_WRITE); - - return ret; -} - -static unsigned long pblk_end_queued_w_bio(struct pblk *pblk, - struct nvm_rq *rqd, - struct pblk_c_ctx *c_ctx) -{ - list_del(&c_ctx->list); - return pblk_end_w_bio(pblk, rqd, c_ctx); -} - -static void pblk_complete_write(struct pblk *pblk, struct nvm_rq *rqd, - struct pblk_c_ctx *c_ctx) -{ - struct pblk_c_ctx *c, *r; - unsigned long flags; - unsigned long pos; - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_sub(c_ctx->nr_valid, &pblk->inflight_writes); -#endif - pblk_up_rq(pblk, c_ctx->lun_bitmap); - - pos = pblk_rb_sync_init(&pblk->rwb, &flags); - if (pos == c_ctx->sentry) { - pos = pblk_end_w_bio(pblk, rqd, c_ctx); - -retry: - list_for_each_entry_safe(c, r, &pblk->compl_list, list) { - rqd = nvm_rq_from_c_ctx(c); - if (c->sentry == pos) { - pos = pblk_end_queued_w_bio(pblk, rqd, c); - goto retry; - } - } - } else { - WARN_ON(nvm_rq_from_c_ctx(c_ctx) != rqd); - list_add_tail(&c_ctx->list, &pblk->compl_list); - } - pblk_rb_sync_end(&pblk->rwb, &flags); -} - -/* Map remaining sectors in chunk, starting from ppa */ -static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa, - int rqd_ppas) -{ - struct pblk_line *line; - struct ppa_addr map_ppa = *ppa; - __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); - __le64 *lba_list; - u64 paddr; - int done = 0; - int n = 0; - - line = pblk_ppa_to_line(pblk, *ppa); - lba_list = emeta_to_lbas(pblk, line->emeta->buf); - - spin_lock(&line->lock); - - while (!done) { - paddr = pblk_dev_ppa_to_line_addr(pblk, map_ppa); - - if (!test_and_set_bit(paddr, line->map_bitmap)) - line->left_msecs--; - - if (n < rqd_ppas && lba_list[paddr] != addr_empty) - line->nr_valid_lbas--; - - lba_list[paddr] = addr_empty; - - if (!test_and_set_bit(paddr, line->invalid_bitmap)) - le32_add_cpu(line->vsc, -1); - - done = nvm_next_ppa_in_chk(pblk->dev, &map_ppa); - - n++; - } - - line->w_err_gc->has_write_err = 1; - spin_unlock(&line->lock); -} - -static void pblk_prepare_resubmit(struct pblk *pblk, unsigned int sentry, - unsigned int nr_entries) -{ - struct pblk_rb *rb = &pblk->rwb; - struct pblk_rb_entry *entry; - struct pblk_line *line; - struct pblk_w_ctx *w_ctx; - struct ppa_addr ppa_l2p; - int flags; - unsigned int i; - - spin_lock(&pblk->trans_lock); - for (i = 0; i < nr_entries; i++) { - entry = &rb->entries[pblk_rb_ptr_wrap(rb, sentry, i)]; - w_ctx = &entry->w_ctx; - - /* Check if the lba has been 
overwritten */ - if (w_ctx->lba != ADDR_EMPTY) { - ppa_l2p = pblk_trans_map_get(pblk, w_ctx->lba); - if (!pblk_ppa_comp(ppa_l2p, entry->cacheline)) - w_ctx->lba = ADDR_EMPTY; - } - - /* Mark up the entry as submittable again */ - flags = READ_ONCE(w_ctx->flags); - flags |= PBLK_WRITTEN_DATA; - /* Release flags on write context. Protect from writes */ - smp_store_release(&w_ctx->flags, flags); - - /* Decrease the reference count to the line as we will - * re-map these entries - */ - line = pblk_ppa_to_line(pblk, w_ctx->ppa); - atomic_dec(&line->sec_to_update); - kref_put(&line->ref, pblk_line_put); - } - spin_unlock(&pblk->trans_lock); -} - -static void pblk_queue_resubmit(struct pblk *pblk, struct pblk_c_ctx *c_ctx) -{ - struct pblk_c_ctx *r_ctx; - - r_ctx = kzalloc(sizeof(struct pblk_c_ctx), GFP_KERNEL); - if (!r_ctx) - return; - - r_ctx->lun_bitmap = NULL; - r_ctx->sentry = c_ctx->sentry; - r_ctx->nr_valid = c_ctx->nr_valid; - r_ctx->nr_padded = c_ctx->nr_padded; - - spin_lock(&pblk->resubmit_lock); - list_add_tail(&r_ctx->list, &pblk->resubmit_list); - spin_unlock(&pblk->resubmit_lock); - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_add(c_ctx->nr_valid, &pblk->recov_writes); -#endif -} - -static void pblk_submit_rec(struct work_struct *work) -{ - struct pblk_rec_ctx *recovery = - container_of(work, struct pblk_rec_ctx, ws_rec); - struct pblk *pblk = recovery->pblk; - struct nvm_rq *rqd = recovery->rqd; - struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); - struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - - pblk_log_write_err(pblk, rqd); - - pblk_map_remaining(pblk, ppa_list, rqd->nr_ppas); - pblk_queue_resubmit(pblk, c_ctx); - - pblk_up_rq(pblk, c_ctx->lun_bitmap); - if (c_ctx->nr_padded) - pblk_bio_free_pages(pblk, rqd->bio, c_ctx->nr_valid, - c_ctx->nr_padded); - bio_put(rqd->bio); - pblk_free_rqd(pblk, rqd, PBLK_WRITE); - mempool_free(recovery, &pblk->rec_pool); - - atomic_dec(&pblk->inflight_io); - pblk_write_kick(pblk); -} - - -static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd) -{ - struct pblk_rec_ctx *recovery; - - recovery = mempool_alloc(&pblk->rec_pool, GFP_ATOMIC); - if (!recovery) { - pblk_err(pblk, "could not allocate recovery work\n"); - return; - } - - recovery->pblk = pblk; - recovery->rqd = rqd; - - INIT_WORK(&recovery->ws_rec, pblk_submit_rec); - queue_work(pblk->close_wq, &recovery->ws_rec); -} - -static void pblk_end_io_write(struct nvm_rq *rqd) -{ - struct pblk *pblk = rqd->private; - struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); - - if (rqd->error) { - pblk_end_w_fail(pblk, rqd); - return; - } else { - if (trace_pblk_chunk_state_enabled()) - pblk_check_chunk_state_update(pblk, rqd); -#ifdef CONFIG_NVM_PBLK_DEBUG - WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n"); -#endif - } - - pblk_complete_write(pblk, rqd, c_ctx); - atomic_dec(&pblk->inflight_io); -} - -static void pblk_end_io_write_meta(struct nvm_rq *rqd) -{ - struct pblk *pblk = rqd->private; - struct pblk_g_ctx *m_ctx = nvm_rq_to_pdu(rqd); - struct pblk_line *line = m_ctx->private; - struct pblk_emeta *emeta = line->emeta; - struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - int sync; - - pblk_up_chunk(pblk, ppa_list[0]); - - if (rqd->error) { - pblk_log_write_err(pblk, rqd); - pblk_err(pblk, "metadata I/O failed. 
Line %d\n", line->id); - line->w_err_gc->has_write_err = 1; - } else { - if (trace_pblk_chunk_state_enabled()) - pblk_check_chunk_state_update(pblk, rqd); - } - - sync = atomic_add_return(rqd->nr_ppas, &emeta->sync); - if (sync == emeta->nr_entries) - pblk_gen_run_ws(pblk, line, NULL, pblk_line_close_ws, - GFP_ATOMIC, pblk->close_wq); - - pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); - - atomic_dec(&pblk->inflight_io); -} - -static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd, - unsigned int nr_secs, nvm_end_io_fn(*end_io)) -{ - /* Setup write request */ - rqd->opcode = NVM_OP_PWRITE; - rqd->nr_ppas = nr_secs; - rqd->is_seq = 1; - rqd->private = pblk; - rqd->end_io = end_io; - - return pblk_alloc_rqd_meta(pblk, rqd); -} - -static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd, - struct ppa_addr *erase_ppa) -{ - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_line *e_line = pblk_line_get_erase(pblk); - struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); - unsigned int valid = c_ctx->nr_valid; - unsigned int padded = c_ctx->nr_padded; - unsigned int nr_secs = valid + padded; - unsigned long *lun_bitmap; - int ret; - - lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL); - if (!lun_bitmap) - return -ENOMEM; - c_ctx->lun_bitmap = lun_bitmap; - - ret = pblk_alloc_w_rq(pblk, rqd, nr_secs, pblk_end_io_write); - if (ret) { - kfree(lun_bitmap); - return ret; - } - - if (likely(!e_line || !atomic_read(&e_line->left_eblks))) - ret = pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, - valid, 0); - else - ret = pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, - valid, erase_ppa); - - return ret; -} - -static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail, - unsigned int secs_to_flush) -{ - int secs_to_sync; - - secs_to_sync = pblk_calc_secs(pblk, secs_avail, secs_to_flush, true); - -#ifdef CONFIG_NVM_PBLK_DEBUG - if ((!secs_to_sync && secs_to_flush) - || (secs_to_sync < 0) - || (secs_to_sync > secs_avail && !secs_to_flush)) { - pblk_err(pblk, "bad sector calculation (a:%d,s:%d,f:%d)\n", - secs_avail, secs_to_sync, secs_to_flush); - } -#endif - - return secs_to_sync; -} - -int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_emeta *emeta = meta_line->emeta; - struct ppa_addr *ppa_list; - struct pblk_g_ctx *m_ctx; - struct nvm_rq *rqd; - void *data; - u64 paddr; - int rq_ppas = pblk->min_write_pgs; - int id = meta_line->id; - int rq_len; - int i, j; - int ret; - - rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT); - - m_ctx = nvm_rq_to_pdu(rqd); - m_ctx->private = meta_line; - - rq_len = rq_ppas * geo->csecs; - data = ((void *)emeta->buf) + emeta->mem; - - ret = pblk_alloc_w_rq(pblk, rqd, rq_ppas, pblk_end_io_write_meta); - if (ret) - goto fail_free_rqd; - - ppa_list = nvm_rq_to_ppa_list(rqd); - for (i = 0; i < rqd->nr_ppas; ) { - spin_lock(&meta_line->lock); - paddr = __pblk_alloc_page(pblk, meta_line, rq_ppas); - spin_unlock(&meta_line->lock); - for (j = 0; j < rq_ppas; j++, i++, paddr++) - ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id); - } - - spin_lock(&l_mg->close_lock); - emeta->mem += rq_len; - if (emeta->mem >= lm->emeta_len[0]) - list_del(&meta_line->list); - spin_unlock(&l_mg->close_lock); - - pblk_down_chunk(pblk, ppa_list[0]); - - ret = pblk_submit_io(pblk, rqd, data); - if (ret) { - pblk_err(pblk, "emeta I/O submission failed: %d\n", ret); - goto 
fail_rollback; - } - - return NVM_IO_OK; - -fail_rollback: - pblk_up_chunk(pblk, ppa_list[0]); - spin_lock(&l_mg->close_lock); - pblk_dealloc_page(pblk, meta_line, rq_ppas); - list_add(&meta_line->list, &meta_line->list); - spin_unlock(&l_mg->close_lock); -fail_free_rqd: - pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); - return ret; -} - -static inline bool pblk_valid_meta_ppa(struct pblk *pblk, - struct pblk_line *meta_line, - struct nvm_rq *data_rqd) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_c_ctx *data_c_ctx = nvm_rq_to_pdu(data_rqd); - struct pblk_line *data_line = pblk_line_get_data(pblk); - struct ppa_addr ppa, ppa_opt; - u64 paddr; - int pos_opt; - - /* Schedule a metadata I/O that is half the distance from the data I/O - * with regards to the number of LUNs forming the pblk instance. This - * balances LUN conflicts across every I/O. - * - * When the LUN configuration changes (e.g., due to GC), this distance - * can align, which would result on metadata and data I/Os colliding. In - * this case, modify the distance to not be optimal, but move the - * optimal in the right direction. - */ - paddr = pblk_lookup_page(pblk, meta_line); - ppa = addr_to_gen_ppa(pblk, paddr, 0); - ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0); - pos_opt = pblk_ppa_to_pos(geo, ppa_opt); - - if (test_bit(pos_opt, data_c_ctx->lun_bitmap) || - test_bit(pos_opt, data_line->blk_bitmap)) - return true; - - if (unlikely(pblk_ppa_comp(ppa_opt, ppa))) - data_line->meta_distance--; - - return false; -} - -static struct pblk_line *pblk_should_submit_meta_io(struct pblk *pblk, - struct nvm_rq *data_rqd) -{ - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line *meta_line; - - spin_lock(&l_mg->close_lock); - if (list_empty(&l_mg->emeta_list)) { - spin_unlock(&l_mg->close_lock); - return NULL; - } - meta_line = list_first_entry(&l_mg->emeta_list, struct pblk_line, list); - if (meta_line->emeta->mem >= lm->emeta_len[0]) { - spin_unlock(&l_mg->close_lock); - return NULL; - } - spin_unlock(&l_mg->close_lock); - - if (!pblk_valid_meta_ppa(pblk, meta_line, data_rqd)) - return NULL; - - return meta_line; -} - -static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd) -{ - struct ppa_addr erase_ppa; - struct pblk_line *meta_line; - int err; - - pblk_ppa_set_empty(&erase_ppa); - - /* Assign lbas to ppas and populate request structure */ - err = pblk_setup_w_rq(pblk, rqd, &erase_ppa); - if (err) { - pblk_err(pblk, "could not setup write request: %d\n", err); - return NVM_IO_ERR; - } - - meta_line = pblk_should_submit_meta_io(pblk, rqd); - - /* Submit data write for current data line */ - err = pblk_submit_io(pblk, rqd, NULL); - if (err) { - pblk_err(pblk, "data I/O submission failed: %d\n", err); - return NVM_IO_ERR; - } - - if (!pblk_ppa_empty(erase_ppa)) { - /* Submit erase for next data line */ - if (pblk_blk_erase_async(pblk, erase_ppa)) { - struct pblk_line *e_line = pblk_line_get_erase(pblk); - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - int bit; - - atomic_inc(&e_line->left_eblks); - bit = pblk_ppa_to_pos(geo, erase_ppa); - WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap)); - } - } - - if (meta_line) { - /* Submit metadata write for previous data line */ - err = pblk_submit_meta_io(pblk, meta_line); - if (err) { - pblk_err(pblk, "metadata I/O submission failed: %d", - err); - return NVM_IO_ERR; - } - } - - return NVM_IO_OK; -} - -static void 
pblk_free_write_rqd(struct pblk *pblk, struct nvm_rq *rqd) -{ - struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); - struct bio *bio = rqd->bio; - - if (c_ctx->nr_padded) - pblk_bio_free_pages(pblk, bio, c_ctx->nr_valid, - c_ctx->nr_padded); -} - -static int pblk_submit_write(struct pblk *pblk, int *secs_left) -{ - struct bio *bio; - struct nvm_rq *rqd; - unsigned int secs_avail, secs_to_sync, secs_to_com; - unsigned int secs_to_flush, packed_meta_pgs; - unsigned long pos; - unsigned int resubmit; - - *secs_left = 0; - - spin_lock(&pblk->resubmit_lock); - resubmit = !list_empty(&pblk->resubmit_list); - spin_unlock(&pblk->resubmit_lock); - - /* Resubmit failed writes first */ - if (resubmit) { - struct pblk_c_ctx *r_ctx; - - spin_lock(&pblk->resubmit_lock); - r_ctx = list_first_entry(&pblk->resubmit_list, - struct pblk_c_ctx, list); - list_del(&r_ctx->list); - spin_unlock(&pblk->resubmit_lock); - - secs_avail = r_ctx->nr_valid; - pos = r_ctx->sentry; - - pblk_prepare_resubmit(pblk, pos, secs_avail); - secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail, - secs_avail); - - kfree(r_ctx); - } else { - /* If there are no sectors in the cache, - * flushes (bios without data) will be cleared on - * the cache threads - */ - secs_avail = pblk_rb_read_count(&pblk->rwb); - if (!secs_avail) - return 0; - - secs_to_flush = pblk_rb_flush_point_count(&pblk->rwb); - if (!secs_to_flush && secs_avail < pblk->min_write_pgs_data) - return 0; - - secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail, - secs_to_flush); - if (secs_to_sync > pblk->max_write_pgs) { - pblk_err(pblk, "bad buffer sync calculation\n"); - return 0; - } - - secs_to_com = (secs_to_sync > secs_avail) ? - secs_avail : secs_to_sync; - pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com); - } - - packed_meta_pgs = (pblk->min_write_pgs - pblk->min_write_pgs_data); - bio = bio_alloc(GFP_KERNEL, secs_to_sync + packed_meta_pgs); - - bio->bi_iter.bi_sector = 0; /* internal bio */ - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - - rqd = pblk_alloc_rqd(pblk, PBLK_WRITE); - rqd->bio = bio; - - if (pblk_rb_read_to_bio(&pblk->rwb, rqd, pos, secs_to_sync, - secs_avail)) { - pblk_err(pblk, "corrupted write bio\n"); - goto fail_put_bio; - } - - if (pblk_submit_io_set(pblk, rqd)) - goto fail_free_bio; - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_add(secs_to_sync, &pblk->sub_writes); -#endif - - *secs_left = 1; - return 0; - -fail_free_bio: - pblk_free_write_rqd(pblk, rqd); -fail_put_bio: - bio_put(bio); - pblk_free_rqd(pblk, rqd, PBLK_WRITE); - - return -EINTR; -} - -int pblk_write_ts(void *data) -{ - struct pblk *pblk = data; - int secs_left; - int write_failure = 0; - - while (!kthread_should_stop()) { - if (!write_failure) { - write_failure = pblk_submit_write(pblk, &secs_left); - - if (secs_left) - continue; - } - set_current_state(TASK_INTERRUPTIBLE); - io_schedule(); - } - - return 0; -} diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h deleted file mode 100644 index 86ffa875bfe1..000000000000 --- a/drivers/lightnvm/pblk.h +++ /dev/null @@ -1,1358 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2015 IT University of Copenhagen (rrpc.h) - * Copyright (C) 2016 CNEX Labs - * Initial release: Matias Bjorling - * Write buffering: Javier Gonzalez - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. 
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Implementation of a Physical Block-device target for Open-channel SSDs.
- *
- */
-
-#ifndef PBLK_H_
-#define PBLK_H_
-
-#include <linux/blkdev.h>
-#include <linux/blk-mq.h>
-#include <linux/bio.h>
-#include <linux/module.h>
-#include <linux/kthread.h>
-#include <linux/vmalloc.h>
-#include <linux/crc32.h>
-#include <linux/uuid.h>
-
-#include <linux/lightnvm.h>
-
-/* Run only GC if less than 1/X blocks are free */
-#define GC_LIMIT_INVERSE 5
-#define GC_TIME_MSECS 1000
-
-#define PBLK_SECTOR (512)
-#define PBLK_EXPOSED_PAGE_SIZE (4096)
-
-#define PBLK_NR_CLOSE_JOBS (4)
-
-#define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16)
-
-/* Max 512 LUNs per device */
-#define PBLK_MAX_LUNS_BITMAP (4)
-
-#define NR_PHY_IN_LOG (PBLK_EXPOSED_PAGE_SIZE / PBLK_SECTOR)
-
-/* Static pool sizes */
-#define PBLK_GEN_WS_POOL_SIZE (2)
-
-#define PBLK_DEFAULT_OP (11)
-
-enum {
-	PBLK_READ = READ,
-	PBLK_WRITE = WRITE,/* Write from write buffer */
-	PBLK_WRITE_INT, /* Internal write - no write buffer */
-	PBLK_READ_RECOV, /* Recovery read - errors allowed */
-	PBLK_ERASE,
-};
-
-enum {
-	/* IO Types */
-	PBLK_IOTYPE_USER = 1 << 0,
-	PBLK_IOTYPE_GC = 1 << 1,
-
-	/* Write buffer flags */
-	PBLK_FLUSH_ENTRY = 1 << 2,
-	PBLK_WRITTEN_DATA = 1 << 3,
-	PBLK_SUBMITTED_ENTRY = 1 << 4,
-	PBLK_WRITABLE_ENTRY = 1 << 5,
-};
-
-enum {
-	PBLK_BLK_ST_OPEN = 0x1,
-	PBLK_BLK_ST_CLOSED = 0x2,
-};
-
-enum {
-	PBLK_CHUNK_RESET_START,
-	PBLK_CHUNK_RESET_DONE,
-	PBLK_CHUNK_RESET_FAILED,
-};
-
-struct pblk_sec_meta {
-	u64 reserved;
-	__le64 lba;
-};
-
-/* The number of GC lists and the rate-limiter states go together. This way the
- * rate-limiter can dictate how much GC is needed based on resource utilization.
- */
-#define PBLK_GC_NR_LISTS 4
-
-enum {
-	PBLK_RL_OFF = 0,
-	PBLK_RL_WERR = 1,
-	PBLK_RL_HIGH = 2,
-	PBLK_RL_MID = 3,
-	PBLK_RL_LOW = 4
-};
-
-#define pblk_dma_ppa_size (sizeof(u64) * NVM_MAX_VLBA)
-
-/* write buffer completion context */
-struct pblk_c_ctx {
-	struct list_head list;		/* Head for out-of-order completion */
-
-	unsigned long *lun_bitmap;	/* Luns used on current request */
-	unsigned int sentry;
-	unsigned int nr_valid;
-	unsigned int nr_padded;
-};
-
-/* read context */
-struct pblk_g_ctx {
-	void *private;
-	unsigned long start_time;
-	u64 lba;
-};
-
-/* Pad context */
-struct pblk_pad_rq {
-	struct pblk *pblk;
-	struct completion wait;
-	struct kref ref;
-};
-
-/* Recovery context */
-struct pblk_rec_ctx {
-	struct pblk *pblk;
-	struct nvm_rq *rqd;
-	struct work_struct ws_rec;
-};
-
-/* Write context */
-struct pblk_w_ctx {
-	struct bio_list bios;		/* Original bios - used for completion
-					 * in REQ_FUA, REQ_FLUSH case
-					 */
-	u64 lba;			/* Logic addr. associated with entry */
-	struct ppa_addr ppa;		/* Physic addr.
associated with entry */ - int flags; /* Write context flags */ -}; - -struct pblk_rb_entry { - struct ppa_addr cacheline; /* Cacheline for this entry */ - void *data; /* Pointer to data on this entry */ - struct pblk_w_ctx w_ctx; /* Context for this entry */ - struct list_head index; /* List head to enable indexes */ -}; - -#define EMPTY_ENTRY (~0U) - -struct pblk_rb_pages { - struct page *pages; - int order; - struct list_head list; -}; - -struct pblk_rb { - struct pblk_rb_entry *entries; /* Ring buffer entries */ - unsigned int mem; /* Write offset - points to next - * writable entry in memory - */ - unsigned int subm; /* Read offset - points to last entry - * that has been submitted to the media - * to be persisted - */ - unsigned int sync; /* Synced - backpointer that signals - * the last submitted entry that has - * been successfully persisted to media - */ - unsigned int flush_point; /* Sync point - last entry that must be - * flushed to the media. Used with - * REQ_FLUSH and REQ_FUA - */ - unsigned int l2p_update; /* l2p update point - next entry for - * which l2p mapping will be updated to - * contain a device ppa address (instead - * of a cacheline - */ - unsigned int nr_entries; /* Number of entries in write buffer - - * must be a power of two - */ - unsigned int seg_size; /* Size of the data segments being - * stored on each entry. Typically this - * will be 4KB - */ - - unsigned int back_thres; /* Threshold that shall be maintained by - * the backpointer in order to respect - * geo->mw_cunits on a per chunk basis - */ - - struct list_head pages; /* List of data pages */ - - spinlock_t w_lock; /* Write lock */ - spinlock_t s_lock; /* Sync lock */ - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_t inflight_flush_point; /* Not served REQ_FLUSH | REQ_FUA */ -#endif -}; - -#define PBLK_RECOVERY_SECTORS 16 - -struct pblk_lun { - struct ppa_addr bppa; - struct semaphore wr_sem; -}; - -struct pblk_gc_rq { - struct pblk_line *line; - void *data; - u64 paddr_list[NVM_MAX_VLBA]; - u64 lba_list[NVM_MAX_VLBA]; - int nr_secs; - int secs_to_gc; - struct list_head list; -}; - -struct pblk_gc { - /* These states are not protected by a lock since (i) they are in the - * fast path, and (ii) they are not critical. - */ - int gc_active; - int gc_enabled; - int gc_forced; - - struct task_struct *gc_ts; - struct task_struct *gc_writer_ts; - struct task_struct *gc_reader_ts; - - struct workqueue_struct *gc_line_reader_wq; - struct workqueue_struct *gc_reader_wq; - - struct timer_list gc_timer; - - struct semaphore gc_sem; - atomic_t read_inflight_gc; /* Number of lines with inflight GC reads */ - atomic_t pipeline_gc; /* Number of lines in the GC pipeline - - * started reads to finished writes - */ - int w_entries; - - struct list_head w_list; - struct list_head r_list; - - spinlock_t lock; - spinlock_t w_lock; - spinlock_t r_lock; -}; - -struct pblk_rl { - unsigned int high; /* Upper threshold for rate limiter (free run - - * user I/O rate limiter - */ - unsigned int high_pw; /* High rounded up as a power of 2 */ - -#define PBLK_USER_HIGH_THRS 8 /* Begin write limit at 12% available blks */ -#define PBLK_USER_LOW_THRS 10 /* Aggressive GC at 10% available blocks */ - - int rb_windows_pw; /* Number of rate windows in the write buffer - * given as a power-of-2. This guarantees that - * when user I/O is being rate limited, there - * will be reserved enough space for the GC to - * place its payload. A window is of - * pblk->max_write_pgs size, which in NVMe is - * 64, i.e., 256kb. 
- */ - int rb_budget; /* Total number of entries available for I/O */ - int rb_user_max; /* Max buffer entries available for user I/O */ - int rb_gc_max; /* Max buffer entries available for GC I/O */ - int rb_gc_rsv; /* Reserved buffer entries for GC I/O */ - int rb_state; /* Rate-limiter current state */ - int rb_max_io; /* Maximum size for an I/O giving the config */ - - atomic_t rb_user_cnt; /* User I/O buffer counter */ - atomic_t rb_gc_cnt; /* GC I/O buffer counter */ - atomic_t rb_space; /* Space limit in case of reaching capacity */ - - int rsv_blocks; /* Reserved blocks for GC */ - - int rb_user_active; - int rb_gc_active; - - atomic_t werr_lines; /* Number of write error lines that needs gc */ - - struct timer_list u_timer; - - unsigned long total_blocks; - - atomic_t free_blocks; /* Total number of free blocks (+ OP) */ - atomic_t free_user_blocks; /* Number of user free blocks (no OP) */ -}; - -#define PBLK_LINE_EMPTY (~0U) - -enum { - /* Line Types */ - PBLK_LINETYPE_FREE = 0, - PBLK_LINETYPE_LOG = 1, - PBLK_LINETYPE_DATA = 2, - - /* Line state */ - PBLK_LINESTATE_NEW = 9, - PBLK_LINESTATE_FREE = 10, - PBLK_LINESTATE_OPEN = 11, - PBLK_LINESTATE_CLOSED = 12, - PBLK_LINESTATE_GC = 13, - PBLK_LINESTATE_BAD = 14, - PBLK_LINESTATE_CORRUPT = 15, - - /* GC group */ - PBLK_LINEGC_NONE = 20, - PBLK_LINEGC_EMPTY = 21, - PBLK_LINEGC_LOW = 22, - PBLK_LINEGC_MID = 23, - PBLK_LINEGC_HIGH = 24, - PBLK_LINEGC_FULL = 25, - PBLK_LINEGC_WERR = 26 -}; - -#define PBLK_MAGIC 0x70626c6b /*pblk*/ - -/* emeta/smeta persistent storage format versions: - * Changes in major version requires offline migration. - * Changes in minor version are handled automatically during - * recovery. - */ - -#define SMETA_VERSION_MAJOR (0) -#define SMETA_VERSION_MINOR (1) - -#define EMETA_VERSION_MAJOR (0) -#define EMETA_VERSION_MINOR (2) - -struct line_header { - __le32 crc; - __le32 identifier; /* pblk identifier */ - __u8 uuid[16]; /* instance uuid */ - __le16 type; /* line type */ - __u8 version_major; /* version major */ - __u8 version_minor; /* version minor */ - __le32 id; /* line id for current line */ -}; - -struct line_smeta { - struct line_header header; - - __le32 crc; /* Full structure including struct crc */ - /* Previous line metadata */ - __le32 prev_id; /* Line id for previous line */ - - /* Current line metadata */ - __le64 seq_nr; /* Sequence number for current line */ - - /* Active writers */ - __le32 window_wr_lun; /* Number of parallel LUNs to write */ - - __le32 rsvd[2]; - - __le64 lun_bitmap[]; -}; - - -/* - * Metadata layout in media: - * First sector: - * 1. struct line_emeta - * 2. bad block bitmap (u64 * window_wr_lun) - * 3. write amplification counters - * Mid sectors (start at lbas_sector): - * 3. nr_lbas (u64) forming lba list - * Last sectors (start at vsc_sector): - * 4. 
u32 valid sector count (vsc) for all lines (~0U: free line) - */ -struct line_emeta { - struct line_header header; - - __le32 crc; /* Full structure including struct crc */ - - /* Previous line metadata */ - __le32 prev_id; /* Line id for prev line */ - - /* Current line metadata */ - __le64 seq_nr; /* Sequence number for current line */ - - /* Active writers */ - __le32 window_wr_lun; /* Number of parallel LUNs to write */ - - /* Bookkeeping for recovery */ - __le32 next_id; /* Line id for next line */ - __le64 nr_lbas; /* Number of lbas mapped in line */ - __le64 nr_valid_lbas; /* Number of valid lbas mapped in line */ - __le64 bb_bitmap[]; /* Updated bad block bitmap for line */ -}; - - -/* Write amplification counters stored on media */ -struct wa_counters { - __le64 user; /* Number of user written sectors */ - __le64 gc; /* Number of sectors written by GC*/ - __le64 pad; /* Number of padded sectors */ -}; - -struct pblk_emeta { - struct line_emeta *buf; /* emeta buffer in media format */ - int mem; /* Write offset - points to next - * writable entry in memory - */ - atomic_t sync; /* Synced - backpointer that signals the - * last entry that has been successfully - * persisted to media - */ - unsigned int nr_entries; /* Number of emeta entries */ -}; - -struct pblk_smeta { - struct line_smeta *buf; /* smeta buffer in persistent format */ -}; - -struct pblk_w_err_gc { - int has_write_err; - int has_gc_err; - __le64 *lba_list; -}; - -struct pblk_line { - struct pblk *pblk; - unsigned int id; /* Line number corresponds to the - * block line - */ - unsigned int seq_nr; /* Unique line sequence number */ - - int state; /* PBLK_LINESTATE_X */ - int type; /* PBLK_LINETYPE_X */ - int gc_group; /* PBLK_LINEGC_X */ - struct list_head list; /* Free, GC lists */ - - unsigned long *lun_bitmap; /* Bitmap for LUNs mapped in line */ - - struct nvm_chk_meta *chks; /* Chunks forming line */ - - struct pblk_smeta *smeta; /* Start metadata */ - struct pblk_emeta *emeta; /* End medatada */ - - int meta_line; /* Metadata line id */ - int meta_distance; /* Distance between data and metadata */ - - u64 emeta_ssec; /* Sector where emeta starts */ - - unsigned int sec_in_line; /* Number of usable secs in line */ - - atomic_t blk_in_line; /* Number of good blocks in line */ - unsigned long *blk_bitmap; /* Bitmap for valid/invalid blocks */ - unsigned long *erase_bitmap; /* Bitmap for erased blocks */ - - unsigned long *map_bitmap; /* Bitmap for mapped sectors in line */ - unsigned long *invalid_bitmap; /* Bitmap for invalid sectors in line */ - - atomic_t left_eblks; /* Blocks left for erasing */ - atomic_t left_seblks; /* Blocks left for sync erasing */ - - int left_msecs; /* Sectors left for mapping */ - unsigned int cur_sec; /* Sector map pointer */ - unsigned int nr_valid_lbas; /* Number of valid lbas in line */ - - __le32 *vsc; /* Valid sector count in line */ - - struct kref ref; /* Write buffer L2P references */ - atomic_t sec_to_update; /* Outstanding L2P updates to ppa */ - - struct pblk_w_err_gc *w_err_gc; /* Write error gc recovery metadata */ - - spinlock_t lock; /* Necessary for invalid_bitmap only */ -}; - -#define PBLK_DATA_LINES 4 - -enum { - PBLK_EMETA_TYPE_HEADER = 1, /* struct line_emeta first sector */ - PBLK_EMETA_TYPE_LLBA = 2, /* lba list - type: __le64 */ - PBLK_EMETA_TYPE_VSC = 3, /* vsc list - type: __le32 */ -}; - -struct pblk_line_mgmt { - int nr_lines; /* Total number of full lines */ - int nr_free_lines; /* Number of full lines in free list */ - - /* Free lists - use free_lock */ - 
struct list_head free_list; /* Full lines ready to use */ - struct list_head corrupt_list; /* Full lines corrupted */ - struct list_head bad_list; /* Full lines bad */ - - /* GC lists - use gc_lock */ - struct list_head *gc_lists[PBLK_GC_NR_LISTS]; - struct list_head gc_high_list; /* Full lines ready to GC, high isc */ - struct list_head gc_mid_list; /* Full lines ready to GC, mid isc */ - struct list_head gc_low_list; /* Full lines ready to GC, low isc */ - - struct list_head gc_werr_list; /* Write err recovery list */ - - struct list_head gc_full_list; /* Full lines ready to GC, no valid */ - struct list_head gc_empty_list; /* Full lines close, all valid */ - - struct pblk_line *log_line; /* Current FTL log line */ - struct pblk_line *data_line; /* Current data line */ - struct pblk_line *log_next; /* Next FTL log line */ - struct pblk_line *data_next; /* Next data line */ - - struct list_head emeta_list; /* Lines queued to schedule emeta */ - - __le32 *vsc_list; /* Valid sector counts for all lines */ - - /* Pre-allocated metadata for data lines */ - struct pblk_smeta *sline_meta[PBLK_DATA_LINES]; - struct pblk_emeta *eline_meta[PBLK_DATA_LINES]; - unsigned long meta_bitmap; - - /* Cache and mempool for map/invalid bitmaps */ - struct kmem_cache *bitmap_cache; - mempool_t *bitmap_pool; - - /* Helpers for fast bitmap calculations */ - unsigned long *bb_template; - unsigned long *bb_aux; - - unsigned long d_seq_nr; /* Data line unique sequence number */ - unsigned long l_seq_nr; /* Log line unique sequence number */ - - spinlock_t free_lock; - spinlock_t close_lock; - spinlock_t gc_lock; -}; - -struct pblk_line_meta { - unsigned int smeta_len; /* Total length for smeta */ - unsigned int smeta_sec; /* Sectors needed for smeta */ - - unsigned int emeta_len[4]; /* Lengths for emeta: - * [0]: Total - * [1]: struct line_emeta + - * bb_bitmap + struct wa_counters - * [2]: L2P portion - * [3]: vsc - */ - unsigned int emeta_sec[4]; /* Sectors needed for emeta. Same layout - * as emeta_len - */ - - unsigned int emeta_bb; /* Boundary for bb that affects emeta */ - - unsigned int vsc_list_len; /* Length for vsc list */ - unsigned int sec_bitmap_len; /* Length for sector bitmap in line */ - unsigned int blk_bitmap_len; /* Length for block bitmap in line */ - unsigned int lun_bitmap_len; /* Length for lun bitmap in line */ - - unsigned int blk_per_line; /* Number of blocks in a full line */ - unsigned int sec_per_line; /* Number of sectors in a line */ - unsigned int dsec_per_line; /* Number of data sectors in a line */ - unsigned int min_blk_line; /* Min. 
number of good blocks in line */ - - unsigned int mid_thrs; /* Threshold for GC mid list */ - unsigned int high_thrs; /* Threshold for GC high list */ - - unsigned int meta_distance; /* Distance between data and metadata */ -}; - -enum { - PBLK_STATE_RUNNING = 0, - PBLK_STATE_STOPPING = 1, - PBLK_STATE_RECOVERING = 2, - PBLK_STATE_STOPPED = 3, -}; - -/* Internal format to support not power-of-2 device formats */ -struct pblk_addrf { - /* gen to dev */ - int sec_stripe; - int ch_stripe; - int lun_stripe; - - /* dev to gen */ - int sec_lun_stripe; - int sec_ws_stripe; -}; - -struct pblk { - struct nvm_tgt_dev *dev; - struct gendisk *disk; - - struct kobject kobj; - - struct pblk_lun *luns; - - struct pblk_line *lines; /* Line array */ - struct pblk_line_mgmt l_mg; /* Line management */ - struct pblk_line_meta lm; /* Line metadata */ - - struct nvm_addrf addrf; /* Aligned address format */ - struct pblk_addrf uaddrf; /* Unaligned address format */ - int addrf_len; - - struct pblk_rb rwb; - - int state; /* pblk line state */ - - int min_write_pgs; /* Minimum amount of pages required by controller */ - int min_write_pgs_data; /* Minimum amount of payload pages */ - int max_write_pgs; /* Maximum amount of pages supported by controller */ - int oob_meta_size; /* Size of OOB sector metadata */ - - sector_t capacity; /* Device capacity when bad blocks are subtracted */ - - int op; /* Percentage of device used for over-provisioning */ - int op_blks; /* Number of blocks used for over-provisioning */ - - /* pblk provisioning values. Used by rate limiter */ - struct pblk_rl rl; - - int sec_per_write; - - guid_t instance_uuid; - - /* Persistent write amplification counters, 4kb sector I/Os */ - atomic64_t user_wa; /* Sectors written by user */ - atomic64_t gc_wa; /* Sectors written by GC */ - atomic64_t pad_wa; /* Padded sectors written */ - - /* Reset values for delta write amplification measurements */ - u64 user_rst_wa; - u64 gc_rst_wa; - u64 pad_rst_wa; - - /* Counters used for calculating padding distribution */ - atomic64_t *pad_dist; /* Padding distribution buckets */ - u64 nr_flush_rst; /* Flushes reset value for pad dist.*/ - atomic64_t nr_flush; /* Number of flush/fua I/O */ - -#ifdef CONFIG_NVM_PBLK_DEBUG - /* Non-persistent debug counters, 4kb sector I/Os */ - atomic_long_t inflight_writes; /* Inflight writes (user and gc) */ - atomic_long_t padded_writes; /* Sectors padded due to flush/fua */ - atomic_long_t padded_wb; /* Sectors padded in write buffer */ - atomic_long_t req_writes; /* Sectors stored on write buffer */ - atomic_long_t sub_writes; /* Sectors submitted from buffer */ - atomic_long_t sync_writes; /* Sectors synced to media */ - atomic_long_t inflight_reads; /* Inflight sector read requests */ - atomic_long_t cache_reads; /* Read requests that hit the cache */ - atomic_long_t sync_reads; /* Completed sector read requests */ - atomic_long_t recov_writes; /* Sectors submitted from recovery */ - atomic_long_t recov_gc_writes; /* Sectors submitted from write GC */ - atomic_long_t recov_gc_reads; /* Sectors submitted from read GC */ -#endif - - spinlock_t lock; - - atomic_long_t read_failed; - atomic_long_t read_empty; - atomic_long_t read_high_ecc; - atomic_long_t read_failed_gc; - atomic_long_t write_failed; - atomic_long_t erase_failed; - - atomic_t inflight_io; /* General inflight I/O counter */ - - struct task_struct *writer_ts; - - /* Simple translation map of logical addresses to physical addresses. 
- * The logical addresses is known by the host system, while the physical - * addresses are used when writing to the disk block device. - */ - unsigned char *trans_map; - spinlock_t trans_lock; - - struct list_head compl_list; - - spinlock_t resubmit_lock; /* Resubmit list lock */ - struct list_head resubmit_list; /* Resubmit list for failed writes*/ - - mempool_t page_bio_pool; - mempool_t gen_ws_pool; - mempool_t rec_pool; - mempool_t r_rq_pool; - mempool_t w_rq_pool; - mempool_t e_rq_pool; - - struct workqueue_struct *close_wq; - struct workqueue_struct *bb_wq; - struct workqueue_struct *r_end_wq; - - struct timer_list wtimer; - - struct pblk_gc gc; -}; - -struct pblk_line_ws { - struct pblk *pblk; - struct pblk_line *line; - void *priv; - struct work_struct ws; -}; - -#define pblk_g_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_g_ctx)) -#define pblk_w_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_c_ctx)) - -#define pblk_err(pblk, fmt, ...) \ - pr_err("pblk %s: " fmt, pblk->disk->disk_name, ##__VA_ARGS__) -#define pblk_info(pblk, fmt, ...) \ - pr_info("pblk %s: " fmt, pblk->disk->disk_name, ##__VA_ARGS__) -#define pblk_warn(pblk, fmt, ...) \ - pr_warn("pblk %s: " fmt, pblk->disk->disk_name, ##__VA_ARGS__) -#define pblk_debug(pblk, fmt, ...) \ - pr_debug("pblk %s: " fmt, pblk->disk->disk_name, ##__VA_ARGS__) - -/* - * pblk ring buffer operations - */ -int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int threshold, - unsigned int seg_sz); -int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio, - unsigned int nr_entries, unsigned int *pos); -int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries, - unsigned int *pos); -void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data, - struct pblk_w_ctx w_ctx, unsigned int pos); -void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data, - struct pblk_w_ctx w_ctx, struct pblk_line *line, - u64 paddr, unsigned int pos); -struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos); -void pblk_rb_flush(struct pblk_rb *rb); - -void pblk_rb_sync_l2p(struct pblk_rb *rb); -unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd, - unsigned int pos, unsigned int nr_entries, - unsigned int count); -int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba, - struct ppa_addr ppa); -unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries); - -unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags); -unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries); -unsigned int pblk_rb_ptr_wrap(struct pblk_rb *rb, unsigned int p, - unsigned int nr_entries); -void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags); -unsigned int pblk_rb_flush_point_count(struct pblk_rb *rb); - -unsigned int pblk_rb_read_count(struct pblk_rb *rb); -unsigned int pblk_rb_sync_count(struct pblk_rb *rb); -unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos); - -int pblk_rb_tear_down_check(struct pblk_rb *rb); -int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos); -void pblk_rb_free(struct pblk_rb *rb); -ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf); - -/* - * pblk core - */ -struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type); -void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type); -int pblk_alloc_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd); -void pblk_free_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd); -void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write); -int 
pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd, - struct pblk_c_ctx *c_ctx); -void pblk_discard(struct pblk *pblk, struct bio *bio); -struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk); -struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk, - struct nvm_chk_meta *lp, - struct ppa_addr ppa); -void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd); -void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd); -int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd, void *buf); -int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd, void *buf); -int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line); -void pblk_check_chunk_state_update(struct pblk *pblk, struct nvm_rq *rqd); -struct pblk_line *pblk_line_get(struct pblk *pblk); -struct pblk_line *pblk_line_get_first_data(struct pblk *pblk); -struct pblk_line *pblk_line_replace_data(struct pblk *pblk); -void pblk_ppa_to_line_put(struct pblk *pblk, struct ppa_addr ppa); -void pblk_rq_to_line_put(struct pblk *pblk, struct nvm_rq *rqd); -int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line); -void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line); -struct pblk_line *pblk_line_get_data(struct pblk *pblk); -struct pblk_line *pblk_line_get_erase(struct pblk *pblk); -int pblk_line_erase(struct pblk *pblk, struct pblk_line *line); -int pblk_line_is_full(struct pblk_line *line); -void pblk_line_free(struct pblk_line *line); -void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line); -void pblk_line_close(struct pblk *pblk, struct pblk_line *line); -void pblk_line_close_ws(struct work_struct *work); -void pblk_pipeline_stop(struct pblk *pblk); -void __pblk_pipeline_stop(struct pblk *pblk); -void __pblk_pipeline_flush(struct pblk *pblk); -void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, - void (*work)(struct work_struct *), gfp_t gfp_mask, - struct workqueue_struct *wq); -u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line); -int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line); -int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line, - void *emeta_buf); -int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa); -void pblk_line_put(struct kref *ref); -void pblk_line_put_wq(struct kref *ref); -struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line); -u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line); -void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs); -u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs); -u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs); -int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail, - unsigned long secs_to_flush, bool skip_meta); -void pblk_down_rq(struct pblk *pblk, struct ppa_addr ppa, - unsigned long *lun_bitmap); -void pblk_down_chunk(struct pblk *pblk, struct ppa_addr ppa); -void pblk_up_chunk(struct pblk *pblk, struct ppa_addr ppa); -void pblk_up_rq(struct pblk *pblk, unsigned long *lun_bitmap); -int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags, - int nr_pages); -void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off, - int nr_pages); -void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa); -void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line, - u64 paddr); -void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa); -void 
pblk_update_map_cache(struct pblk *pblk, sector_t lba, - struct ppa_addr ppa); -void pblk_update_map_dev(struct pblk *pblk, sector_t lba, - struct ppa_addr ppa, struct ppa_addr entry_line); -int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa, - struct pblk_line *gc_line, u64 paddr); -void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas, - u64 *lba_list, int nr_secs); -int pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, - sector_t blba, int nr_secs, bool *from_cache); -void *pblk_get_meta_for_writes(struct pblk *pblk, struct nvm_rq *rqd); -void pblk_get_packed_meta(struct pblk *pblk, struct nvm_rq *rqd); - -/* - * pblk user I/O write path - */ -void pblk_write_to_cache(struct pblk *pblk, struct bio *bio, - unsigned long flags); -int pblk_write_gc_to_cache(struct pblk *pblk, struct pblk_gc_rq *gc_rq); - -/* - * pblk map - */ -int pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, - unsigned int sentry, unsigned long *lun_bitmap, - unsigned int valid_secs, struct ppa_addr *erase_ppa); -int pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry, - unsigned long *lun_bitmap, unsigned int valid_secs, - unsigned int off); - -/* - * pblk write thread - */ -int pblk_write_ts(void *data); -void pblk_write_timer_fn(struct timer_list *t); -void pblk_write_should_kick(struct pblk *pblk); -void pblk_write_kick(struct pblk *pblk); - -/* - * pblk read path - */ -extern struct bio_set pblk_bio_set; -void pblk_submit_read(struct pblk *pblk, struct bio *bio); -int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq); -/* - * pblk recovery - */ -struct pblk_line *pblk_recov_l2p(struct pblk *pblk); -int pblk_recov_pad(struct pblk *pblk); -int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta); - -/* - * pblk gc - */ -#define PBLK_GC_MAX_READERS 8 /* Max number of outstanding GC reader jobs */ -#define PBLK_GC_RQ_QD 128 /* Queue depth for inflight GC requests */ -#define PBLK_GC_L_QD 4 /* Queue depth for inflight GC lines */ - -int pblk_gc_init(struct pblk *pblk); -void pblk_gc_exit(struct pblk *pblk, bool graceful); -void pblk_gc_should_start(struct pblk *pblk); -void pblk_gc_should_stop(struct pblk *pblk); -void pblk_gc_should_kick(struct pblk *pblk); -void pblk_gc_free_full_lines(struct pblk *pblk); -void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled, - int *gc_active); -int pblk_gc_sysfs_force(struct pblk *pblk, int force); -void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line); - -/* - * pblk rate limiter - */ -void pblk_rl_init(struct pblk_rl *rl, int budget, int threshold); -void pblk_rl_free(struct pblk_rl *rl); -void pblk_rl_update_rates(struct pblk_rl *rl); -int pblk_rl_high_thrs(struct pblk_rl *rl); -unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl); -unsigned long pblk_rl_nr_user_free_blks(struct pblk_rl *rl); -int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries); -void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries); -void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries); -int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries); -void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries); -void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc); -int pblk_rl_max_io(struct pblk_rl *rl); -void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line); -void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line, - bool used); -int pblk_rl_is_limit(struct pblk_rl *rl); - -void pblk_rl_werr_line_in(struct pblk_rl *rl); 
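The pblk_rl_* prototypes above describe the write-buffer rate limiter: user and GC writes draw entries from a shared buffer budget, with the user side capped so garbage collection always keeps headroom. A standalone sketch of that accounting contract in plain C11; the struct layout, names, and numbers are invented for illustration and only loosely mirror the semantics of pblk_rl_user_may_insert(), pblk_rl_user_in() and pblk_rl_out() declared above (this is not the kernel code):

/* rl_demo.c - single-threaded model of the user/GC budget split. */
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct demo_rl {
	int rb_budget;		/* total write-buffer entries */
	int rb_user_max;	/* cap for user I/O */
	int rb_gc_max;		/* entries reserved so GC cannot starve */
	atomic_int rb_user_cnt;
	atomic_int rb_gc_cnt;
};

static bool demo_user_may_insert(struct demo_rl *rl, int nr)
{
	/* Admit user writes only while they stay under the user cap. */
	return atomic_load(&rl->rb_user_cnt) + nr <= rl->rb_user_max;
}

static void demo_user_in(struct demo_rl *rl, int nr)
{
	atomic_fetch_add(&rl->rb_user_cnt, nr);	/* entries enter the buffer */
}

static void demo_out(struct demo_rl *rl, int nr_user, int nr_gc)
{
	/* Entries leave the accounting once they are synced to media. */
	atomic_fetch_sub(&rl->rb_user_cnt, nr_user);
	atomic_fetch_sub(&rl->rb_gc_cnt, nr_gc);
}

int main(void)
{
	struct demo_rl rl = { .rb_budget = 128, .rb_user_max = 96,
			      .rb_gc_max = 32 };

	assert(demo_user_may_insert(&rl, 64));
	demo_user_in(&rl, 64);
	demo_user_in(&rl, 32);
	assert(!demo_user_may_insert(&rl, 1));	/* user cap reached */
	demo_out(&rl, 96, 0);
	assert(demo_user_may_insert(&rl, 1));	/* drained, admit again */
	puts("rate-limiter sketch ok");
	return 0;
}

In the real driver the caps are not fixed: pblk_rl_update_rates() moves the user/GC split as free lines run low, which is why the prototypes above also expose the free-block counters.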
-void pblk_rl_werr_line_out(struct pblk_rl *rl); - -/* - * pblk sysfs - */ -int pblk_sysfs_init(struct gendisk *tdisk); -void pblk_sysfs_exit(struct gendisk *tdisk); - -static inline struct nvm_rq *nvm_rq_from_c_ctx(void *c_ctx) -{ - return c_ctx - sizeof(struct nvm_rq); -} - -static inline void *emeta_to_bb(struct line_emeta *emeta) -{ - return emeta->bb_bitmap; -} - -static inline void *emeta_to_wa(struct pblk_line_meta *lm, - struct line_emeta *emeta) -{ - return emeta->bb_bitmap + lm->blk_bitmap_len; -} - -static inline void *emeta_to_lbas(struct pblk *pblk, struct line_emeta *emeta) -{ - return ((void *)emeta + pblk->lm.emeta_len[1]); -} - -static inline void *emeta_to_vsc(struct pblk *pblk, struct line_emeta *emeta) -{ - return (emeta_to_lbas(pblk, emeta) + pblk->lm.emeta_len[2]); -} - -static inline int pblk_line_vsc(struct pblk_line *line) -{ - return le32_to_cpu(*line->vsc); -} - -static inline int pblk_ppa_to_line_id(struct ppa_addr p) -{ - return p.a.blk; -} - -static inline struct pblk_line *pblk_ppa_to_line(struct pblk *pblk, - struct ppa_addr p) -{ - return &pblk->lines[pblk_ppa_to_line_id(p)]; -} - -static inline int pblk_ppa_to_pos(struct nvm_geo *geo, struct ppa_addr p) -{ - return p.a.lun * geo->num_ch + p.a.ch; -} - -static inline struct ppa_addr addr_to_gen_ppa(struct pblk *pblk, u64 paddr, - u64 line_id) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct ppa_addr ppa; - - if (geo->version == NVM_OCSSD_SPEC_12) { - struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf; - - ppa.ppa = 0; - ppa.g.blk = line_id; - ppa.g.pg = (paddr & ppaf->pg_mask) >> ppaf->pg_offset; - ppa.g.lun = (paddr & ppaf->lun_mask) >> ppaf->lun_offset; - ppa.g.ch = (paddr & ppaf->ch_mask) >> ppaf->ch_offset; - ppa.g.pl = (paddr & ppaf->pln_mask) >> ppaf->pln_offset; - ppa.g.sec = (paddr & ppaf->sec_mask) >> ppaf->sec_offset; - } else { - struct pblk_addrf *uaddrf = &pblk->uaddrf; - int secs, chnls, luns; - - ppa.ppa = 0; - - ppa.m.chk = line_id; - - paddr = div_u64_rem(paddr, uaddrf->sec_stripe, &secs); - ppa.m.sec = secs; - - paddr = div_u64_rem(paddr, uaddrf->ch_stripe, &chnls); - ppa.m.grp = chnls; - - paddr = div_u64_rem(paddr, uaddrf->lun_stripe, &luns); - ppa.m.pu = luns; - - ppa.m.sec += uaddrf->sec_stripe * paddr; - } - - return ppa; -} - -static inline struct nvm_chk_meta *pblk_dev_ppa_to_chunk(struct pblk *pblk, - struct ppa_addr p) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line *line = pblk_ppa_to_line(pblk, p); - int pos = pblk_ppa_to_pos(geo, p); - - return &line->chks[pos]; -} - -static inline u64 pblk_dev_ppa_to_chunk_addr(struct pblk *pblk, - struct ppa_addr p) -{ - struct nvm_tgt_dev *dev = pblk->dev; - - return dev_to_chunk_addr(dev->parent, &pblk->addrf, p); -} - -static inline u64 pblk_dev_ppa_to_line_addr(struct pblk *pblk, - struct ppa_addr p) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - u64 paddr; - - if (geo->version == NVM_OCSSD_SPEC_12) { - struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf; - - paddr = (u64)p.g.ch << ppaf->ch_offset; - paddr |= (u64)p.g.lun << ppaf->lun_offset; - paddr |= (u64)p.g.pg << ppaf->pg_offset; - paddr |= (u64)p.g.pl << ppaf->pln_offset; - paddr |= (u64)p.g.sec << ppaf->sec_offset; - } else { - struct pblk_addrf *uaddrf = &pblk->uaddrf; - u64 secs = p.m.sec; - int sec_stripe; - - paddr = (u64)p.m.grp * uaddrf->sec_stripe; - paddr += (u64)p.m.pu * uaddrf->sec_lun_stripe; - - secs = div_u64_rem(secs, 
uaddrf->sec_stripe, &sec_stripe); - paddr += secs * uaddrf->sec_ws_stripe; - paddr += sec_stripe; - } - - return paddr; -} - -static inline struct ppa_addr pblk_ppa32_to_ppa64(struct pblk *pblk, u32 ppa32) -{ - struct nvm_tgt_dev *dev = pblk->dev; - - return nvm_ppa32_to_ppa64(dev->parent, &pblk->addrf, ppa32); -} - -static inline u32 pblk_ppa64_to_ppa32(struct pblk *pblk, struct ppa_addr ppa64) -{ - struct nvm_tgt_dev *dev = pblk->dev; - - return nvm_ppa64_to_ppa32(dev->parent, &pblk->addrf, ppa64); -} - -static inline struct ppa_addr pblk_trans_map_get(struct pblk *pblk, - sector_t lba) -{ - struct ppa_addr ppa; - - if (pblk->addrf_len < 32) { - u32 *map = (u32 *)pblk->trans_map; - - ppa = pblk_ppa32_to_ppa64(pblk, map[lba]); - } else { - struct ppa_addr *map = (struct ppa_addr *)pblk->trans_map; - - ppa = map[lba]; - } - - return ppa; -} - -static inline void pblk_trans_map_set(struct pblk *pblk, sector_t lba, - struct ppa_addr ppa) -{ - if (pblk->addrf_len < 32) { - u32 *map = (u32 *)pblk->trans_map; - - map[lba] = pblk_ppa64_to_ppa32(pblk, ppa); - } else { - u64 *map = (u64 *)pblk->trans_map; - - map[lba] = ppa.ppa; - } -} - -static inline int pblk_ppa_empty(struct ppa_addr ppa_addr) -{ - return (ppa_addr.ppa == ADDR_EMPTY); -} - -static inline void pblk_ppa_set_empty(struct ppa_addr *ppa_addr) -{ - ppa_addr->ppa = ADDR_EMPTY; -} - -static inline bool pblk_ppa_comp(struct ppa_addr lppa, struct ppa_addr rppa) -{ - return (lppa.ppa == rppa.ppa); -} - -static inline int pblk_addr_in_cache(struct ppa_addr ppa) -{ - return (ppa.ppa != ADDR_EMPTY && ppa.c.is_cached); -} - -static inline int pblk_addr_to_cacheline(struct ppa_addr ppa) -{ - return ppa.c.line; -} - -static inline struct ppa_addr pblk_cacheline_to_addr(int addr) -{ - struct ppa_addr p; - - p.c.line = addr; - p.c.is_cached = 1; - - return p; -} - -static inline u32 pblk_calc_meta_header_crc(struct pblk *pblk, - struct line_header *header) -{ - u32 crc = ~(u32)0; - - crc = crc32_le(crc, (unsigned char *)header + sizeof(crc), - sizeof(struct line_header) - sizeof(crc)); - - return crc; -} - -static inline u32 pblk_calc_smeta_crc(struct pblk *pblk, - struct line_smeta *smeta) -{ - struct pblk_line_meta *lm = &pblk->lm; - u32 crc = ~(u32)0; - - crc = crc32_le(crc, (unsigned char *)smeta + - sizeof(struct line_header) + sizeof(crc), - lm->smeta_len - - sizeof(struct line_header) - sizeof(crc)); - - return crc; -} - -static inline u32 pblk_calc_emeta_crc(struct pblk *pblk, - struct line_emeta *emeta) -{ - struct pblk_line_meta *lm = &pblk->lm; - u32 crc = ~(u32)0; - - crc = crc32_le(crc, (unsigned char *)emeta + - sizeof(struct line_header) + sizeof(crc), - lm->emeta_len[0] - - sizeof(struct line_header) - sizeof(crc)); - - return crc; -} - -static inline int pblk_io_aligned(struct pblk *pblk, int nr_secs) -{ - return !(nr_secs % pblk->min_write_pgs); -} - -#ifdef CONFIG_NVM_PBLK_DEBUG -static inline void print_ppa(struct pblk *pblk, struct ppa_addr *p, - char *msg, int error) -{ - struct nvm_geo *geo = &pblk->dev->geo; - - if (p->c.is_cached) { - pblk_err(pblk, "ppa: (%s: %x) cache line: %llu\n", - msg, error, (u64)p->c.line); - } else if (geo->version == NVM_OCSSD_SPEC_12) { - pblk_err(pblk, "ppa: (%s: %x):ch:%d,lun:%d,blk:%d,pg:%d,pl:%d,sec:%d\n", - msg, error, - p->g.ch, p->g.lun, p->g.blk, - p->g.pg, p->g.pl, p->g.sec); - } else { - pblk_err(pblk, "ppa: (%s: %x):ch:%d,lun:%d,chk:%d,sec:%d\n", - msg, error, - p->m.grp, p->m.pu, p->m.chk, p->m.sec); - } -} - -static inline void pblk_print_failed_rqd(struct pblk *pblk, struct nvm_rq 
*rqd, - int error) -{ - int bit = -1; - - if (rqd->nr_ppas == 1) { - print_ppa(pblk, &rqd->ppa_addr, "rqd", error); - return; - } - - while ((bit = find_next_bit((void *)&rqd->ppa_status, rqd->nr_ppas, - bit + 1)) < rqd->nr_ppas) { - print_ppa(pblk, &rqd->ppa_list[bit], "rqd", error); - } - - pblk_err(pblk, "error:%d, ppa_status:%llx\n", error, rqd->ppa_status); -} - -static inline int pblk_boundary_ppa_checks(struct nvm_tgt_dev *tgt_dev, - struct ppa_addr *ppas, int nr_ppas) -{ - struct nvm_geo *geo = &tgt_dev->geo; - struct ppa_addr *ppa; - int i; - - for (i = 0; i < nr_ppas; i++) { - ppa = &ppas[i]; - - if (geo->version == NVM_OCSSD_SPEC_12) { - if (!ppa->c.is_cached && - ppa->g.ch < geo->num_ch && - ppa->g.lun < geo->num_lun && - ppa->g.pl < geo->num_pln && - ppa->g.blk < geo->num_chk && - ppa->g.pg < geo->num_pg && - ppa->g.sec < geo->ws_min) - continue; - } else { - if (!ppa->c.is_cached && - ppa->m.grp < geo->num_ch && - ppa->m.pu < geo->num_lun && - ppa->m.chk < geo->num_chk && - ppa->m.sec < geo->clba) - continue; - } - - print_ppa(tgt_dev->q->queuedata, ppa, "boundary", i); - - return 1; - } - return 0; -} - -static inline int pblk_check_io(struct pblk *pblk, struct nvm_rq *rqd) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - - if (pblk_boundary_ppa_checks(dev, ppa_list, rqd->nr_ppas)) { - WARN_ON(1); - return -EINVAL; - } - - if (rqd->opcode == NVM_OP_PWRITE) { - struct pblk_line *line; - int i; - - for (i = 0; i < rqd->nr_ppas; i++) { - line = pblk_ppa_to_line(pblk, ppa_list[i]); - - spin_lock(&line->lock); - if (line->state != PBLK_LINESTATE_OPEN) { - pblk_err(pblk, "bad ppa: line:%d,state:%d\n", - line->id, line->state); - WARN_ON(1); - spin_unlock(&line->lock); - return -EINVAL; - } - spin_unlock(&line->lock); - } - } - - return 0; -} -#endif - -static inline int pblk_boundary_paddr_checks(struct pblk *pblk, u64 paddr) -{ - struct pblk_line_meta *lm = &pblk->lm; - - if (paddr > lm->sec_per_line) - return 1; - - return 0; -} - -static inline unsigned int pblk_get_bi_idx(struct bio *bio) -{ - return bio->bi_iter.bi_idx; -} - -static inline sector_t pblk_get_lba(struct bio *bio) -{ - return bio->bi_iter.bi_sector / NR_PHY_IN_LOG; -} - -static inline unsigned int pblk_get_secs(struct bio *bio) -{ - return bio->bi_iter.bi_size / PBLK_EXPOSED_PAGE_SIZE; -} - -static inline char *pblk_disk_name(struct pblk *pblk) -{ - struct gendisk *disk = pblk->disk; - - return disk->disk_name; -} - -static inline unsigned int pblk_get_min_chks(struct pblk *pblk) -{ - struct pblk_line_meta *lm = &pblk->lm; - /* In a worst-case scenario every line will have OP invalid sectors. 
- * We will then need a minimum of 1/OP lines to free up a single line - */ - - return DIV_ROUND_UP(100, pblk->op) * lm->blk_per_line; -} - -static inline struct pblk_sec_meta *pblk_get_meta(struct pblk *pblk, - void *meta, int index) -{ - return meta + - max_t(int, sizeof(struct pblk_sec_meta), pblk->oob_meta_size) - * index; -} - -static inline int pblk_dma_meta_size(struct pblk *pblk) -{ - return max_t(int, sizeof(struct pblk_sec_meta), pblk->oob_meta_size) - * NVM_MAX_VLBA; -} - -static inline int pblk_is_oob_meta_supported(struct pblk *pblk) -{ - return pblk->oob_meta_size >= sizeof(struct pblk_sec_meta); -} -#endif /* PBLK_H_ */ diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile index cbc509784b2e..dfaacd472e5d 100644 --- a/drivers/nvme/host/Makefile +++ b/drivers/nvme/host/Makefile @@ -12,7 +12,6 @@ obj-$(CONFIG_NVME_TCP) += nvme-tcp.o nvme-core-y := core.o ioctl.o nvme-core-$(CONFIG_TRACING) += trace.o nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o -nvme-core-$(CONFIG_NVM) += lightnvm.o nvme-core-$(CONFIG_BLK_DEV_ZONED) += zns.o nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index dfd9dec0c1f6..ce33014e3eb0 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -587,9 +587,6 @@ static void nvme_free_ns(struct kref *kref) { struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref); - if (ns->ndev) - nvme_nvm_unregister(ns); - put_disk(ns->disk); nvme_put_ns_head(ns->head); nvme_put_ctrl(ns->ctrl); @@ -3218,9 +3215,6 @@ static const struct attribute_group nvme_ns_id_attr_group = { const struct attribute_group *nvme_ns_id_attr_groups[] = { &nvme_ns_id_attr_group, -#ifdef CONFIG_NVM - &nvme_nvm_attr_group, -#endif NULL, }; @@ -3767,13 +3761,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, if (nvme_update_ns_info(ns, id)) goto out_put_disk; - if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) { - if (nvme_nvm_register(ns, disk->disk_name, node)) { - dev_warn(ctrl->device, "LightNVM init failure\n"); - goto out_put_disk; - } - } - down_write(&ctrl->namespaces_rwsem); list_add_tail(&ns->list, &ctrl->namespaces); up_write(&ctrl->namespaces_rwsem); diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 305ddd415e45..22314962842d 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -342,9 +342,7 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, case NVME_IOCTL_IO64_CMD: return nvme_user_cmd64(ns->ctrl, ns, argp); default: - if (!ns->ndev) - return -ENOTTY; - return nvme_nvm_ioctl(ns, cmd, argp); + return -ENOTTY; } } diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c deleted file mode 100644 index e9d9ad47f70f..000000000000 --- a/drivers/nvme/host/lightnvm.c +++ /dev/null @@ -1,1274 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * nvme-lightnvm.c - LightNVM NVMe device - * - * Copyright (C) 2014-2015 IT University of Copenhagen - * Initial release: Matias Bjorling - */ - -#include "nvme.h" - -#include -#include -#include -#include -#include -#include - -enum nvme_nvm_admin_opcode { - nvme_nvm_admin_identity = 0xe2, - nvme_nvm_admin_get_bb_tbl = 0xf2, - nvme_nvm_admin_set_bb_tbl = 0xf1, -}; - -enum nvme_nvm_log_page { - NVME_NVM_LOG_REPORT_CHUNK = 0xca, -}; - -struct nvme_nvm_ph_rw { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd2; - __le64 metadata; - __le64 prp1; - __le64 prp2; - __le64 
spba; - __le16 length; - __le16 control; - __le32 dsmgmt; - __le64 resv; -}; - -struct nvme_nvm_erase_blk { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd[2]; - __le64 prp1; - __le64 prp2; - __le64 spba; - __le16 length; - __le16 control; - __le32 dsmgmt; - __le64 resv; -}; - -struct nvme_nvm_identity { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd[2]; - __le64 prp1; - __le64 prp2; - __u32 rsvd11[6]; -}; - -struct nvme_nvm_getbbtbl { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd[2]; - __le64 prp1; - __le64 prp2; - __le64 spba; - __u32 rsvd4[4]; -}; - -struct nvme_nvm_setbbtbl { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __le64 rsvd[2]; - __le64 prp1; - __le64 prp2; - __le64 spba; - __le16 nlb; - __u8 value; - __u8 rsvd3; - __u32 rsvd4[3]; -}; - -struct nvme_nvm_command { - union { - struct nvme_common_command common; - struct nvme_nvm_ph_rw ph_rw; - struct nvme_nvm_erase_blk erase; - struct nvme_nvm_identity identity; - struct nvme_nvm_getbbtbl get_bb; - struct nvme_nvm_setbbtbl set_bb; - }; -}; - -struct nvme_nvm_id12_grp { - __u8 mtype; - __u8 fmtype; - __le16 res16; - __u8 num_ch; - __u8 num_lun; - __u8 num_pln; - __u8 rsvd1; - __le16 num_chk; - __le16 num_pg; - __le16 fpg_sz; - __le16 csecs; - __le16 sos; - __le16 rsvd2; - __le32 trdt; - __le32 trdm; - __le32 tprt; - __le32 tprm; - __le32 tbet; - __le32 tbem; - __le32 mpos; - __le32 mccap; - __le16 cpar; - __u8 reserved[906]; -} __packed; - -struct nvme_nvm_id12_addrf { - __u8 ch_offset; - __u8 ch_len; - __u8 lun_offset; - __u8 lun_len; - __u8 pln_offset; - __u8 pln_len; - __u8 blk_offset; - __u8 blk_len; - __u8 pg_offset; - __u8 pg_len; - __u8 sec_offset; - __u8 sec_len; - __u8 res[4]; -} __packed; - -struct nvme_nvm_id12 { - __u8 ver_id; - __u8 vmnt; - __u8 cgrps; - __u8 res; - __le32 cap; - __le32 dom; - struct nvme_nvm_id12_addrf ppaf; - __u8 resv[228]; - struct nvme_nvm_id12_grp grp; - __u8 resv2[2880]; -} __packed; - -struct nvme_nvm_bb_tbl { - __u8 tblid[4]; - __le16 verid; - __le16 revid; - __le32 rvsd1; - __le32 tblks; - __le32 tfact; - __le32 tgrown; - __le32 tdresv; - __le32 thresv; - __le32 rsvd2[8]; - __u8 blk[]; -}; - -struct nvme_nvm_id20_addrf { - __u8 grp_len; - __u8 pu_len; - __u8 chk_len; - __u8 lba_len; - __u8 resv[4]; -}; - -struct nvme_nvm_id20 { - __u8 mjr; - __u8 mnr; - __u8 resv[6]; - - struct nvme_nvm_id20_addrf lbaf; - - __le32 mccap; - __u8 resv2[12]; - - __u8 wit; - __u8 resv3[31]; - - /* Geometry */ - __le16 num_grp; - __le16 num_pu; - __le32 num_chk; - __le32 clba; - __u8 resv4[52]; - - /* Write data requirements */ - __le32 ws_min; - __le32 ws_opt; - __le32 mw_cunits; - __le32 maxoc; - __le32 maxocpu; - __u8 resv5[44]; - - /* Performance related metrics */ - __le32 trdt; - __le32 trdm; - __le32 twrt; - __le32 twrm; - __le32 tcrst; - __le32 tcrsm; - __u8 resv6[40]; - - /* Reserved area */ - __u8 resv7[2816]; - - /* Vendor specific */ - __u8 vs[1024]; -}; - -struct nvme_nvm_chk_meta { - __u8 state; - __u8 type; - __u8 wi; - __u8 rsvd[5]; - __le64 slba; - __le64 cnlb; - __le64 wp; -}; - -/* - * Check we didn't inadvertently grow the command struct - */ -static inline void _nvme_nvm_check_size(void) -{ - BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64); - BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64); - BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64); - BUILD_BUG_ON(sizeof(struct nvme_nvm_getbbtbl) != 64); - BUILD_BUG_ON(sizeof(struct nvme_nvm_setbbtbl) != 64); - 
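	/*
	 * Aside for illustration, not from the original file: the kernel's
	 * static_assert() from <linux/build_bug.h> expresses the same
	 * compile-time size pinning as a plain declaration, e.g.:
	 */
	static_assert(sizeof(struct nvme_nvm_chk_meta) == 32);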
BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_grp) != 960); - BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_addrf) != 16); - BUILD_BUG_ON(sizeof(struct nvme_nvm_id12) != NVME_IDENTIFY_DATA_SIZE); - BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64); - BUILD_BUG_ON(sizeof(struct nvme_nvm_id20_addrf) != 8); - BUILD_BUG_ON(sizeof(struct nvme_nvm_id20) != NVME_IDENTIFY_DATA_SIZE); - BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta) != 32); - BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta) != - sizeof(struct nvm_chk_meta)); -} - -static void nvme_nvm_set_addr_12(struct nvm_addrf_12 *dst, - struct nvme_nvm_id12_addrf *src) -{ - dst->ch_len = src->ch_len; - dst->lun_len = src->lun_len; - dst->blk_len = src->blk_len; - dst->pg_len = src->pg_len; - dst->pln_len = src->pln_len; - dst->sec_len = src->sec_len; - - dst->ch_offset = src->ch_offset; - dst->lun_offset = src->lun_offset; - dst->blk_offset = src->blk_offset; - dst->pg_offset = src->pg_offset; - dst->pln_offset = src->pln_offset; - dst->sec_offset = src->sec_offset; - - dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset; - dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset; - dst->blk_mask = ((1ULL << dst->blk_len) - 1) << dst->blk_offset; - dst->pg_mask = ((1ULL << dst->pg_len) - 1) << dst->pg_offset; - dst->pln_mask = ((1ULL << dst->pln_len) - 1) << dst->pln_offset; - dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset; -} - -static int nvme_nvm_setup_12(struct nvme_nvm_id12 *id, - struct nvm_geo *geo) -{ - struct nvme_nvm_id12_grp *src; - int sec_per_pg, sec_per_pl, pg_per_blk; - - if (id->cgrps != 1) - return -EINVAL; - - src = &id->grp; - - if (src->mtype != 0) { - pr_err("nvm: memory type not supported\n"); - return -EINVAL; - } - - /* 1.2 spec. only reports a single version id - unfold */ - geo->major_ver_id = id->ver_id; - geo->minor_ver_id = 2; - - /* Set compacted version for upper layers */ - geo->version = NVM_OCSSD_SPEC_12; - - geo->num_ch = src->num_ch; - geo->num_lun = src->num_lun; - geo->all_luns = geo->num_ch * geo->num_lun; - - geo->num_chk = le16_to_cpu(src->num_chk); - - geo->csecs = le16_to_cpu(src->csecs); - geo->sos = le16_to_cpu(src->sos); - - pg_per_blk = le16_to_cpu(src->num_pg); - sec_per_pg = le16_to_cpu(src->fpg_sz) / geo->csecs; - sec_per_pl = sec_per_pg * src->num_pln; - geo->clba = sec_per_pl * pg_per_blk; - - geo->all_chunks = geo->all_luns * geo->num_chk; - geo->total_secs = geo->clba * geo->all_chunks; - - geo->ws_min = sec_per_pg; - geo->ws_opt = sec_per_pg; - geo->mw_cunits = geo->ws_opt << 3; /* default to MLC safe values */ - - /* Do not impose values for maximum number of open blocks as it is - * unspecified in 1.2. Users of 1.2 must be aware of this and eventually - * specify these values through a quirk if restrictions apply. 
- */ - geo->maxoc = geo->all_luns * geo->num_chk; - geo->maxocpu = geo->num_chk; - - geo->mccap = le32_to_cpu(src->mccap); - - geo->trdt = le32_to_cpu(src->trdt); - geo->trdm = le32_to_cpu(src->trdm); - geo->tprt = le32_to_cpu(src->tprt); - geo->tprm = le32_to_cpu(src->tprm); - geo->tbet = le32_to_cpu(src->tbet); - geo->tbem = le32_to_cpu(src->tbem); - - /* 1.2 compatibility */ - geo->vmnt = id->vmnt; - geo->cap = le32_to_cpu(id->cap); - geo->dom = le32_to_cpu(id->dom); - - geo->mtype = src->mtype; - geo->fmtype = src->fmtype; - - geo->cpar = le16_to_cpu(src->cpar); - geo->mpos = le32_to_cpu(src->mpos); - - geo->pln_mode = NVM_PLANE_SINGLE; - - if (geo->mpos & 0x020202) { - geo->pln_mode = NVM_PLANE_DOUBLE; - geo->ws_opt <<= 1; - } else if (geo->mpos & 0x040404) { - geo->pln_mode = NVM_PLANE_QUAD; - geo->ws_opt <<= 2; - } - - geo->num_pln = src->num_pln; - geo->num_pg = le16_to_cpu(src->num_pg); - geo->fpg_sz = le16_to_cpu(src->fpg_sz); - - nvme_nvm_set_addr_12((struct nvm_addrf_12 *)&geo->addrf, &id->ppaf); - - return 0; -} - -static void nvme_nvm_set_addr_20(struct nvm_addrf *dst, - struct nvme_nvm_id20_addrf *src) -{ - dst->ch_len = src->grp_len; - dst->lun_len = src->pu_len; - dst->chk_len = src->chk_len; - dst->sec_len = src->lba_len; - - dst->sec_offset = 0; - dst->chk_offset = dst->sec_len; - dst->lun_offset = dst->chk_offset + dst->chk_len; - dst->ch_offset = dst->lun_offset + dst->lun_len; - - dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset; - dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset; - dst->chk_mask = ((1ULL << dst->chk_len) - 1) << dst->chk_offset; - dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset; -} - -static int nvme_nvm_setup_20(struct nvme_nvm_id20 *id, - struct nvm_geo *geo) -{ - geo->major_ver_id = id->mjr; - geo->minor_ver_id = id->mnr; - - /* Set compacted version for upper layers */ - geo->version = NVM_OCSSD_SPEC_20; - - geo->num_ch = le16_to_cpu(id->num_grp); - geo->num_lun = le16_to_cpu(id->num_pu); - geo->all_luns = geo->num_ch * geo->num_lun; - - geo->num_chk = le32_to_cpu(id->num_chk); - geo->clba = le32_to_cpu(id->clba); - - geo->all_chunks = geo->all_luns * geo->num_chk; - geo->total_secs = geo->clba * geo->all_chunks; - - geo->ws_min = le32_to_cpu(id->ws_min); - geo->ws_opt = le32_to_cpu(id->ws_opt); - geo->mw_cunits = le32_to_cpu(id->mw_cunits); - geo->maxoc = le32_to_cpu(id->maxoc); - geo->maxocpu = le32_to_cpu(id->maxocpu); - - geo->trdt = le32_to_cpu(id->trdt); - geo->trdm = le32_to_cpu(id->trdm); - geo->tprt = le32_to_cpu(id->twrt); - geo->tprm = le32_to_cpu(id->twrm); - geo->tbet = le32_to_cpu(id->tcrst); - geo->tbem = le32_to_cpu(id->tcrsm); - - nvme_nvm_set_addr_20(&geo->addrf, &id->lbaf); - - return 0; -} - -static int nvme_nvm_identity(struct nvm_dev *nvmdev) -{ - struct nvme_ns *ns = nvmdev->q->queuedata; - struct nvme_nvm_id12 *id; - struct nvme_nvm_command c = {}; - int ret; - - c.identity.opcode = nvme_nvm_admin_identity; - c.identity.nsid = cpu_to_le32(ns->head->ns_id); - - id = kmalloc(sizeof(struct nvme_nvm_id12), GFP_KERNEL); - if (!id) - return -ENOMEM; - - ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c, - id, sizeof(struct nvme_nvm_id12)); - if (ret) { - ret = -EIO; - goto out; - } - - /* - * The 1.2 and 2.0 specifications share the first byte in their geometry - * command to make it possible to know what version a device implements. 
- */ - switch (id->ver_id) { - case 1: - ret = nvme_nvm_setup_12(id, &nvmdev->geo); - break; - case 2: - ret = nvme_nvm_setup_20((struct nvme_nvm_id20 *)id, - &nvmdev->geo); - break; - default: - dev_err(ns->ctrl->device, "OCSSD revision not supported (%d)\n", - id->ver_id); - ret = -EINVAL; - } - -out: - kfree(id); - return ret; -} - -static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa, - u8 *blks) -{ - struct request_queue *q = nvmdev->q; - struct nvm_geo *geo = &nvmdev->geo; - struct nvme_ns *ns = q->queuedata; - struct nvme_ctrl *ctrl = ns->ctrl; - struct nvme_nvm_command c = {}; - struct nvme_nvm_bb_tbl *bb_tbl; - int nr_blks = geo->num_chk * geo->num_pln; - int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blks; - int ret = 0; - - c.get_bb.opcode = nvme_nvm_admin_get_bb_tbl; - c.get_bb.nsid = cpu_to_le32(ns->head->ns_id); - c.get_bb.spba = cpu_to_le64(ppa.ppa); - - bb_tbl = kzalloc(tblsz, GFP_KERNEL); - if (!bb_tbl) - return -ENOMEM; - - ret = nvme_submit_sync_cmd(ctrl->admin_q, (struct nvme_command *)&c, - bb_tbl, tblsz); - if (ret) { - dev_err(ctrl->device, "get bad block table failed (%d)\n", ret); - ret = -EIO; - goto out; - } - - if (bb_tbl->tblid[0] != 'B' || bb_tbl->tblid[1] != 'B' || - bb_tbl->tblid[2] != 'L' || bb_tbl->tblid[3] != 'T') { - dev_err(ctrl->device, "bbt format mismatch\n"); - ret = -EINVAL; - goto out; - } - - if (le16_to_cpu(bb_tbl->verid) != 1) { - ret = -EINVAL; - dev_err(ctrl->device, "bbt version not supported\n"); - goto out; - } - - if (le32_to_cpu(bb_tbl->tblks) != nr_blks) { - ret = -EINVAL; - dev_err(ctrl->device, - "bbt unsuspected blocks returned (%u!=%u)", - le32_to_cpu(bb_tbl->tblks), nr_blks); - goto out; - } - - memcpy(blks, bb_tbl->blk, geo->num_chk * geo->num_pln); -out: - kfree(bb_tbl); - return ret; -} - -static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr *ppas, - int nr_ppas, int type) -{ - struct nvme_ns *ns = nvmdev->q->queuedata; - struct nvme_nvm_command c = {}; - int ret = 0; - - c.set_bb.opcode = nvme_nvm_admin_set_bb_tbl; - c.set_bb.nsid = cpu_to_le32(ns->head->ns_id); - c.set_bb.spba = cpu_to_le64(ppas->ppa); - c.set_bb.nlb = cpu_to_le16(nr_ppas - 1); - c.set_bb.value = type; - - ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c, - NULL, 0); - if (ret) - dev_err(ns->ctrl->device, "set bad block table failed (%d)\n", - ret); - return ret; -} - -/* - * Expect the lba in device format - */ -static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev, - sector_t slba, int nchks, - struct nvm_chk_meta *meta) -{ - struct nvm_geo *geo = &ndev->geo; - struct nvme_ns *ns = ndev->q->queuedata; - struct nvme_ctrl *ctrl = ns->ctrl; - struct nvme_nvm_chk_meta *dev_meta, *dev_meta_off; - struct ppa_addr ppa; - size_t left = nchks * sizeof(struct nvme_nvm_chk_meta); - size_t log_pos, offset, len; - int i, max_len; - int ret = 0; - - /* - * limit requests to maximum 256K to avoid issuing arbitrary large - * requests when the device does not specific a maximum transfer size. 
- */ - max_len = min_t(unsigned int, ctrl->max_hw_sectors << 9, 256 * 1024); - - dev_meta = kmalloc(max_len, GFP_KERNEL); - if (!dev_meta) - return -ENOMEM; - - /* Normalize lba address space to obtain log offset */ - ppa.ppa = slba; - ppa = dev_to_generic_addr(ndev, ppa); - - log_pos = ppa.m.chk; - log_pos += ppa.m.pu * geo->num_chk; - log_pos += ppa.m.grp * geo->num_lun * geo->num_chk; - - offset = log_pos * sizeof(struct nvme_nvm_chk_meta); - - while (left) { - len = min_t(unsigned int, left, max_len); - - memset(dev_meta, 0, max_len); - dev_meta_off = dev_meta; - - ret = nvme_get_log(ctrl, ns->head->ns_id, - NVME_NVM_LOG_REPORT_CHUNK, 0, NVME_CSI_NVM, - dev_meta, len, offset); - if (ret) { - dev_err(ctrl->device, "Get REPORT CHUNK log error\n"); - break; - } - - for (i = 0; i < len; i += sizeof(struct nvme_nvm_chk_meta)) { - meta->state = dev_meta_off->state; - meta->type = dev_meta_off->type; - meta->wi = dev_meta_off->wi; - meta->slba = le64_to_cpu(dev_meta_off->slba); - meta->cnlb = le64_to_cpu(dev_meta_off->cnlb); - meta->wp = le64_to_cpu(dev_meta_off->wp); - - meta++; - dev_meta_off++; - } - - offset += len; - left -= len; - } - - kfree(dev_meta); - - return ret; -} - -static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns, - struct nvme_nvm_command *c) -{ - c->ph_rw.opcode = rqd->opcode; - c->ph_rw.nsid = cpu_to_le32(ns->head->ns_id); - c->ph_rw.spba = cpu_to_le64(rqd->ppa_addr.ppa); - c->ph_rw.metadata = cpu_to_le64(rqd->dma_meta_list); - c->ph_rw.control = cpu_to_le16(rqd->flags); - c->ph_rw.length = cpu_to_le16(rqd->nr_ppas - 1); -} - -static void nvme_nvm_end_io(struct request *rq, blk_status_t status) -{ - struct nvm_rq *rqd = rq->end_io_data; - - rqd->ppa_status = le64_to_cpu(nvme_req(rq)->result.u64); - rqd->error = nvme_req(rq)->status; - nvm_end_io(rqd); - - kfree(nvme_req(rq)->cmd); - blk_mq_free_request(rq); -} - -static struct request *nvme_nvm_alloc_request(struct request_queue *q, - struct nvm_rq *rqd, - struct nvme_nvm_command *cmd) -{ - struct nvme_ns *ns = q->queuedata; - struct request *rq; - - nvme_nvm_rqtocmd(rqd, ns, cmd); - - rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0); - if (IS_ERR(rq)) - return rq; - - rq->cmd_flags &= ~REQ_FAILFAST_DRIVER; - - if (rqd->bio) - blk_rq_append_bio(rq, rqd->bio); - else - rq->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM); - - return rq; -} - -static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd, - void *buf) -{ - struct nvm_geo *geo = &dev->geo; - struct request_queue *q = dev->q; - struct nvme_nvm_command *cmd; - struct request *rq; - int ret; - - cmd = kzalloc(sizeof(struct nvme_nvm_command), GFP_KERNEL); - if (!cmd) - return -ENOMEM; - - rq = nvme_nvm_alloc_request(q, rqd, cmd); - if (IS_ERR(rq)) { - ret = PTR_ERR(rq); - goto err_free_cmd; - } - - if (buf) { - ret = blk_rq_map_kern(q, rq, buf, geo->csecs * rqd->nr_ppas, - GFP_KERNEL); - if (ret) - goto err_free_cmd; - } - - rq->end_io_data = rqd; - - blk_execute_rq_nowait(NULL, rq, 0, nvme_nvm_end_io); - - return 0; - -err_free_cmd: - kfree(cmd); - return ret; -} - -static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name, - int size) -{ - struct nvme_ns *ns = nvmdev->q->queuedata; - - return dma_pool_create(name, ns->ctrl->dev, size, PAGE_SIZE, 0); -} - -static void nvme_nvm_destroy_dma_pool(void *pool) -{ - struct dma_pool *dma_pool = pool; - - dma_pool_destroy(dma_pool); -} - -static void *nvme_nvm_dev_dma_alloc(struct nvm_dev *dev, void *pool, - gfp_t mem_flags, dma_addr_t *dma_handler) -{ - 
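	/*
	 * Illustrative aside (hypothetical caller, not in the original
	 * file): the lightnvm core pairs these two hooks roughly as
	 *
	 *	dma_addr_t dma;
	 *	void *buf = dev->ops->dev_dma_alloc(dev, pool, GFP_KERNEL, &dma);
	 *	if (buf)
	 *		dev->ops->dev_dma_free(pool, buf, dma);
	 *
	 * so the NVMe driver only supplies the allocator; the pool itself is
	 * created and torn down through the *_dma_pool callbacks above.
	 */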
return dma_pool_alloc(pool, mem_flags, dma_handler); -} - -static void nvme_nvm_dev_dma_free(void *pool, void *addr, - dma_addr_t dma_handler) -{ - dma_pool_free(pool, addr, dma_handler); -} - -static struct nvm_dev_ops nvme_nvm_dev_ops = { - .identity = nvme_nvm_identity, - - .get_bb_tbl = nvme_nvm_get_bb_tbl, - .set_bb_tbl = nvme_nvm_set_bb_tbl, - - .get_chk_meta = nvme_nvm_get_chk_meta, - - .submit_io = nvme_nvm_submit_io, - - .create_dma_pool = nvme_nvm_create_dma_pool, - .destroy_dma_pool = nvme_nvm_destroy_dma_pool, - .dev_dma_alloc = nvme_nvm_dev_dma_alloc, - .dev_dma_free = nvme_nvm_dev_dma_free, -}; - -static int nvme_nvm_submit_user_cmd(struct request_queue *q, - struct nvme_ns *ns, - struct nvme_nvm_command *vcmd, - void __user *ubuf, unsigned int bufflen, - void __user *meta_buf, unsigned int meta_len, - void __user *ppa_buf, unsigned int ppa_len, - u32 *result, u64 *status, unsigned int timeout) -{ - bool write = nvme_is_write((struct nvme_command *)vcmd); - struct nvm_dev *dev = ns->ndev; - struct request *rq; - struct bio *bio = NULL; - __le64 *ppa_list = NULL; - dma_addr_t ppa_dma; - __le64 *metadata = NULL; - dma_addr_t metadata_dma; - DECLARE_COMPLETION_ONSTACK(wait); - int ret = 0; - - rq = nvme_alloc_request(q, (struct nvme_command *)vcmd, 0); - if (IS_ERR(rq)) { - ret = -ENOMEM; - goto err_cmd; - } - - if (timeout) - rq->timeout = timeout; - - if (ppa_buf && ppa_len) { - ppa_list = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, &ppa_dma); - if (!ppa_list) { - ret = -ENOMEM; - goto err_rq; - } - if (copy_from_user(ppa_list, (void __user *)ppa_buf, - sizeof(u64) * (ppa_len + 1))) { - ret = -EFAULT; - goto err_ppa; - } - vcmd->ph_rw.spba = cpu_to_le64(ppa_dma); - } else { - vcmd->ph_rw.spba = cpu_to_le64((uintptr_t)ppa_buf); - } - - if (ubuf && bufflen) { - ret = blk_rq_map_user(q, rq, NULL, ubuf, bufflen, GFP_KERNEL); - if (ret) - goto err_ppa; - bio = rq->bio; - - if (meta_buf && meta_len) { - metadata = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, - &metadata_dma); - if (!metadata) { - ret = -ENOMEM; - goto err_map; - } - - if (write) { - if (copy_from_user(metadata, - (void __user *)meta_buf, - meta_len)) { - ret = -EFAULT; - goto err_meta; - } - } - vcmd->ph_rw.metadata = cpu_to_le64(metadata_dma); - } - - bio_set_dev(bio, ns->disk->part0); - } - - blk_execute_rq(NULL, rq, 0); - - if (nvme_req(rq)->flags & NVME_REQ_CANCELLED) - ret = -EINTR; - else if (nvme_req(rq)->status & 0x7ff) - ret = -EIO; - if (result) - *result = nvme_req(rq)->status & 0x7ff; - if (status) - *status = le64_to_cpu(nvme_req(rq)->result.u64); - - if (metadata && !ret && !write) { - if (copy_to_user(meta_buf, (void *)metadata, meta_len)) - ret = -EFAULT; - } -err_meta: - if (meta_buf && meta_len) - dma_pool_free(dev->dma_pool, metadata, metadata_dma); -err_map: - if (bio) - blk_rq_unmap_user(bio); -err_ppa: - if (ppa_buf && ppa_len) - dma_pool_free(dev->dma_pool, ppa_list, ppa_dma); -err_rq: - blk_mq_free_request(rq); -err_cmd: - return ret; -} - -static int nvme_nvm_submit_vio(struct nvme_ns *ns, - struct nvm_user_vio __user *uvio) -{ - struct nvm_user_vio vio; - struct nvme_nvm_command c; - unsigned int length; - int ret; - - if (copy_from_user(&vio, uvio, sizeof(vio))) - return -EFAULT; - if (vio.flags) - return -EINVAL; - - memset(&c, 0, sizeof(c)); - c.ph_rw.opcode = vio.opcode; - c.ph_rw.nsid = cpu_to_le32(ns->head->ns_id); - c.ph_rw.control = cpu_to_le16(vio.control); - c.ph_rw.length = cpu_to_le16(vio.nppas); - - length = (vio.nppas + 1) << ns->lba_shift; - - ret = 
nvme_nvm_submit_user_cmd(ns->queue, ns, &c, - (void __user *)(uintptr_t)vio.addr, length, - (void __user *)(uintptr_t)vio.metadata, - vio.metadata_len, - (void __user *)(uintptr_t)vio.ppa_list, vio.nppas, - &vio.result, &vio.status, 0); - - if (ret && copy_to_user(uvio, &vio, sizeof(vio))) - return -EFAULT; - - return ret; -} - -static int nvme_nvm_user_vcmd(struct nvme_ns *ns, int admin, - struct nvm_passthru_vio __user *uvcmd) -{ - struct nvm_passthru_vio vcmd; - struct nvme_nvm_command c; - struct request_queue *q; - unsigned int timeout = 0; - int ret; - - if (copy_from_user(&vcmd, uvcmd, sizeof(vcmd))) - return -EFAULT; - if ((vcmd.opcode != 0xF2) && (!capable(CAP_SYS_ADMIN))) - return -EACCES; - if (vcmd.flags) - return -EINVAL; - - memset(&c, 0, sizeof(c)); - c.common.opcode = vcmd.opcode; - c.common.nsid = cpu_to_le32(ns->head->ns_id); - c.common.cdw2[0] = cpu_to_le32(vcmd.cdw2); - c.common.cdw2[1] = cpu_to_le32(vcmd.cdw3); - /* cdw11-12 */ - c.ph_rw.length = cpu_to_le16(vcmd.nppas); - c.ph_rw.control = cpu_to_le16(vcmd.control); - c.common.cdw13 = cpu_to_le32(vcmd.cdw13); - c.common.cdw14 = cpu_to_le32(vcmd.cdw14); - c.common.cdw15 = cpu_to_le32(vcmd.cdw15); - - if (vcmd.timeout_ms) - timeout = msecs_to_jiffies(vcmd.timeout_ms); - - q = admin ? ns->ctrl->admin_q : ns->queue; - - ret = nvme_nvm_submit_user_cmd(q, ns, - (struct nvme_nvm_command *)&c, - (void __user *)(uintptr_t)vcmd.addr, vcmd.data_len, - (void __user *)(uintptr_t)vcmd.metadata, - vcmd.metadata_len, - (void __user *)(uintptr_t)vcmd.ppa_list, vcmd.nppas, - &vcmd.result, &vcmd.status, timeout); - - if (ret && copy_to_user(uvcmd, &vcmd, sizeof(vcmd))) - return -EFAULT; - - return ret; -} - -int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *argp) -{ - switch (cmd) { - case NVME_NVM_IOCTL_ADMIN_VIO: - return nvme_nvm_user_vcmd(ns, 1, argp); - case NVME_NVM_IOCTL_IO_VIO: - return nvme_nvm_user_vcmd(ns, 0, argp); - case NVME_NVM_IOCTL_SUBMIT_VIO: - return nvme_nvm_submit_vio(ns, argp); - default: - return -ENOTTY; - } -} - -int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) -{ - struct request_queue *q = ns->queue; - struct nvm_dev *dev; - struct nvm_geo *geo; - - _nvme_nvm_check_size(); - - dev = nvm_alloc_dev(node); - if (!dev) - return -ENOMEM; - - /* Note that csecs and sos will be overridden if it is a 1.2 drive. 
*/ - geo = &dev->geo; - geo->csecs = 1 << ns->lba_shift; - geo->sos = ns->ms; - if (ns->features & NVME_NS_EXT_LBAS) - geo->ext = true; - else - geo->ext = false; - geo->mdts = ns->ctrl->max_hw_sectors; - - dev->q = q; - memcpy(dev->name, disk_name, DISK_NAME_LEN); - dev->ops = &nvme_nvm_dev_ops; - dev->private_data = ns; - ns->ndev = dev; - - return nvm_register(dev); -} - -void nvme_nvm_unregister(struct nvme_ns *ns) -{ - nvm_unregister(ns->ndev); -} - -static ssize_t nvm_dev_attr_show(struct device *dev, - struct device_attribute *dattr, char *page) -{ - struct nvme_ns *ns = nvme_get_ns_from_dev(dev); - struct nvm_dev *ndev = ns->ndev; - struct nvm_geo *geo = &ndev->geo; - struct attribute *attr; - - if (!ndev) - return 0; - - attr = &dattr->attr; - - if (strcmp(attr->name, "version") == 0) { - if (geo->major_ver_id == 1) - return scnprintf(page, PAGE_SIZE, "%u\n", - geo->major_ver_id); - else - return scnprintf(page, PAGE_SIZE, "%u.%u\n", - geo->major_ver_id, - geo->minor_ver_id); - } else if (strcmp(attr->name, "capabilities") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->cap); - } else if (strcmp(attr->name, "read_typ") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->trdt); - } else if (strcmp(attr->name, "read_max") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->trdm); - } else { - return scnprintf(page, - PAGE_SIZE, - "Unhandled attr(%s) in `%s`\n", - attr->name, __func__); - } -} - -static ssize_t nvm_dev_attr_show_ppaf(struct nvm_addrf_12 *ppaf, char *page) -{ - return scnprintf(page, PAGE_SIZE, - "0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", - ppaf->ch_offset, ppaf->ch_len, - ppaf->lun_offset, ppaf->lun_len, - ppaf->pln_offset, ppaf->pln_len, - ppaf->blk_offset, ppaf->blk_len, - ppaf->pg_offset, ppaf->pg_len, - ppaf->sec_offset, ppaf->sec_len); -} - -static ssize_t nvm_dev_attr_show_12(struct device *dev, - struct device_attribute *dattr, char *page) -{ - struct nvme_ns *ns = nvme_get_ns_from_dev(dev); - struct nvm_dev *ndev = ns->ndev; - struct nvm_geo *geo = &ndev->geo; - struct attribute *attr; - - if (!ndev) - return 0; - - attr = &dattr->attr; - - if (strcmp(attr->name, "vendor_opcode") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->vmnt); - } else if (strcmp(attr->name, "device_mode") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->dom); - /* kept for compatibility */ - } else if (strcmp(attr->name, "media_manager") == 0) { - return scnprintf(page, PAGE_SIZE, "%s\n", "gennvm"); - } else if (strcmp(attr->name, "ppa_format") == 0) { - return nvm_dev_attr_show_ppaf((void *)&geo->addrf, page); - } else if (strcmp(attr->name, "media_type") == 0) { /* u8 */ - return scnprintf(page, PAGE_SIZE, "%u\n", geo->mtype); - } else if (strcmp(attr->name, "flash_media_type") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->fmtype); - } else if (strcmp(attr->name, "num_channels") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_ch); - } else if (strcmp(attr->name, "num_luns") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_lun); - } else if (strcmp(attr->name, "num_planes") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_pln); - } else if (strcmp(attr->name, "num_blocks") == 0) { /* u16 */ - return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_chk); - } else if (strcmp(attr->name, "num_pages") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_pg); - } else if (strcmp(attr->name, "page_size") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->fpg_sz); - } else if 
(strcmp(attr->name, "hw_sector_size") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->csecs); - } else if (strcmp(attr->name, "oob_sector_size") == 0) {/* u32 */ - return scnprintf(page, PAGE_SIZE, "%u\n", geo->sos); - } else if (strcmp(attr->name, "prog_typ") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprt); - } else if (strcmp(attr->name, "prog_max") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprm); - } else if (strcmp(attr->name, "erase_typ") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbet); - } else if (strcmp(attr->name, "erase_max") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbem); - } else if (strcmp(attr->name, "multiplane_modes") == 0) { - return scnprintf(page, PAGE_SIZE, "0x%08x\n", geo->mpos); - } else if (strcmp(attr->name, "media_capabilities") == 0) { - return scnprintf(page, PAGE_SIZE, "0x%08x\n", geo->mccap); - } else if (strcmp(attr->name, "max_phys_secs") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", NVM_MAX_VLBA); - } else { - return scnprintf(page, PAGE_SIZE, - "Unhandled attr(%s) in `%s`\n", - attr->name, __func__); - } -} - -static ssize_t nvm_dev_attr_show_20(struct device *dev, - struct device_attribute *dattr, char *page) -{ - struct nvme_ns *ns = nvme_get_ns_from_dev(dev); - struct nvm_dev *ndev = ns->ndev; - struct nvm_geo *geo = &ndev->geo; - struct attribute *attr; - - if (!ndev) - return 0; - - attr = &dattr->attr; - - if (strcmp(attr->name, "groups") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_ch); - } else if (strcmp(attr->name, "punits") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_lun); - } else if (strcmp(attr->name, "chunks") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_chk); - } else if (strcmp(attr->name, "clba") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->clba); - } else if (strcmp(attr->name, "ws_min") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->ws_min); - } else if (strcmp(attr->name, "ws_opt") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->ws_opt); - } else if (strcmp(attr->name, "maxoc") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->maxoc); - } else if (strcmp(attr->name, "maxocpu") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->maxocpu); - } else if (strcmp(attr->name, "mw_cunits") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->mw_cunits); - } else if (strcmp(attr->name, "write_typ") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprt); - } else if (strcmp(attr->name, "write_max") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprm); - } else if (strcmp(attr->name, "reset_typ") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbet); - } else if (strcmp(attr->name, "reset_max") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbem); - } else { - return scnprintf(page, PAGE_SIZE, - "Unhandled attr(%s) in `%s`\n", - attr->name, __func__); - } -} - -#define NVM_DEV_ATTR_RO(_name) \ - DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show, NULL) -#define NVM_DEV_ATTR_12_RO(_name) \ - DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show_12, NULL) -#define NVM_DEV_ATTR_20_RO(_name) \ - DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show_20, NULL) - -/* general attributes */ -static NVM_DEV_ATTR_RO(version); -static NVM_DEV_ATTR_RO(capabilities); - -static NVM_DEV_ATTR_RO(read_typ); -static NVM_DEV_ATTR_RO(read_max); - -/* 1.2 values */ -static NVM_DEV_ATTR_12_RO(vendor_opcode); -static NVM_DEV_ATTR_12_RO(device_mode); -static 
NVM_DEV_ATTR_12_RO(ppa_format); -static NVM_DEV_ATTR_12_RO(media_manager); -static NVM_DEV_ATTR_12_RO(media_type); -static NVM_DEV_ATTR_12_RO(flash_media_type); -static NVM_DEV_ATTR_12_RO(num_channels); -static NVM_DEV_ATTR_12_RO(num_luns); -static NVM_DEV_ATTR_12_RO(num_planes); -static NVM_DEV_ATTR_12_RO(num_blocks); -static NVM_DEV_ATTR_12_RO(num_pages); -static NVM_DEV_ATTR_12_RO(page_size); -static NVM_DEV_ATTR_12_RO(hw_sector_size); -static NVM_DEV_ATTR_12_RO(oob_sector_size); -static NVM_DEV_ATTR_12_RO(prog_typ); -static NVM_DEV_ATTR_12_RO(prog_max); -static NVM_DEV_ATTR_12_RO(erase_typ); -static NVM_DEV_ATTR_12_RO(erase_max); -static NVM_DEV_ATTR_12_RO(multiplane_modes); -static NVM_DEV_ATTR_12_RO(media_capabilities); -static NVM_DEV_ATTR_12_RO(max_phys_secs); - -/* 2.0 values */ -static NVM_DEV_ATTR_20_RO(groups); -static NVM_DEV_ATTR_20_RO(punits); -static NVM_DEV_ATTR_20_RO(chunks); -static NVM_DEV_ATTR_20_RO(clba); -static NVM_DEV_ATTR_20_RO(ws_min); -static NVM_DEV_ATTR_20_RO(ws_opt); -static NVM_DEV_ATTR_20_RO(maxoc); -static NVM_DEV_ATTR_20_RO(maxocpu); -static NVM_DEV_ATTR_20_RO(mw_cunits); -static NVM_DEV_ATTR_20_RO(write_typ); -static NVM_DEV_ATTR_20_RO(write_max); -static NVM_DEV_ATTR_20_RO(reset_typ); -static NVM_DEV_ATTR_20_RO(reset_max); - -static struct attribute *nvm_dev_attrs[] = { - /* version agnostic attrs */ - &dev_attr_version.attr, - &dev_attr_capabilities.attr, - &dev_attr_read_typ.attr, - &dev_attr_read_max.attr, - - /* 1.2 attrs */ - &dev_attr_vendor_opcode.attr, - &dev_attr_device_mode.attr, - &dev_attr_media_manager.attr, - &dev_attr_ppa_format.attr, - &dev_attr_media_type.attr, - &dev_attr_flash_media_type.attr, - &dev_attr_num_channels.attr, - &dev_attr_num_luns.attr, - &dev_attr_num_planes.attr, - &dev_attr_num_blocks.attr, - &dev_attr_num_pages.attr, - &dev_attr_page_size.attr, - &dev_attr_hw_sector_size.attr, - &dev_attr_oob_sector_size.attr, - &dev_attr_prog_typ.attr, - &dev_attr_prog_max.attr, - &dev_attr_erase_typ.attr, - &dev_attr_erase_max.attr, - &dev_attr_multiplane_modes.attr, - &dev_attr_media_capabilities.attr, - &dev_attr_max_phys_secs.attr, - - /* 2.0 attrs */ - &dev_attr_groups.attr, - &dev_attr_punits.attr, - &dev_attr_chunks.attr, - &dev_attr_clba.attr, - &dev_attr_ws_min.attr, - &dev_attr_ws_opt.attr, - &dev_attr_maxoc.attr, - &dev_attr_maxocpu.attr, - &dev_attr_mw_cunits.attr, - - &dev_attr_write_typ.attr, - &dev_attr_write_max.attr, - &dev_attr_reset_typ.attr, - &dev_attr_reset_max.attr, - - NULL, -}; - -static umode_t nvm_dev_attrs_visible(struct kobject *kobj, - struct attribute *attr, int index) -{ - struct device *dev = kobj_to_dev(kobj); - struct gendisk *disk = dev_to_disk(dev); - struct nvme_ns *ns = disk->private_data; - struct nvm_dev *ndev = ns->ndev; - struct device_attribute *dev_attr = - container_of(attr, typeof(*dev_attr), attr); - - if (!ndev) - return 0; - - if (dev_attr->show == nvm_dev_attr_show) - return attr->mode; - - switch (ndev->geo.major_ver_id) { - case 1: - if (dev_attr->show == nvm_dev_attr_show_12) - return attr->mode; - break; - case 2: - if (dev_attr->show == nvm_dev_attr_show_20) - return attr->mode; - break; - } - - return 0; -} - -const struct attribute_group nvme_nvm_attr_group = { - .name = "lightnvm", - .attrs = nvm_dev_attrs, - .is_visible = nvm_dev_attrs_visible, -}; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 5cd1fa3b8464..ab803f91ace1 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -11,7 +11,6 @@ #include #include #include -#include 
#include #include #include @@ -92,11 +91,6 @@ enum nvme_quirks { */ NVME_QUIRK_NO_DEEPEST_PS = (1 << 5), - /* - * Supports the LighNVM command set if indicated in vs[1]. - */ - NVME_QUIRK_LIGHTNVM = (1 << 6), - /* * Set MEDIUM priority on SQ creation */ @@ -823,26 +817,6 @@ static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) } #endif -#ifdef CONFIG_NVM -int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node); -void nvme_nvm_unregister(struct nvme_ns *ns); -extern const struct attribute_group nvme_nvm_attr_group; -int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *argp); -#else -static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, - int node) -{ - return 0; -} - -static inline void nvme_nvm_unregister(struct nvme_ns *ns) {}; -static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, - void __user *argp) -{ - return -ENOTTY; -} -#endif /* CONFIG_NVM */ - static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) { return dev_to_disk(dev)->private_data; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 51852085239e..db7a9bee2014 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3243,12 +3243,6 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_DEVICE(0x1b4b, 0x1092), /* Lexar 256 GB SSD */ .driver_data = NVME_QUIRK_NO_NS_DESC_LIST | NVME_QUIRK_IGNORE_DEV_SUBNQN, }, - { PCI_DEVICE(0x1d1d, 0x1f1f), /* LighNVM qemu device */ - .driver_data = NVME_QUIRK_LIGHTNVM, }, - { PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */ - .driver_data = NVME_QUIRK_LIGHTNVM, }, - { PCI_DEVICE(0x1d1d, 0x2601), /* CNEX Granby */ - .driver_data = NVME_QUIRK_LIGHTNVM, }, { PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x1cc1, 0x8201), /* ADATA SX8200PNP 512GB */ diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h deleted file mode 100644 index 0908abda9c1b..000000000000 --- a/include/linux/lightnvm.h +++ /dev/null @@ -1,697 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef NVM_H -#define NVM_H - -#include -#include -#include - -enum { - NVM_IO_OK = 0, - NVM_IO_REQUEUE = 1, - NVM_IO_DONE = 2, - NVM_IO_ERR = 3, - - NVM_IOTYPE_NONE = 0, - NVM_IOTYPE_GC = 1, -}; - -/* common format */ -#define NVM_GEN_CH_BITS (8) -#define NVM_GEN_LUN_BITS (8) -#define NVM_GEN_BLK_BITS (16) -#define NVM_GEN_RESERVED (32) - -/* 1.2 format */ -#define NVM_12_PG_BITS (16) -#define NVM_12_PL_BITS (4) -#define NVM_12_SEC_BITS (4) -#define NVM_12_RESERVED (8) - -/* 2.0 format */ -#define NVM_20_SEC_BITS (24) -#define NVM_20_RESERVED (8) - -enum { - NVM_OCSSD_SPEC_12 = 12, - NVM_OCSSD_SPEC_20 = 20, -}; - -struct ppa_addr { - /* Generic structure for all addresses */ - union { - /* generic device format */ - struct { - u64 ch : NVM_GEN_CH_BITS; - u64 lun : NVM_GEN_LUN_BITS; - u64 blk : NVM_GEN_BLK_BITS; - u64 reserved : NVM_GEN_RESERVED; - } a; - - /* 1.2 device format */ - struct { - u64 ch : NVM_GEN_CH_BITS; - u64 lun : NVM_GEN_LUN_BITS; - u64 blk : NVM_GEN_BLK_BITS; - u64 pg : NVM_12_PG_BITS; - u64 pl : NVM_12_PL_BITS; - u64 sec : NVM_12_SEC_BITS; - u64 reserved : NVM_12_RESERVED; - } g; - - /* 2.0 device format */ - struct { - u64 grp : NVM_GEN_CH_BITS; - u64 pu : NVM_GEN_LUN_BITS; - u64 chk : NVM_GEN_BLK_BITS; - u64 sec : NVM_20_SEC_BITS; - u64 reserved : NVM_20_RESERVED; - } m; - - struct { - u64 line : 63; - u64 is_cached : 1; - } c; - - u64 ppa; - }; -}; - -struct nvm_rq; -struct nvm_id; -struct nvm_dev; -struct 
nvm_tgt_dev; -struct nvm_chk_meta; - -typedef int (nvm_id_fn)(struct nvm_dev *); -typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, u8 *); -typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct ppa_addr *, int, int); -typedef int (nvm_get_chk_meta_fn)(struct nvm_dev *, sector_t, int, - struct nvm_chk_meta *); -typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *, void *); -typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *, int); -typedef void (nvm_destroy_dma_pool_fn)(void *); -typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t, - dma_addr_t *); -typedef void (nvm_dev_dma_free_fn)(void *, void*, dma_addr_t); - -struct nvm_dev_ops { - nvm_id_fn *identity; - nvm_op_bb_tbl_fn *get_bb_tbl; - nvm_op_set_bb_fn *set_bb_tbl; - - nvm_get_chk_meta_fn *get_chk_meta; - - nvm_submit_io_fn *submit_io; - - nvm_create_dma_pool_fn *create_dma_pool; - nvm_destroy_dma_pool_fn *destroy_dma_pool; - nvm_dev_dma_alloc_fn *dev_dma_alloc; - nvm_dev_dma_free_fn *dev_dma_free; -}; - -#ifdef CONFIG_NVM - -#include -#include - -enum { - /* HW Responsibilities */ - NVM_RSP_L2P = 1 << 0, - NVM_RSP_ECC = 1 << 1, - - /* Physical Adressing Mode */ - NVM_ADDRMODE_LINEAR = 0, - NVM_ADDRMODE_CHANNEL = 1, - - /* Plane programming mode for LUN */ - NVM_PLANE_SINGLE = 1, - NVM_PLANE_DOUBLE = 2, - NVM_PLANE_QUAD = 4, - - /* Status codes */ - NVM_RSP_SUCCESS = 0x0, - NVM_RSP_NOT_CHANGEABLE = 0x1, - NVM_RSP_ERR_FAILWRITE = 0x40ff, - NVM_RSP_ERR_EMPTYPAGE = 0x42ff, - NVM_RSP_ERR_FAILECC = 0x4281, - NVM_RSP_ERR_FAILCRC = 0x4004, - NVM_RSP_WARN_HIGHECC = 0x4700, - - /* Device opcodes */ - NVM_OP_PWRITE = 0x91, - NVM_OP_PREAD = 0x92, - NVM_OP_ERASE = 0x90, - - /* PPA Command Flags */ - NVM_IO_SNGL_ACCESS = 0x0, - NVM_IO_DUAL_ACCESS = 0x1, - NVM_IO_QUAD_ACCESS = 0x2, - - /* NAND Access Modes */ - NVM_IO_SUSPEND = 0x80, - NVM_IO_SLC_MODE = 0x100, - NVM_IO_SCRAMBLE_ENABLE = 0x200, - - /* Block Types */ - NVM_BLK_T_FREE = 0x0, - NVM_BLK_T_BAD = 0x1, - NVM_BLK_T_GRWN_BAD = 0x2, - NVM_BLK_T_DEV = 0x4, - NVM_BLK_T_HOST = 0x8, - - /* Memory capabilities */ - NVM_ID_CAP_SLC = 0x1, - NVM_ID_CAP_CMD_SUSPEND = 0x2, - NVM_ID_CAP_SCRAMBLE = 0x4, - NVM_ID_CAP_ENCRYPT = 0x8, - - /* Memory types */ - NVM_ID_FMTYPE_SLC = 0, - NVM_ID_FMTYPE_MLC = 1, - - /* Device capabilities */ - NVM_ID_DCAP_BBLKMGMT = 0x1, - NVM_UD_DCAP_ECC = 0x2, -}; - -struct nvm_id_lp_mlc { - u16 num_pairs; - u8 pairs[886]; -}; - -struct nvm_id_lp_tbl { - __u8 id[8]; - struct nvm_id_lp_mlc mlc; -}; - -struct nvm_addrf_12 { - u8 ch_len; - u8 lun_len; - u8 blk_len; - u8 pg_len; - u8 pln_len; - u8 sec_len; - - u8 ch_offset; - u8 lun_offset; - u8 blk_offset; - u8 pg_offset; - u8 pln_offset; - u8 sec_offset; - - u64 ch_mask; - u64 lun_mask; - u64 blk_mask; - u64 pg_mask; - u64 pln_mask; - u64 sec_mask; -}; - -struct nvm_addrf { - u8 ch_len; - u8 lun_len; - u8 chk_len; - u8 sec_len; - u8 rsv_len[2]; - - u8 ch_offset; - u8 lun_offset; - u8 chk_offset; - u8 sec_offset; - u8 rsv_off[2]; - - u64 ch_mask; - u64 lun_mask; - u64 chk_mask; - u64 sec_mask; - u64 rsv_mask[2]; -}; - -enum { - /* Chunk states */ - NVM_CHK_ST_FREE = 1 << 0, - NVM_CHK_ST_CLOSED = 1 << 1, - NVM_CHK_ST_OPEN = 1 << 2, - NVM_CHK_ST_OFFLINE = 1 << 3, - - /* Chunk types */ - NVM_CHK_TP_W_SEQ = 1 << 0, - NVM_CHK_TP_W_RAN = 1 << 1, - NVM_CHK_TP_SZ_SPEC = 1 << 4, -}; - -/* - * Note: The structure size is linked to nvme_nvm_chk_meta such that the same - * buffer can be used when converting from little endian to cpu addressing. 
- */ -struct nvm_chk_meta { - u8 state; - u8 type; - u8 wi; - u8 rsvd[5]; - u64 slba; - u64 cnlb; - u64 wp; -}; - -struct nvm_target { - struct list_head list; - struct nvm_tgt_dev *dev; - struct nvm_tgt_type *type; - struct gendisk *disk; -}; - -#define ADDR_EMPTY (~0ULL) - -#define NVM_TARGET_DEFAULT_OP (101) -#define NVM_TARGET_MIN_OP (3) -#define NVM_TARGET_MAX_OP (80) - -#define NVM_VERSION_MAJOR 1 -#define NVM_VERSION_MINOR 0 -#define NVM_VERSION_PATCH 0 - -#define NVM_MAX_VLBA (64) /* max logical blocks in a vector command */ - -struct nvm_rq; -typedef void (nvm_end_io_fn)(struct nvm_rq *); - -struct nvm_rq { - struct nvm_tgt_dev *dev; - - struct bio *bio; - - union { - struct ppa_addr ppa_addr; - dma_addr_t dma_ppa_list; - }; - - struct ppa_addr *ppa_list; - - void *meta_list; - dma_addr_t dma_meta_list; - - nvm_end_io_fn *end_io; - - uint8_t opcode; - uint16_t nr_ppas; - uint16_t flags; - - u64 ppa_status; /* ppa media status */ - int error; - - int is_seq; /* Sequential hint flag. 1.2 only */ - - void *private; -}; - -static inline struct nvm_rq *nvm_rq_from_pdu(void *pdu) -{ - return pdu - sizeof(struct nvm_rq); -} - -static inline void *nvm_rq_to_pdu(struct nvm_rq *rqdata) -{ - return rqdata + 1; -} - -static inline struct ppa_addr *nvm_rq_to_ppa_list(struct nvm_rq *rqd) -{ - return (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr; -} - -enum { - NVM_BLK_ST_FREE = 0x1, /* Free block */ - NVM_BLK_ST_TGT = 0x2, /* Block in use by target */ - NVM_BLK_ST_BAD = 0x8, /* Bad block */ -}; - -/* Instance geometry */ -struct nvm_geo { - /* device reported version */ - u8 major_ver_id; - u8 minor_ver_id; - - /* kernel short version */ - u8 version; - - /* instance specific geometry */ - int num_ch; - int num_lun; /* per channel */ - - /* calculated values */ - int all_luns; /* across channels */ - int all_chunks; /* across channels */ - - int op; /* over-provision in instance */ - - sector_t total_secs; /* across channels */ - - /* chunk geometry */ - u32 num_chk; /* chunks per lun */ - u32 clba; /* sectors per chunk */ - u16 csecs; /* sector size */ - u16 sos; /* out-of-band area size */ - bool ext; /* metadata in extended data buffer */ - u32 mdts; /* Max data transfer size*/ - - /* device write constrains */ - u32 ws_min; /* minimum write size */ - u32 ws_opt; /* optimal write size */ - u32 mw_cunits; /* distance required for successful read */ - u32 maxoc; /* maximum open chunks */ - u32 maxocpu; /* maximum open chunks per parallel unit */ - - /* device capabilities */ - u32 mccap; - - /* device timings */ - u32 trdt; /* Avg. Tread (ns) */ - u32 trdm; /* Max Tread (ns) */ - u32 tprt; /* Avg. Tprog (ns) */ - u32 tprm; /* Max Tprog (ns) */ - u32 tbet; /* Avg. 
Terase (ns) */ - u32 tbem; /* Max Terase (ns) */ - - /* generic address format */ - struct nvm_addrf addrf; - - /* 1.2 compatibility */ - u8 vmnt; - u32 cap; - u32 dom; - - u8 mtype; - u8 fmtype; - - u16 cpar; - u32 mpos; - - u8 num_pln; - u8 pln_mode; - u16 num_pg; - u16 fpg_sz; -}; - -/* sub-device structure */ -struct nvm_tgt_dev { - /* Device information */ - struct nvm_geo geo; - - /* Base ppas for target LUNs */ - struct ppa_addr *luns; - - struct request_queue *q; - - struct nvm_dev *parent; - void *map; -}; - -struct nvm_dev { - struct nvm_dev_ops *ops; - - struct list_head devices; - - /* Device information */ - struct nvm_geo geo; - - unsigned long *lun_map; - void *dma_pool; - - /* Backend device */ - struct request_queue *q; - char name[DISK_NAME_LEN]; - void *private_data; - - struct kref ref; - void *rmap; - - struct mutex mlock; - spinlock_t lock; - - /* target management */ - struct list_head area_list; - struct list_head targets; -}; - -static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev, - struct ppa_addr r) -{ - struct nvm_geo *geo = &dev->geo; - struct ppa_addr l; - - if (geo->version == NVM_OCSSD_SPEC_12) { - struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&geo->addrf; - - l.ppa = ((u64)r.g.ch) << ppaf->ch_offset; - l.ppa |= ((u64)r.g.lun) << ppaf->lun_offset; - l.ppa |= ((u64)r.g.blk) << ppaf->blk_offset; - l.ppa |= ((u64)r.g.pg) << ppaf->pg_offset; - l.ppa |= ((u64)r.g.pl) << ppaf->pln_offset; - l.ppa |= ((u64)r.g.sec) << ppaf->sec_offset; - } else { - struct nvm_addrf *lbaf = &geo->addrf; - - l.ppa = ((u64)r.m.grp) << lbaf->ch_offset; - l.ppa |= ((u64)r.m.pu) << lbaf->lun_offset; - l.ppa |= ((u64)r.m.chk) << lbaf->chk_offset; - l.ppa |= ((u64)r.m.sec) << lbaf->sec_offset; - } - - return l; -} - -static inline struct ppa_addr dev_to_generic_addr(struct nvm_dev *dev, - struct ppa_addr r) -{ - struct nvm_geo *geo = &dev->geo; - struct ppa_addr l; - - l.ppa = 0; - - if (geo->version == NVM_OCSSD_SPEC_12) { - struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&geo->addrf; - - l.g.ch = (r.ppa & ppaf->ch_mask) >> ppaf->ch_offset; - l.g.lun = (r.ppa & ppaf->lun_mask) >> ppaf->lun_offset; - l.g.blk = (r.ppa & ppaf->blk_mask) >> ppaf->blk_offset; - l.g.pg = (r.ppa & ppaf->pg_mask) >> ppaf->pg_offset; - l.g.pl = (r.ppa & ppaf->pln_mask) >> ppaf->pln_offset; - l.g.sec = (r.ppa & ppaf->sec_mask) >> ppaf->sec_offset; - } else { - struct nvm_addrf *lbaf = &geo->addrf; - - l.m.grp = (r.ppa & lbaf->ch_mask) >> lbaf->ch_offset; - l.m.pu = (r.ppa & lbaf->lun_mask) >> lbaf->lun_offset; - l.m.chk = (r.ppa & lbaf->chk_mask) >> lbaf->chk_offset; - l.m.sec = (r.ppa & lbaf->sec_mask) >> lbaf->sec_offset; - } - - return l; -} - -static inline u64 dev_to_chunk_addr(struct nvm_dev *dev, void *addrf, - struct ppa_addr p) -{ - struct nvm_geo *geo = &dev->geo; - u64 caddr; - - if (geo->version == NVM_OCSSD_SPEC_12) { - struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)addrf; - - caddr = (u64)p.g.pg << ppaf->pg_offset; - caddr |= (u64)p.g.pl << ppaf->pln_offset; - caddr |= (u64)p.g.sec << ppaf->sec_offset; - } else { - caddr = p.m.sec; - } - - return caddr; -} - -static inline struct ppa_addr nvm_ppa32_to_ppa64(struct nvm_dev *dev, - void *addrf, u32 ppa32) -{ - struct ppa_addr ppa64; - - ppa64.ppa = 0; - - if (ppa32 == -1) { - ppa64.ppa = ADDR_EMPTY; - } else if (ppa32 & (1U << 31)) { - ppa64.c.line = ppa32 & ((~0U) >> 1); - ppa64.c.is_cached = 1; - } else { - struct nvm_geo *geo = &dev->geo; - - if (geo->version == NVM_OCSSD_SPEC_12) { - struct nvm_addrf_12 *ppaf = addrf; - 
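			/*
			 * Worked example with made-up field widths (not the
			 * original comment): if sec occupies bits 3:0 and pg
			 * bits 15:4, then ppa32 = 0x25 decodes as
			 * sec = 0x25 & 0xf = 5 and pg = (0x25 & 0xfff0) >> 4
			 * = 2; each extraction below masks one field out and
			 * shifts it down to a zero-based value.
			 */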
- ppa64.g.ch = (ppa32 & ppaf->ch_mask) >> - ppaf->ch_offset; - ppa64.g.lun = (ppa32 & ppaf->lun_mask) >> - ppaf->lun_offset; - ppa64.g.blk = (ppa32 & ppaf->blk_mask) >> - ppaf->blk_offset; - ppa64.g.pg = (ppa32 & ppaf->pg_mask) >> - ppaf->pg_offset; - ppa64.g.pl = (ppa32 & ppaf->pln_mask) >> - ppaf->pln_offset; - ppa64.g.sec = (ppa32 & ppaf->sec_mask) >> - ppaf->sec_offset; - } else { - struct nvm_addrf *lbaf = addrf; - - ppa64.m.grp = (ppa32 & lbaf->ch_mask) >> - lbaf->ch_offset; - ppa64.m.pu = (ppa32 & lbaf->lun_mask) >> - lbaf->lun_offset; - ppa64.m.chk = (ppa32 & lbaf->chk_mask) >> - lbaf->chk_offset; - ppa64.m.sec = (ppa32 & lbaf->sec_mask) >> - lbaf->sec_offset; - } - } - - return ppa64; -} - -static inline u32 nvm_ppa64_to_ppa32(struct nvm_dev *dev, - void *addrf, struct ppa_addr ppa64) -{ - u32 ppa32 = 0; - - if (ppa64.ppa == ADDR_EMPTY) { - ppa32 = ~0U; - } else if (ppa64.c.is_cached) { - ppa32 |= ppa64.c.line; - ppa32 |= 1U << 31; - } else { - struct nvm_geo *geo = &dev->geo; - - if (geo->version == NVM_OCSSD_SPEC_12) { - struct nvm_addrf_12 *ppaf = addrf; - - ppa32 |= ppa64.g.ch << ppaf->ch_offset; - ppa32 |= ppa64.g.lun << ppaf->lun_offset; - ppa32 |= ppa64.g.blk << ppaf->blk_offset; - ppa32 |= ppa64.g.pg << ppaf->pg_offset; - ppa32 |= ppa64.g.pl << ppaf->pln_offset; - ppa32 |= ppa64.g.sec << ppaf->sec_offset; - } else { - struct nvm_addrf *lbaf = addrf; - - ppa32 |= ppa64.m.grp << lbaf->ch_offset; - ppa32 |= ppa64.m.pu << lbaf->lun_offset; - ppa32 |= ppa64.m.chk << lbaf->chk_offset; - ppa32 |= ppa64.m.sec << lbaf->sec_offset; - } - } - - return ppa32; -} - -static inline int nvm_next_ppa_in_chk(struct nvm_tgt_dev *dev, - struct ppa_addr *ppa) -{ - struct nvm_geo *geo = &dev->geo; - int last = 0; - - if (geo->version == NVM_OCSSD_SPEC_12) { - int sec = ppa->g.sec; - - sec++; - if (sec == geo->ws_min) { - int pg = ppa->g.pg; - - sec = 0; - pg++; - if (pg == geo->num_pg) { - int pl = ppa->g.pl; - - pg = 0; - pl++; - if (pl == geo->num_pln) - last = 1; - - ppa->g.pl = pl; - } - ppa->g.pg = pg; - } - ppa->g.sec = sec; - } else { - ppa->m.sec++; - if (ppa->m.sec == geo->clba) - last = 1; - } - - return last; -} - -typedef sector_t (nvm_tgt_capacity_fn)(void *); -typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *, - int flags); -typedef void (nvm_tgt_exit_fn)(void *, bool); -typedef int (nvm_tgt_sysfs_init_fn)(struct gendisk *); -typedef void (nvm_tgt_sysfs_exit_fn)(struct gendisk *); - -enum { - NVM_TGT_F_DEV_L2P = 0, - NVM_TGT_F_HOST_L2P = 1 << 0, -}; - -struct nvm_tgt_type { - const char *name; - unsigned int version[3]; - int flags; - - /* target entry points */ - const struct block_device_operations *bops; - nvm_tgt_capacity_fn *capacity; - - /* module-specific init/teardown */ - nvm_tgt_init_fn *init; - nvm_tgt_exit_fn *exit; - - /* sysfs */ - nvm_tgt_sysfs_init_fn *sysfs_init; - nvm_tgt_sysfs_exit_fn *sysfs_exit; - - /* For internal use */ - struct list_head list; - struct module *owner; -}; - -extern int nvm_register_tgt_type(struct nvm_tgt_type *); -extern void nvm_unregister_tgt_type(struct nvm_tgt_type *); - -extern void *nvm_dev_dma_alloc(struct nvm_dev *, gfp_t, dma_addr_t *); -extern void nvm_dev_dma_free(struct nvm_dev *, void *, dma_addr_t); - -extern struct nvm_dev *nvm_alloc_dev(int); -extern int nvm_register(struct nvm_dev *); -extern void nvm_unregister(struct nvm_dev *); - -extern int nvm_get_chunk_meta(struct nvm_tgt_dev *, struct ppa_addr, - int, struct nvm_chk_meta *); -extern int nvm_set_chunk_meta(struct nvm_tgt_dev *, struct ppa_addr *, 
- int, int);
-extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *, void *);
-extern int nvm_submit_io_sync(struct nvm_tgt_dev *, struct nvm_rq *, void *);
-extern void nvm_end_io(struct nvm_rq *);
-
-#else /* CONFIG_NVM */
-struct nvm_dev_ops;
-
-static inline struct nvm_dev *nvm_alloc_dev(int node)
-{
- return ERR_PTR(-EINVAL);
-}
-static inline int nvm_register(struct nvm_dev *dev)
-{
- return -EINVAL;
-}
-static inline void nvm_unregister(struct nvm_dev *dev) {}
-#endif /* CONFIG_NVM */
-#endif /* LIGHTNVM.H */
diff --git a/include/uapi/linux/lightnvm.h b/include/uapi/linux/lightnvm.h
deleted file mode 100644
index 2745afd9b8fa..000000000000
--- a/include/uapi/linux/lightnvm.h
+++ /dev/null
@@ -1,224 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * Copyright (C) 2015 CNEX Labs. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; see the file COPYING. If not, write to
- * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
- * USA.
- */
-
-#ifndef _UAPI_LINUX_LIGHTNVM_H
-#define _UAPI_LINUX_LIGHTNVM_H
-
-#ifdef __KERNEL__
-#include <linux/const.h>
-#else /* __KERNEL__ */
-#include <stdio.h>
-#include <sys/ioctl.h>
-#define DISK_NAME_LEN 32
-#endif /* __KERNEL__ */
-
-#include <linux/types.h>
-#include <linux/ioctl.h>
-
-#define NVM_TTYPE_NAME_MAX 48
-#define NVM_TTYPE_MAX 63
-#define NVM_MMTYPE_LEN 8
-
-#define NVM_CTRL_FILE "/dev/lightnvm/control"
-
-struct nvm_ioctl_info_tgt {
- __u32 version[3];
- __u32 reserved;
- char tgtname[NVM_TTYPE_NAME_MAX];
-};
-
-struct nvm_ioctl_info {
- __u32 version[3]; /* in/out - major, minor, patch */
- __u16 tgtsize; /* number of targets */
- __u16 reserved16; /* pad to 4K page */
- __u32 reserved[12];
- struct nvm_ioctl_info_tgt tgts[NVM_TTYPE_MAX];
-};
-
-enum {
- NVM_DEVICE_ACTIVE = 1 << 0,
-};
-
-struct nvm_ioctl_device_info {
- char devname[DISK_NAME_LEN];
- char bmname[NVM_TTYPE_NAME_MAX];
- __u32 bmversion[3];
- __u32 flags;
- __u32 reserved[8];
-};
-
-struct nvm_ioctl_get_devices {
- __u32 nr_devices;
- __u32 reserved[31];
- struct nvm_ioctl_device_info info[31];
-};
-
-struct nvm_ioctl_create_simple {
- __u32 lun_begin;
- __u32 lun_end;
-};
-
-struct nvm_ioctl_create_extended {
- __u16 lun_begin;
- __u16 lun_end;
- __u16 op;
- __u16 rsv;
-};
-
-enum {
- NVM_CONFIG_TYPE_SIMPLE = 0,
- NVM_CONFIG_TYPE_EXTENDED = 1,
-};
-
-struct nvm_ioctl_create_conf {
- __u32 type;
- union {
- struct nvm_ioctl_create_simple s;
- struct nvm_ioctl_create_extended e;
- };
-};
-
-enum {
- NVM_TARGET_FACTORY = 1 << 0, /* Init target in factory mode */
-};
-
-struct nvm_ioctl_create {
- char dev[DISK_NAME_LEN]; /* open-channel SSD device */
- char tgttype[NVM_TTYPE_NAME_MAX]; /* target type name */
- char tgtname[DISK_NAME_LEN]; /* dev to expose target as */
-
- __u32 flags;
-
- struct nvm_ioctl_create_conf conf;
-};
-
-struct nvm_ioctl_remove {
- char tgtname[DISK_NAME_LEN];
-
- __u32 flags;
-};
-
-struct nvm_ioctl_dev_init {
- char dev[DISK_NAME_LEN]; /* open-channel SSD device */
- char mmtype[NVM_MMTYPE_LEN]; /* register to media manager */
-
- __u32 flags;
-};
-
-enum {
- NVM_FACTORY_ERASE_ONLY_USER = 1 << 0, /* erase only blocks used as
- * host blks or grown blks */
- NVM_FACTORY_RESET_HOST_BLKS = 1 << 1, /* remove host blk marks */
- NVM_FACTORY_RESET_GRWN_BBLKS = 1 << 2, /* remove grown blk marks */
- NVM_FACTORY_NR_BITS = 1 << 3, /* stops here */
-};
-
-struct nvm_ioctl_dev_factory {
- char dev[DISK_NAME_LEN];
-
- __u32 flags;
-};
-
-struct nvm_user_vio {
- __u8 opcode;
- __u8 flags;
- __u16 control;
- __u16 nppas;
- __u16 rsvd;
- __u64 metadata;
- __u64 addr;
- __u64 ppa_list;
- __u32 metadata_len;
- __u32 data_len;
- __u64 status;
- __u32 result;
- __u32 rsvd3[3];
-};
-
-struct nvm_passthru_vio {
- __u8 opcode;
- __u8 flags;
- __u8 rsvd[2];
- __u32 nsid;
- __u32 cdw2;
- __u32 cdw3;
- __u64 metadata;
- __u64 addr;
- __u32 metadata_len;
- __u32 data_len;
- __u64 ppa_list;
- __u16 nppas;
- __u16 control;
- __u32 cdw13;
- __u32 cdw14;
- __u32 cdw15;
- __u64 status;
- __u32 result;
- __u32 timeout_ms;
-};
-
-/* The ioctl type, 'L', 0x20 - 0x2F documented in ioctl-number.txt */
-enum {
- /* top level cmds */
- NVM_INFO_CMD = 0x20,
- NVM_GET_DEVICES_CMD,
-
- /* device level cmds */
- NVM_DEV_CREATE_CMD,
- NVM_DEV_REMOVE_CMD,
-
- /* Init a device to support LightNVM media managers */
- NVM_DEV_INIT_CMD,
-
- /* Factory reset device */
- NVM_DEV_FACTORY_CMD,
-
- /* Vector user I/O */
- NVM_DEV_VIO_ADMIN_CMD = 0x41,
- NVM_DEV_VIO_CMD = 0x42,
- NVM_DEV_VIO_USER_CMD = 0x43,
-};
-
-#define NVM_IOCTL 'L' /* 0x4c */
-
-#define NVM_INFO _IOWR(NVM_IOCTL, NVM_INFO_CMD, \
- struct nvm_ioctl_info)
-#define NVM_GET_DEVICES _IOR(NVM_IOCTL, NVM_GET_DEVICES_CMD, \
- struct nvm_ioctl_get_devices)
-#define NVM_DEV_CREATE _IOW(NVM_IOCTL, NVM_DEV_CREATE_CMD, \
- struct nvm_ioctl_create)
-#define NVM_DEV_REMOVE _IOW(NVM_IOCTL, NVM_DEV_REMOVE_CMD, \
- struct nvm_ioctl_remove)
-#define NVM_DEV_INIT _IOW(NVM_IOCTL, NVM_DEV_INIT_CMD, \
- struct nvm_ioctl_dev_init)
-#define NVM_DEV_FACTORY _IOW(NVM_IOCTL, NVM_DEV_FACTORY_CMD, \
- struct nvm_ioctl_dev_factory)
-
-#define NVME_NVM_IOCTL_IO_VIO _IOWR(NVM_IOCTL, NVM_DEV_VIO_USER_CMD, \
- struct nvm_passthru_vio)
-#define NVME_NVM_IOCTL_ADMIN_VIO _IOWR(NVM_IOCTL, NVM_DEV_VIO_ADMIN_CMD,\
- struct nvm_passthru_vio)
-#define NVME_NVM_IOCTL_SUBMIT_VIO _IOWR(NVM_IOCTL, NVM_DEV_VIO_CMD,\
- struct nvm_user_vio)
-
-#define NVM_VERSION_MAJOR 1
-#define NVM_VERSION_MINOR 0
-#define NVM_VERSION_PATCHLEVEL 0
-
-#endif
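
A note for readers excavating this removal later: the nvm_ppa32_to_ppa64() / nvm_ppa64_to_ppa32() helpers deleted above are the heart of the old addressing model. Device geometry arrives as a set of per-field masks and shifts, and a physical page address (PPA) round-trips between a packed 32-bit form and the unpacked struct ppa_addr view; OCSSD 1.2 split the address into six fields (ch/lun/blk/pg/pl/sec) where 2.0 uses four (grp/pu/chk/sec), which is exactly what the two branches of those helpers handled. The standalone sketch below reproduces the packing scheme outside the kernel. It is illustrative only: the mask/offset values are invented for the example and stand in for what a real device reported through the OCSSD geometry command.

#include <stdint.h>
#include <stdio.h>

/* Fictional 2.0-style address format: four fields, each described by
 * a mask plus a shift, mirroring the deleted struct nvm_addrf. */
struct addrf {
	uint32_t ch_mask, lun_mask, chk_mask, sec_mask;
	unsigned int ch_offset, lun_offset, chk_offset, sec_offset;
};

/* Unpacked address, cf. the .m view of the deleted struct ppa_addr. */
struct ppa {
	uint32_t grp, pu, chk, sec;
};

static uint32_t ppa_pack(const struct addrf *f, struct ppa p)
{
	uint32_t v = 0;

	v |= p.grp << f->ch_offset;
	v |= p.pu << f->lun_offset;
	v |= p.chk << f->chk_offset;
	v |= p.sec << f->sec_offset;
	return v;
}

static struct ppa ppa_unpack(const struct addrf *f, uint32_t v)
{
	struct ppa p;

	p.grp = (v & f->ch_mask) >> f->ch_offset;
	p.pu = (v & f->lun_mask) >> f->lun_offset;
	p.chk = (v & f->chk_mask) >> f->chk_offset;
	p.sec = (v & f->sec_mask) >> f->sec_offset;
	return p;
}

int main(void)
{
	/* 4 bits of group, 4 of parallel unit, 12 of chunk, 12 of sector */
	struct addrf f = {
		.sec_mask = 0x00000fff, .sec_offset = 0,
		.chk_mask = 0x00fff000, .chk_offset = 12,
		.lun_mask = 0x0f000000, .lun_offset = 24,
		.ch_mask = 0xf0000000, .ch_offset = 28,
	};
	struct ppa in = { .grp = 3, .pu = 2, .chk = 1027, .sec = 77 };
	struct ppa out = ppa_unpack(&f, ppa_pack(&f, in));

	printf("grp=%u pu=%u chk=%u sec=%u\n", out.grp, out.pu, out.chk,
	       out.sec);
	return 0;
}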
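
Similarly, the uapi header deleted above was the ABI behind nvme-cli's "nvme lnvm" subcommands. For the record, a minimal sketch of how userspace enumerated the registered target types through /dev/lightnvm/control is shown below; it assumes a pre-removal kernel (CONFIG_NVM, up through v5.14) with the old header still installed, so it intentionally no longer builds against current kernel headers.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/lightnvm.h>	/* removed by this patch */

int main(void)
{
	struct nvm_ioctl_info info;
	int fd, i;

	fd = open(NVM_CTRL_FILE, O_RDONLY);	/* "/dev/lightnvm/control" */
	if (fd < 0) {
		perror("open " NVM_CTRL_FILE);
		return 1;
	}

	memset(&info, 0, sizeof(info));
	if (ioctl(fd, NVM_INFO, &info) < 0) {
		perror("NVM_INFO");
		close(fd);
		return 1;
	}

	printf("lightnvm %u.%u.%u, %u target type(s)\n",
	       info.version[0], info.version[1], info.version[2],
	       info.tgtsize);
	for (i = 0; i < info.tgtsize; i++)
		printf("  %s %u.%u.%u\n", info.tgts[i].tgtname,
		       info.tgts[i].version[0], info.tgts[i].version[1],
		       info.tgts[i].version[2]);

	close(fd);
	return 0;
}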