summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-07-15 14:20:22 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-07-15 14:20:22 -0700
commit3e7819886281e077e82006fe4804b0d6b0f5643b (patch)
tree40766af623d8a1dde0edaee8b6abc496efbcc615 /include
parent3a56e241732975c2c1247047ddbfc0ac6f6a4905 (diff)
parent3c1743a685b19bc17cf65af4a2eb149fd3b15c50 (diff)
Merge tag 'for-6.11/block-20240710' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe: - NVMe updates via Keith: - Device initialization memory leak fixes (Keith) - More constants defined (Weiwen) - Target debugfs support (Hannes) - PCIe subsystem reset enhancements (Keith) - Queue-depth multipath policy (Redhat and PureStorage) - Implement get_unique_id (Christoph) - Authentication error fixes (Gaosheng) - MD updates via Song - sync_action fix and refactoring (Yu Kuai) - Various small fixes (Christoph Hellwig, Li Nan, and Ofir Gal, Yu Kuai, Benjamin Marzinski, Christophe JAILLET, Yang Li) - Fix loop detach/open race (Gulam) - Fix lower control limit for blk-throttle (Yu) - Add module descriptions to various drivers (Jeff) - Add support for atomic writes for block devices, and statx reporting for same. Includes SCSI and NVMe (John, Prasad, Alan) - Add IO priority information to block trace points (Dongliang) - Various zone improvements and tweaks (Damien) - mq-deadline tag reservation improvements (Bart) - Ignore direct reclaim swap writes in writeback throttling (Baokun) - Block integrity improvements and fixes (Anuj) - Add basic support for rust based block drivers. Has a dummy null_blk variant for now (Andreas) - Series converting driver settings to queue limits, and cleanups and fixes related to that (Christoph) - Cleanup for poking too deeply into the bvec internals, in preparation for DMA mapping API changes (Christoph) - Various minor tweaks and fixes (Jiapeng, John, Kanchan, Mikulas, Ming, Zhu, Damien, Christophe, Chaitanya) * tag 'for-6.11/block-20240710' of git://git.kernel.dk/linux: (206 commits) floppy: add missing MODULE_DESCRIPTION() macro loop: add missing MODULE_DESCRIPTION() macro ublk_drv: add missing MODULE_DESCRIPTION() macro xen/blkback: add missing MODULE_DESCRIPTION() macro block/rnbd: Constify struct kobj_type block: take offset into account in blk_bvec_map_sg again block: fix get_max_segment_size() warning loop: Don't bother validating blocksize virtio_blk: Don't bother validating blocksize null_blk: Don't bother validating blocksize block: Validate logical block size in blk_validate_limits() virtio_blk: Fix default logical block size fallback nvmet-auth: fix nvmet_auth hash error handling nvme: implement ->get_unique_id block: pass a phys_addr_t to get_max_segment_size block: add a bvec_phys helper blk-lib: check for kill signal in ioctl BLKZEROOUT block: limit the Write Zeroes to manually writing zeroes fallback block: refacto blkdev_issue_zeroout block: move read-only and supported checks into (__)blkdev_issue_zeroout ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/blk-integrity.h87
-rw-r--r--include/linux/blk_types.h8
-rw-r--r--include/linux/blkdev.h348
-rw-r--r--include/linux/bvec.h14
-rw-r--r--include/linux/device-mapper.h7
-rw-r--r--include/linux/fs.h20
-rw-r--r--include/linux/nvme-fc-driver.h4
-rw-r--r--include/linux/nvme.h19
-rw-r--r--include/linux/stat.h3
-rw-r--r--include/linux/t10-pi.h22
-rw-r--r--include/scsi/scsi_proto.h1
-rw-r--r--include/trace/events/block.h41
-rw-r--r--include/trace/events/scsi.h1
-rw-r--r--include/uapi/linux/fs.h5
-rw-r--r--include/uapi/linux/stat.h10
15 files changed, 363 insertions, 227 deletions
diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index 7428cb43952d..804f856ed3e5 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -7,51 +7,38 @@
struct request;
enum blk_integrity_flags {
- BLK_INTEGRITY_VERIFY = 1 << 0,
- BLK_INTEGRITY_GENERATE = 1 << 1,
+ BLK_INTEGRITY_NOVERIFY = 1 << 0,
+ BLK_INTEGRITY_NOGENERATE = 1 << 1,
BLK_INTEGRITY_DEVICE_CAPABLE = 1 << 2,
- BLK_INTEGRITY_IP_CHECKSUM = 1 << 3,
+ BLK_INTEGRITY_REF_TAG = 1 << 3,
+ BLK_INTEGRITY_STACKED = 1 << 4,
};
-struct blk_integrity_iter {
- void *prot_buf;
- void *data_buf;
- sector_t seed;
- unsigned int data_size;
- unsigned short interval;
- unsigned char tuple_size;
- unsigned char pi_offset;
- const char *disk_name;
-};
-
-typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *);
-typedef void (integrity_prepare_fn) (struct request *);
-typedef void (integrity_complete_fn) (struct request *, unsigned int);
-
-struct blk_integrity_profile {
- integrity_processing_fn *generate_fn;
- integrity_processing_fn *verify_fn;
- integrity_prepare_fn *prepare_fn;
- integrity_complete_fn *complete_fn;
- const char *name;
-};
+const char *blk_integrity_profile_name(struct blk_integrity *bi);
+bool queue_limits_stack_integrity(struct queue_limits *t,
+ struct queue_limits *b);
+static inline bool queue_limits_stack_integrity_bdev(struct queue_limits *t,
+ struct block_device *bdev)
+{
+ return queue_limits_stack_integrity(t, &bdev->bd_disk->queue->limits);
+}
#ifdef CONFIG_BLK_DEV_INTEGRITY
-void blk_integrity_register(struct gendisk *, struct blk_integrity *);
-void blk_integrity_unregister(struct gendisk *);
-int blk_integrity_compare(struct gendisk *, struct gendisk *);
int blk_rq_map_integrity_sg(struct request_queue *, struct bio *,
struct scatterlist *);
int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
-static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
+static inline bool
+blk_integrity_queue_supports_integrity(struct request_queue *q)
{
- struct blk_integrity *bi = &disk->queue->integrity;
+ return q->limits.integrity.tuple_size;
+}
- if (!bi->profile)
+static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
+{
+ if (!blk_integrity_queue_supports_integrity(disk->queue))
return NULL;
-
- return bi;
+ return &disk->queue->limits.integrity;
}
static inline struct blk_integrity *
@@ -60,12 +47,6 @@ bdev_get_integrity(struct block_device *bdev)
return blk_get_integrity(bdev->bd_disk);
}
-static inline bool
-blk_integrity_queue_supports_integrity(struct request_queue *q)
-{
- return q->integrity.profile;
-}
-
static inline unsigned short
queue_max_integrity_segments(const struct request_queue *q)
{
@@ -100,14 +81,13 @@ static inline bool blk_integrity_rq(struct request *rq)
}
/*
- * Return the first bvec that contains integrity data. Only drivers that are
- * limited to a single integrity segment should use this helper.
+ * Return the current bvec that contains the integrity data. bip_iter may be
+ * advanced to iterate over the integrity data.
*/
-static inline struct bio_vec *rq_integrity_vec(struct request *rq)
+static inline struct bio_vec rq_integrity_vec(struct request *rq)
{
- if (WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1))
- return NULL;
- return rq->bio->bi_integrity->bip_vec;
+ return mp_bvec_iter_bvec(rq->bio->bi_integrity->bip_vec,
+ rq->bio->bi_integrity->bip_iter);
}
#else /* CONFIG_BLK_DEV_INTEGRITY */
static inline int blk_rq_count_integrity_sg(struct request_queue *q,
@@ -134,17 +114,6 @@ blk_integrity_queue_supports_integrity(struct request_queue *q)
{
return false;
}
-static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b)
-{
- return 0;
-}
-static inline void blk_integrity_register(struct gendisk *d,
- struct blk_integrity *b)
-{
-}
-static inline void blk_integrity_unregister(struct gendisk *d)
-{
-}
static inline unsigned short
queue_max_integrity_segments(const struct request_queue *q)
{
@@ -167,9 +136,11 @@ static inline int blk_integrity_rq(struct request *rq)
return 0;
}
-static inline struct bio_vec *rq_integrity_vec(struct request *rq)
+static inline struct bio_vec rq_integrity_vec(struct request *rq)
{
- return NULL;
+ /* the optimizer will remove all calls to this function */
+ return (struct bio_vec){ };
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
+
#endif /* _LINUX_BLK_INTEGRITY_H */
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 781c4500491b..632edd71f8c6 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -162,6 +162,11 @@ typedef u16 blk_short_t;
*/
#define BLK_STS_DURATION_LIMIT ((__force blk_status_t)17)
+/*
+ * Invalid size or alignment.
+ */
+#define BLK_STS_INVAL ((__force blk_status_t)19)
+
/**
* blk_path_error - returns true if error may be path related
* @error: status the request was completed with
@@ -370,7 +375,7 @@ enum req_flag_bits {
__REQ_SWAP, /* swap I/O */
__REQ_DRV, /* for driver use */
__REQ_FS_PRIVATE, /* for file system (submitter) use */
-
+ __REQ_ATOMIC, /* for atomic write operations */
/*
* Command specific flags, keep last:
*/
@@ -402,6 +407,7 @@ enum req_flag_bits {
#define REQ_SWAP (__force blk_opf_t)(1ULL << __REQ_SWAP)
#define REQ_DRV (__force blk_opf_t)(1ULL << __REQ_DRV)
#define REQ_FS_PRIVATE (__force blk_opf_t)(1ULL << __REQ_FS_PRIVATE)
+#define REQ_ATOMIC (__force blk_opf_t)(1ULL << __REQ_ATOMIC)
#define REQ_NOUNMAP (__force blk_opf_t)(1ULL << __REQ_NOUNMAP)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 24c36929920b..b8196e219ac2 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -105,9 +105,16 @@ enum {
struct disk_events;
struct badblocks;
+enum blk_integrity_checksum {
+ BLK_INTEGRITY_CSUM_NONE = 0,
+ BLK_INTEGRITY_CSUM_IP = 1,
+ BLK_INTEGRITY_CSUM_CRC = 2,
+ BLK_INTEGRITY_CSUM_CRC64 = 3,
+} __packed ;
+
struct blk_integrity {
- const struct blk_integrity_profile *profile;
unsigned char flags;
+ enum blk_integrity_checksum csum_type;
unsigned char tuple_size;
unsigned char pi_offset;
unsigned char interval_exp;
@@ -261,6 +268,7 @@ static inline dev_t disk_devt(struct gendisk *disk)
return MKDEV(disk->major, disk->first_minor);
}
+/* blk_validate_limits() validates bsize, so drivers don't usually need to */
static inline int blk_validate_block_size(unsigned long bsize)
{
if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize))
@@ -275,17 +283,75 @@ static inline bool blk_op_is_passthrough(blk_opf_t op)
return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT;
}
+/* flags set by the driver in queue_limits.features */
+typedef unsigned int __bitwise blk_features_t;
+
+/* supports a volatile write cache */
+#define BLK_FEAT_WRITE_CACHE ((__force blk_features_t)(1u << 0))
+
+/* supports passing on the FUA bit */
+#define BLK_FEAT_FUA ((__force blk_features_t)(1u << 1))
+
+/* rotational device (hard drive or floppy) */
+#define BLK_FEAT_ROTATIONAL ((__force blk_features_t)(1u << 2))
+
+/* contributes to the random number pool */
+#define BLK_FEAT_ADD_RANDOM ((__force blk_features_t)(1u << 3))
+
+/* do disk/partitions IO accounting */
+#define BLK_FEAT_IO_STAT ((__force blk_features_t)(1u << 4))
+
+/* don't modify data until writeback is done */
+#define BLK_FEAT_STABLE_WRITES ((__force blk_features_t)(1u << 5))
+
+/* always completes in submit context */
+#define BLK_FEAT_SYNCHRONOUS ((__force blk_features_t)(1u << 6))
+
+/* supports REQ_NOWAIT */
+#define BLK_FEAT_NOWAIT ((__force blk_features_t)(1u << 7))
+
+/* supports DAX */
+#define BLK_FEAT_DAX ((__force blk_features_t)(1u << 8))
+
+/* supports I/O polling */
+#define BLK_FEAT_POLL ((__force blk_features_t)(1u << 9))
+
+/* is a zoned device */
+#define BLK_FEAT_ZONED ((__force blk_features_t)(1u << 10))
+
+/* supports PCI(e) p2p requests */
+#define BLK_FEAT_PCI_P2PDMA ((__force blk_features_t)(1u << 12))
+
+/* skip this queue in blk_mq_(un)quiesce_tagset */
+#define BLK_FEAT_SKIP_TAGSET_QUIESCE ((__force blk_features_t)(1u << 13))
+
+/* bounce all highmem pages */
+#define BLK_FEAT_BOUNCE_HIGH ((__force blk_features_t)(1u << 14))
+
+/* undocumented magic for bcache */
+#define BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE \
+ ((__force blk_features_t)(1u << 15))
+
/*
- * BLK_BOUNCE_NONE: never bounce (default)
- * BLK_BOUNCE_HIGH: bounce all highmem pages
+ * Flags automatically inherited when stacking limits.
*/
-enum blk_bounce {
- BLK_BOUNCE_NONE,
- BLK_BOUNCE_HIGH,
-};
+#define BLK_FEAT_INHERIT_MASK \
+ (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL | \
+ BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | BLK_FEAT_BOUNCE_HIGH | \
+ BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE)
+
+/* internal flags in queue_limits.flags */
+typedef unsigned int __bitwise blk_flags_t;
+
+/* do not send FLUSH/FUA commands despite advertising a write cache */
+#define BLK_FLAG_WRITE_CACHE_DISABLED ((__force blk_flags_t)(1u << 0))
+
+/* I/O topology is misaligned */
+#define BLK_FLAG_MISALIGNED ((__force blk_flags_t)(1u << 1))
struct queue_limits {
- enum blk_bounce bounce;
+ blk_features_t features;
+ blk_flags_t flags;
unsigned long seg_boundary_mask;
unsigned long virt_boundary_mask;
@@ -310,14 +376,20 @@ struct queue_limits {
unsigned int discard_alignment;
unsigned int zone_write_granularity;
+ /* atomic write limits */
+ unsigned int atomic_write_hw_max;
+ unsigned int atomic_write_max_sectors;
+ unsigned int atomic_write_hw_boundary;
+ unsigned int atomic_write_boundary_sectors;
+ unsigned int atomic_write_hw_unit_min;
+ unsigned int atomic_write_unit_min;
+ unsigned int atomic_write_hw_unit_max;
+ unsigned int atomic_write_unit_max;
+
unsigned short max_segments;
unsigned short max_integrity_segments;
unsigned short max_discard_segments;
- unsigned char misaligned;
- unsigned char discard_misaligned;
- unsigned char raid_partial_stripes_expensive;
- bool zoned;
unsigned int max_open_zones;
unsigned int max_active_zones;
@@ -327,13 +399,14 @@ struct queue_limits {
* due to possible offsets.
*/
unsigned int dma_alignment;
+ unsigned int dma_pad_mask;
+
+ struct blk_integrity integrity;
};
typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx,
void *data);
-void disk_set_zoned(struct gendisk *disk);
-
#define BLK_ALL_ZONES ((unsigned int)-1)
int blkdev_report_zones(struct block_device *bdev, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
@@ -414,10 +487,6 @@ struct request_queue {
struct queue_limits limits;
-#ifdef CONFIG_BLK_DEV_INTEGRITY
- struct blk_integrity integrity;
-#endif /* CONFIG_BLK_DEV_INTEGRITY */
-
#ifdef CONFIG_PM
struct device *dev;
enum rpm_status rpm_status;
@@ -439,8 +508,6 @@ struct request_queue {
*/
int id;
- unsigned int dma_pad_mask;
-
/*
* queue settings
*/
@@ -526,38 +593,20 @@ struct request_queue {
#define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */
#define QUEUE_FLAG_SAME_COMP 4 /* complete on same CPU-group */
#define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */
-#define QUEUE_FLAG_NONROT 6 /* non-rotational device (SSD) */
-#define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */
-#define QUEUE_FLAG_IO_STAT 7 /* do disk/partitions IO accounting */
#define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */
-#define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */
-#define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */
#define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */
-#define QUEUE_FLAG_HW_WC 13 /* Write back caching supported */
#define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */
-#define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */
-#define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */
-#define QUEUE_FLAG_WC 17 /* Write back caching */
-#define QUEUE_FLAG_FUA 18 /* device supports FUA writes */
-#define QUEUE_FLAG_DAX 19 /* device supports DAX */
#define QUEUE_FLAG_STATS 20 /* track IO start and completion times */
#define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */
#define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */
-#define QUEUE_FLAG_PCI_P2PDMA 25 /* device supports PCI p2p requests */
-#define QUEUE_FLAG_ZONE_RESETALL 26 /* supports Zone Reset All */
#define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */
#define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */
-#define QUEUE_FLAG_NOWAIT 29 /* device supports NOWAIT */
#define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */
-#define QUEUE_FLAG_SKIP_TAGSET_QUIESCE 31 /* quiesce_tagset skip the queue*/
-#define QUEUE_FLAG_MQ_DEFAULT ((1UL << QUEUE_FLAG_IO_STAT) | \
- (1UL << QUEUE_FLAG_SAME_COMP) | \
- (1UL << QUEUE_FLAG_NOWAIT))
+#define QUEUE_FLAG_MQ_DEFAULT (1UL << QUEUE_FLAG_SAME_COMP)
void blk_queue_flag_set(unsigned int flag, struct request_queue *q);
void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
-bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
#define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
@@ -565,16 +614,10 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
#define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
#define blk_queue_noxmerges(q) \
test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
-#define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
-#define blk_queue_stable_writes(q) \
- test_bit(QUEUE_FLAG_STABLE_WRITES, &(q)->queue_flags)
-#define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
-#define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)
-#define blk_queue_zone_resetall(q) \
- test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags)
-#define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
-#define blk_queue_pci_p2pdma(q) \
- test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags)
+#define blk_queue_nonrot(q) (!((q)->limits.features & BLK_FEAT_ROTATIONAL))
+#define blk_queue_io_stat(q) ((q)->limits.features & BLK_FEAT_IO_STAT)
+#define blk_queue_dax(q) ((q)->limits.features & BLK_FEAT_DAX)
+#define blk_queue_pci_p2pdma(q) ((q)->limits.features & BLK_FEAT_PCI_P2PDMA)
#ifdef CONFIG_BLK_RQ_ALLOC_TIME
#define blk_queue_rq_alloc_time(q) \
test_bit(QUEUE_FLAG_RQ_ALLOC_TIME, &(q)->queue_flags)
@@ -590,7 +633,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
#define blk_queue_registered(q) test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
#define blk_queue_sq_sched(q) test_bit(QUEUE_FLAG_SQ_SCHED, &(q)->queue_flags)
#define blk_queue_skip_tagset_quiesce(q) \
- test_bit(QUEUE_FLAG_SKIP_TAGSET_QUIESCE, &(q)->queue_flags)
+ ((q)->limits.features & BLK_FEAT_SKIP_TAGSET_QUIESCE)
extern void blk_set_pm_only(struct request_queue *q);
extern void blk_clear_pm_only(struct request_queue *q);
@@ -620,16 +663,26 @@ static inline enum rpm_status queue_rpm_status(struct request_queue *q)
static inline bool blk_queue_is_zoned(struct request_queue *q)
{
- return IS_ENABLED(CONFIG_BLK_DEV_ZONED) && q->limits.zoned;
+ return IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
+ (q->limits.features & BLK_FEAT_ZONED);
}
#ifdef CONFIG_BLK_DEV_ZONED
-unsigned int bdev_nr_zones(struct block_device *bdev);
-
static inline unsigned int disk_nr_zones(struct gendisk *disk)
{
- return blk_queue_is_zoned(disk->queue) ? disk->nr_zones : 0;
+ return disk->nr_zones;
+}
+bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs);
+#else /* CONFIG_BLK_DEV_ZONED */
+static inline unsigned int disk_nr_zones(struct gendisk *disk)
+{
+ return 0;
}
+static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs)
+{
+ return false;
+}
+#endif /* CONFIG_BLK_DEV_ZONED */
static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector)
{
@@ -638,16 +691,9 @@ static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector)
return sector >> ilog2(disk->queue->limits.chunk_sectors);
}
-static inline void disk_set_max_open_zones(struct gendisk *disk,
- unsigned int max_open_zones)
-{
- disk->queue->limits.max_open_zones = max_open_zones;
-}
-
-static inline void disk_set_max_active_zones(struct gendisk *disk,
- unsigned int max_active_zones)
+static inline unsigned int bdev_nr_zones(struct block_device *bdev)
{
- disk->queue->limits.max_active_zones = max_active_zones;
+ return disk_nr_zones(bdev->bd_disk);
}
static inline unsigned int bdev_max_open_zones(struct block_device *bdev)
@@ -660,36 +706,6 @@ static inline unsigned int bdev_max_active_zones(struct block_device *bdev)
return bdev->bd_disk->queue->limits.max_active_zones;
}
-bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs);
-#else /* CONFIG_BLK_DEV_ZONED */
-static inline unsigned int bdev_nr_zones(struct block_device *bdev)
-{
- return 0;
-}
-
-static inline unsigned int disk_nr_zones(struct gendisk *disk)
-{
- return 0;
-}
-static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector)
-{
- return 0;
-}
-static inline unsigned int bdev_max_open_zones(struct block_device *bdev)
-{
- return 0;
-}
-
-static inline unsigned int bdev_max_active_zones(struct block_device *bdev)
-{
- return 0;
-}
-static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs)
-{
- return false;
-}
-#endif /* CONFIG_BLK_DEV_ZONED */
-
static inline unsigned int blk_queue_depth(struct request_queue *q)
{
if (q->queue_depth)
@@ -880,14 +896,15 @@ static inline bool bio_straddles_zones(struct bio *bio)
}
/*
- * Return how much of the chunk is left to be used for I/O at a given offset.
+ * Return how much within the boundary is left to be used for I/O at a given
+ * offset.
*/
-static inline unsigned int blk_chunk_sectors_left(sector_t offset,
- unsigned int chunk_sectors)
+static inline unsigned int blk_boundary_sectors_left(sector_t offset,
+ unsigned int boundary_sectors)
{
- if (unlikely(!is_power_of_2(chunk_sectors)))
- return chunk_sectors - sector_div(offset, chunk_sectors);
- return chunk_sectors - (offset & (chunk_sectors - 1));
+ if (unlikely(!is_power_of_2(boundary_sectors)))
+ return boundary_sectors - sector_div(offset, boundary_sectors);
+ return boundary_sectors - (offset & (boundary_sectors - 1));
}
/**
@@ -904,7 +921,6 @@ static inline unsigned int blk_chunk_sectors_left(sector_t offset,
*/
static inline struct queue_limits
queue_limits_start_update(struct request_queue *q)
- __acquires(q->limits_lock)
{
mutex_lock(&q->limits_lock);
return q->limits;
@@ -927,26 +943,31 @@ static inline void queue_limits_cancel_update(struct request_queue *q)
}
/*
+ * These helpers are for drivers that have sloppy feature negotiation and might
+ * have to disable DISCARD, WRITE_ZEROES or SECURE_DISCARD from the I/O
+ * completion handler when the device returned an indicator that the respective
+ * feature is not actually supported. They are racy and the driver needs to
+ * cope with that. Try to avoid this scheme if you can.
+ */
+static inline void blk_queue_disable_discard(struct request_queue *q)
+{
+ q->limits.max_discard_sectors = 0;
+}
+
+static inline void blk_queue_disable_secure_erase(struct request_queue *q)
+{
+ q->limits.max_secure_erase_sectors = 0;
+}
+
+static inline void blk_queue_disable_write_zeroes(struct request_queue *q)
+{
+ q->limits.max_write_zeroes_sectors = 0;
+}
+
+/*
* Access functions for manipulating queue properties
*/
-extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int);
-void blk_queue_max_secure_erase_sectors(struct request_queue *q,
- unsigned int max_sectors);
-extern void blk_queue_max_discard_sectors(struct request_queue *q,
- unsigned int max_discard_sectors);
-extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
- unsigned int max_write_same_sectors);
-extern void blk_queue_logical_block_size(struct request_queue *, unsigned int);
-extern void blk_queue_max_zone_append_sectors(struct request_queue *q,
- unsigned int max_zone_append_sectors);
-extern void blk_queue_physical_block_size(struct request_queue *, unsigned int);
-void blk_queue_zone_write_granularity(struct request_queue *q,
- unsigned int size);
-extern void blk_queue_alignment_offset(struct request_queue *q,
- unsigned int alignment);
-void disk_update_readahead(struct gendisk *disk);
extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min);
-extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth);
extern void blk_set_stacking_limits(struct queue_limits *lim);
@@ -954,9 +975,7 @@ extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
sector_t offset);
void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev,
sector_t offset, const char *pfx);
-extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int);
extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
-extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
struct blk_independent_access_ranges *
disk_alloc_independent_access_ranges(struct gendisk *disk, int nr_ia_ranges);
@@ -1077,6 +1096,7 @@ int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
#define BLKDEV_ZERO_NOUNMAP (1 << 0) /* do not free blocks */
#define BLKDEV_ZERO_NOFALLBACK (1 << 1) /* don't write explicit zeroes */
+#define BLKDEV_ZERO_KILLABLE (1 << 2) /* interruptible by fatal signals */
extern int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
@@ -1204,12 +1224,7 @@ static inline unsigned int bdev_max_segments(struct block_device *bdev)
static inline unsigned queue_logical_block_size(const struct request_queue *q)
{
- int retval = 512;
-
- if (q && q->limits.logical_block_size)
- retval = q->limits.logical_block_size;
-
- return retval;
+ return q->limits.logical_block_size;
}
static inline unsigned int bdev_logical_block_size(struct block_device *bdev)
@@ -1295,29 +1310,38 @@ static inline bool bdev_nonrot(struct block_device *bdev)
static inline bool bdev_synchronous(struct block_device *bdev)
{
- return test_bit(QUEUE_FLAG_SYNCHRONOUS,
- &bdev_get_queue(bdev)->queue_flags);
+ return bdev->bd_disk->queue->limits.features & BLK_FEAT_SYNCHRONOUS;
}
static inline bool bdev_stable_writes(struct block_device *bdev)
{
- return test_bit(QUEUE_FLAG_STABLE_WRITES,
- &bdev_get_queue(bdev)->queue_flags);
+ struct request_queue *q = bdev_get_queue(bdev);
+
+ if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
+ q->limits.integrity.csum_type != BLK_INTEGRITY_CSUM_NONE)
+ return true;
+ return q->limits.features & BLK_FEAT_STABLE_WRITES;
+}
+
+static inline bool blk_queue_write_cache(struct request_queue *q)
+{
+ return (q->limits.features & BLK_FEAT_WRITE_CACHE) &&
+ !(q->limits.flags & BLK_FLAG_WRITE_CACHE_DISABLED);
}
static inline bool bdev_write_cache(struct block_device *bdev)
{
- return test_bit(QUEUE_FLAG_WC, &bdev_get_queue(bdev)->queue_flags);
+ return blk_queue_write_cache(bdev_get_queue(bdev));
}
static inline bool bdev_fua(struct block_device *bdev)
{
- return test_bit(QUEUE_FLAG_FUA, &bdev_get_queue(bdev)->queue_flags);
+ return bdev_get_queue(bdev)->limits.features & BLK_FEAT_FUA;
}
static inline bool bdev_nowait(struct block_device *bdev)
{
- return test_bit(QUEUE_FLAG_NOWAIT, &bdev_get_queue(bdev)->queue_flags);
+ return bdev->bd_disk->queue->limits.features & BLK_FEAT_NOWAIT;
}
static inline bool bdev_is_zoned(struct block_device *bdev)
@@ -1359,7 +1383,31 @@ static inline bool bdev_is_zone_start(struct block_device *bdev,
static inline int queue_dma_alignment(const struct request_queue *q)
{
- return q ? q->limits.dma_alignment : 511;
+ return q->limits.dma_alignment;
+}
+
+static inline unsigned int
+queue_atomic_write_unit_max_bytes(const struct request_queue *q)
+{
+ return q->limits.atomic_write_unit_max;
+}
+
+static inline unsigned int
+queue_atomic_write_unit_min_bytes(const struct request_queue *q)
+{
+ return q->limits.atomic_write_unit_min;
+}
+
+static inline unsigned int
+queue_atomic_write_boundary_bytes(const struct request_queue *q)
+{
+ return q->limits.atomic_write_boundary_sectors << SECTOR_SHIFT;
+}
+
+static inline unsigned int
+queue_atomic_write_max_bytes(const struct request_queue *q)
+{
+ return q->limits.atomic_write_max_sectors << SECTOR_SHIFT;
}
static inline unsigned int bdev_dma_alignment(struct block_device *bdev)
@@ -1374,10 +1422,16 @@ static inline bool bdev_iter_is_aligned(struct block_device *bdev,
bdev_logical_block_size(bdev) - 1);
}
+static inline int blk_lim_dma_alignment_and_pad(struct queue_limits *lim)
+{
+ return lim->dma_alignment | lim->dma_pad_mask;
+}
+
static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr,
unsigned int len)
{
- unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask;
+ unsigned int alignment = blk_lim_dma_alignment_and_pad(&q->limits);
+
return !(addr & alignment) && !(len & alignment);
}
@@ -1563,7 +1617,7 @@ int sync_blockdev(struct block_device *bdev);
int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend);
int sync_blockdev_nowait(struct block_device *bdev);
void sync_bdevs(bool wait);
-void bdev_statx_dioalign(struct inode *inode, struct kstat *stat);
+void bdev_statx(struct path *, struct kstat *, u32);
void printk_all_partitions(void);
int __init early_lookup_bdev(const char *pathname, dev_t *dev);
#else
@@ -1581,7 +1635,8 @@ static inline int sync_blockdev_nowait(struct block_device *bdev)
static inline void sync_bdevs(bool wait)
{
}
-static inline void bdev_statx_dioalign(struct inode *inode, struct kstat *stat)
+static inline void bdev_statx(struct path *path, struct kstat *stat,
+ u32 request_mask)
{
}
static inline void printk_all_partitions(void)
@@ -1603,6 +1658,27 @@ struct io_comp_batch {
void (*complete)(struct io_comp_batch *);
};
+static inline bool bdev_can_atomic_write(struct block_device *bdev)
+{
+ struct request_queue *bd_queue = bdev->bd_queue;
+ struct queue_limits *limits = &bd_queue->limits;
+
+ if (!limits->atomic_write_unit_min)
+ return false;
+
+ if (bdev_is_partition(bdev)) {
+ sector_t bd_start_sect = bdev->bd_start_sect;
+ unsigned int alignment =
+ max(limits->atomic_write_unit_min,
+ limits->atomic_write_hw_boundary);
+
+ if (!IS_ALIGNED(bd_start_sect, alignment >> SECTOR_SHIFT))
+ return false;
+ }
+
+ return true;
+}
+
#define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { }
#endif /* _LINUX_BLKDEV_H */
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index bd1e361b351c..f41c7f0ef91e 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -280,4 +280,18 @@ static inline void *bvec_virt(struct bio_vec *bvec)
return page_address(bvec->bv_page) + bvec->bv_offset;
}
+/**
+ * bvec_phys - return the physical address for a bvec
+ * @bvec: bvec to return the physical address for
+ */
+static inline phys_addr_t bvec_phys(const struct bio_vec *bvec)
+{
+ /*
+ * Note this open codes page_to_phys because page_to_phys is defined in
+ * <asm/io.h>, which we don't want to pull in here. If it ever moves to
+ * a sensible place we should start using it.
+ */
+ return PFN_PHYS(page_to_pfn(bvec->bv_page)) + bvec->bv_offset;
+}
+
#endif /* __LINUX_BVEC_H */
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 82b2195efaca..15d28164bbbd 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -358,6 +358,13 @@ struct dm_target {
bool discards_supported:1;
/*
+ * Automatically set by dm-core if this target supports
+ * REQ_OP_ZONE_RESET_ALL. Otherwise, this operation will be emulated
+ * using REQ_OP_ZONE_RESET. Target drivers must not set this manually.
+ */
+ bool zone_reset_all_supported:1;
+
+ /*
* Set if this target requires that discards be split on
* 'max_discard_sectors' boundaries.
*/
diff --git a/include/linux/fs.h b/include/linux/fs.h
index dc9f9c4b2572..fd34b5755c0b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -125,8 +125,10 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
#define FMODE_EXEC ((__force fmode_t)(1 << 5))
/* File writes are restricted (block device specific) */
#define FMODE_WRITE_RESTRICTED ((__force fmode_t)(1 << 6))
+/* File supports atomic writes */
+#define FMODE_CAN_ATOMIC_WRITE ((__force fmode_t)(1 << 7))
-/* FMODE_* bits 7 to 8 */
+/* FMODE_* bit 8 */
/* 32bit hashes as llseek() offset (for directories) */
#define FMODE_32BITHASH ((__force fmode_t)(1 << 9))
@@ -317,6 +319,7 @@ struct readahead_control;
#define IOCB_SYNC (__force int) RWF_SYNC
#define IOCB_NOWAIT (__force int) RWF_NOWAIT
#define IOCB_APPEND (__force int) RWF_APPEND
+#define IOCB_ATOMIC (__force int) RWF_ATOMIC
/* non-RWF related bits - start at 16 */
#define IOCB_EVENTFD (1 << 16)
@@ -351,6 +354,7 @@ struct readahead_control;
{ IOCB_SYNC, "SYNC" }, \
{ IOCB_NOWAIT, "NOWAIT" }, \
{ IOCB_APPEND, "APPEND" }, \
+ { IOCB_ATOMIC, "ATOMIC"}, \
{ IOCB_EVENTFD, "EVENTFD"}, \
{ IOCB_DIRECT, "DIRECT" }, \
{ IOCB_WRITE, "WRITE" }, \
@@ -3254,6 +3258,9 @@ extern const struct inode_operations page_symlink_inode_operations;
extern void kfree_link(void *);
void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *);
void generic_fill_statx_attr(struct inode *inode, struct kstat *stat);
+void generic_fill_statx_atomic_writes(struct kstat *stat,
+ unsigned int unit_min,
+ unsigned int unit_max);
extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int);
extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int);
void __inode_add_bytes(struct inode *inode, loff_t bytes);
@@ -3430,7 +3437,8 @@ static inline int iocb_flags(struct file *file)
return res;
}
-static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)
+static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags,
+ int rw_type)
{
int kiocb_flags = 0;
@@ -3449,6 +3457,12 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)
return -EOPNOTSUPP;
kiocb_flags |= IOCB_NOIO;
}
+ if (flags & RWF_ATOMIC) {
+ if (rw_type != WRITE)
+ return -EOPNOTSUPP;
+ if (!(ki->ki_filp->f_mode & FMODE_CAN_ATOMIC_WRITE))
+ return -EOPNOTSUPP;
+ }
kiocb_flags |= (__force int) (flags & RWF_SUPPORTED);
if (flags & RWF_SYNC)
kiocb_flags |= IOCB_DSYNC;
@@ -3643,4 +3657,6 @@ static inline bool vfs_empty_path(int dfd, const char __user *path)
return !c;
}
+bool generic_atomic_write_valid(struct iov_iter *iter, loff_t pos);
+
#endif /* _LINUX_FS_H */
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index 4109f1bd6128..89ea1ebd975a 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -920,6 +920,9 @@ struct nvmet_fc_target_port {
* further references to hosthandle.
* Entrypoint is Mandatory if the lldd calls nvmet_fc_invalidate_host().
*
+ * @host_traddr: called by the transport to retrieve the node name and
+ * port name of the host port address.
+ *
* @max_hw_queues: indicates the maximum number of hw queues the LLDD
* supports for cpu affinitization.
* Value is Mandatory. Must be at least 1.
@@ -975,6 +978,7 @@ struct nvmet_fc_target_template {
void (*ls_abort)(struct nvmet_fc_target_port *targetport,
void *hosthandle, struct nvmefc_ls_req *lsreq);
void (*host_release)(void *hosthandle);
+ int (*host_traddr)(void *hosthandle, u64 *wwnn, u64 *wwpn);
u32 max_hw_queues;
u16 max_sgl_segments;
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index c693ac344ec0..c12a329dd463 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -25,6 +25,9 @@
#define NVME_NSID_ALL 0xffffffff
+/* Special NSSR value, 'NVMe' */
+#define NVME_SUBSYS_RESET 0x4E564D65
+
enum nvme_subsys_type {
/* Referral to another discovery type target subsystem */
NVME_NQN_DISC = 1,
@@ -1848,6 +1851,7 @@ enum {
/*
* Generic Command Status:
*/
+ NVME_SCT_GENERIC = 0x0,
NVME_SC_SUCCESS = 0x0,
NVME_SC_INVALID_OPCODE = 0x1,
NVME_SC_INVALID_FIELD = 0x2,
@@ -1895,6 +1899,7 @@ enum {
/*
* Command Specific Status:
*/
+ NVME_SCT_COMMAND_SPECIFIC = 0x100,
NVME_SC_CQ_INVALID = 0x100,
NVME_SC_QID_INVALID = 0x101,
NVME_SC_QUEUE_SIZE = 0x102,
@@ -1968,6 +1973,7 @@ enum {
/*
* Media and Data Integrity Errors:
*/
+ NVME_SCT_MEDIA_ERROR = 0x200,
NVME_SC_WRITE_FAULT = 0x280,
NVME_SC_READ_ERROR = 0x281,
NVME_SC_GUARD_CHECK = 0x282,
@@ -1980,6 +1986,7 @@ enum {
/*
* Path-related Errors:
*/
+ NVME_SCT_PATH = 0x300,
NVME_SC_INTERNAL_PATH_ERROR = 0x300,
NVME_SC_ANA_PERSISTENT_LOSS = 0x301,
NVME_SC_ANA_INACCESSIBLE = 0x302,
@@ -1988,11 +1995,17 @@ enum {
NVME_SC_HOST_PATH_ERROR = 0x370,
NVME_SC_HOST_ABORTED_CMD = 0x371,
- NVME_SC_CRD = 0x1800,
- NVME_SC_MORE = 0x2000,
- NVME_SC_DNR = 0x4000,
+ NVME_SC_MASK = 0x00ff, /* Status Code */
+ NVME_SCT_MASK = 0x0700, /* Status Code Type */
+ NVME_SCT_SC_MASK = NVME_SCT_MASK | NVME_SC_MASK,
+
+ NVME_STATUS_CRD = 0x1800, /* Command Retry Delayed */
+ NVME_STATUS_MORE = 0x2000,
+ NVME_STATUS_DNR = 0x4000, /* Do Not Retry */
};
+#define NVME_SCT(status) ((status) >> 8 & 7)
+
struct nvme_completion {
/*
* Used by Admin and Fabrics commands to return data:
diff --git a/include/linux/stat.h b/include/linux/stat.h
index bf92441dbad2..3d900c86981c 100644
--- a/include/linux/stat.h
+++ b/include/linux/stat.h
@@ -54,6 +54,9 @@ struct kstat {
u32 dio_offset_align;
u64 change_cookie;
u64 subvol;
+ u32 atomic_write_unit_min;
+ u32 atomic_write_unit_max;
+ u32 atomic_write_segments_max;
};
/* These definitions are internal to the kernel for now. Mainly used by nfsd. */
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index 248f4ac95642..2c59fe3efcd4 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -41,18 +41,12 @@ static inline u32 t10_pi_ref_tag(struct request *rq)
{
unsigned int shift = ilog2(queue_logical_block_size(rq->q));
-#ifdef CONFIG_BLK_DEV_INTEGRITY
- if (rq->q->integrity.interval_exp)
- shift = rq->q->integrity.interval_exp;
-#endif
+ if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
+ rq->q->limits.integrity.interval_exp)
+ shift = rq->q->limits.integrity.interval_exp;
return blk_rq_pos(rq) >> (shift - SECTOR_SHIFT) & 0xffffffff;
}
-extern const struct blk_integrity_profile t10_pi_type1_crc;
-extern const struct blk_integrity_profile t10_pi_type1_ip;
-extern const struct blk_integrity_profile t10_pi_type3_crc;
-extern const struct blk_integrity_profile t10_pi_type3_ip;
-
struct crc64_pi_tuple {
__be64 guard_tag;
__be16 app_tag;
@@ -72,14 +66,10 @@ static inline u64 ext_pi_ref_tag(struct request *rq)
{
unsigned int shift = ilog2(queue_logical_block_size(rq->q));
-#ifdef CONFIG_BLK_DEV_INTEGRITY
- if (rq->q->integrity.interval_exp)
- shift = rq->q->integrity.interval_exp;
-#endif
+ if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
+ rq->q->limits.integrity.interval_exp)
+ shift = rq->q->limits.integrity.interval_exp;
return lower_48_bits(blk_rq_pos(rq) >> (shift - SECTOR_SHIFT));
}
-extern const struct blk_integrity_profile ext_pi_type1_crc64;
-extern const struct blk_integrity_profile ext_pi_type3_crc64;
-
#endif
diff --git a/include/scsi/scsi_proto.h b/include/scsi/scsi_proto.h
index 843106e1109f..70e1262b2e20 100644
--- a/include/scsi/scsi_proto.h
+++ b/include/scsi/scsi_proto.h
@@ -120,6 +120,7 @@
#define WRITE_SAME_16 0x93
#define ZBC_OUT 0x94
#define ZBC_IN 0x95
+#define WRITE_ATOMIC_16 0x9c
#define SERVICE_ACTION_BIDIRECTIONAL 0x9d
#define SERVICE_ACTION_IN_16 0x9e
#define SERVICE_ACTION_OUT_16 0x9f
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 0e128ad51460..1527d5d45e01 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -9,9 +9,17 @@
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/tracepoint.h>
+#include <uapi/linux/ioprio.h>
#define RWBS_LEN 8
+#define IOPRIO_CLASS_STRINGS \
+ { IOPRIO_CLASS_NONE, "none" }, \
+ { IOPRIO_CLASS_RT, "rt" }, \
+ { IOPRIO_CLASS_BE, "be" }, \
+ { IOPRIO_CLASS_IDLE, "idle" }, \
+ { IOPRIO_CLASS_INVALID, "invalid"}
+
#ifdef CONFIG_BUFFER_HEAD
DECLARE_EVENT_CLASS(block_buffer,
@@ -82,6 +90,7 @@ TRACE_EVENT(block_rq_requeue,
__field( dev_t, dev )
__field( sector_t, sector )
__field( unsigned int, nr_sector )
+ __field( unsigned short, ioprio )
__array( char, rwbs, RWBS_LEN )
__dynamic_array( char, cmd, 1 )
),
@@ -90,16 +99,20 @@ TRACE_EVENT(block_rq_requeue,
__entry->dev = rq->q->disk ? disk_devt(rq->q->disk) : 0;
__entry->sector = blk_rq_trace_sector(rq);
__entry->nr_sector = blk_rq_trace_nr_sectors(rq);
+ __entry->ioprio = rq->ioprio;
blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
__get_str(cmd)[0] = '\0';
),
- TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+ TP_printk("%d,%d %s (%s) %llu + %u %s,%u,%u [%d]",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->rwbs, __get_str(cmd),
- (unsigned long long)__entry->sector,
- __entry->nr_sector, 0)
+ (unsigned long long)__entry->sector, __entry->nr_sector,
+ __print_symbolic(IOPRIO_PRIO_CLASS(__entry->ioprio),
+ IOPRIO_CLASS_STRINGS),
+ IOPRIO_PRIO_HINT(__entry->ioprio),
+ IOPRIO_PRIO_LEVEL(__entry->ioprio), 0)
);
DECLARE_EVENT_CLASS(block_rq_completion,
@@ -113,6 +126,7 @@ DECLARE_EVENT_CLASS(block_rq_completion,
__field( sector_t, sector )
__field( unsigned int, nr_sector )
__field( int , error )
+ __field( unsigned short, ioprio )
__array( char, rwbs, RWBS_LEN )
__dynamic_array( char, cmd, 1 )
),
@@ -122,16 +136,20 @@ DECLARE_EVENT_CLASS(block_rq_completion,
__entry->sector = blk_rq_pos(rq);
__entry->nr_sector = nr_bytes >> 9;
__entry->error = blk_status_to_errno(error);
+ __entry->ioprio = rq->ioprio;
blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
__get_str(cmd)[0] = '\0';
),
- TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+ TP_printk("%d,%d %s (%s) %llu + %u %s,%u,%u [%d]",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->rwbs, __get_str(cmd),
- (unsigned long long)__entry->sector,
- __entry->nr_sector, __entry->error)
+ (unsigned long long)__entry->sector, __entry->nr_sector,
+ __print_symbolic(IOPRIO_PRIO_CLASS(__entry->ioprio),
+ IOPRIO_CLASS_STRINGS),
+ IOPRIO_PRIO_HINT(__entry->ioprio),
+ IOPRIO_PRIO_LEVEL(__entry->ioprio), __entry->error)
);
/**
@@ -180,6 +198,7 @@ DECLARE_EVENT_CLASS(block_rq,
__field( sector_t, sector )
__field( unsigned int, nr_sector )
__field( unsigned int, bytes )
+ __field( unsigned short, ioprio )
__array( char, rwbs, RWBS_LEN )
__array( char, comm, TASK_COMM_LEN )
__dynamic_array( char, cmd, 1 )
@@ -190,17 +209,21 @@ DECLARE_EVENT_CLASS(block_rq,
__entry->sector = blk_rq_trace_sector(rq);
__entry->nr_sector = blk_rq_trace_nr_sectors(rq);
__entry->bytes = blk_rq_bytes(rq);
+ __entry->ioprio = rq->ioprio;
blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
__get_str(cmd)[0] = '\0';
memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
),
- TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
+ TP_printk("%d,%d %s %u (%s) %llu + %u %s,%u,%u [%s]",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->rwbs, __entry->bytes, __get_str(cmd),
- (unsigned long long)__entry->sector,
- __entry->nr_sector, __entry->comm)
+ (unsigned long long)__entry->sector, __entry->nr_sector,
+ __print_symbolic(IOPRIO_PRIO_CLASS(__entry->ioprio),
+ IOPRIO_CLASS_STRINGS),
+ IOPRIO_PRIO_HINT(__entry->ioprio),
+ IOPRIO_PRIO_LEVEL(__entry->ioprio), __entry->comm)
);
/**
diff --git a/include/trace/events/scsi.h b/include/trace/events/scsi.h
index 8e2d9b1b0e77..05f1945ed204 100644
--- a/include/trace/events/scsi.h
+++ b/include/trace/events/scsi.h
@@ -102,6 +102,7 @@
scsi_opcode_name(WRITE_32), \
scsi_opcode_name(WRITE_SAME_32), \
scsi_opcode_name(ATA_16), \
+ scsi_opcode_name(WRITE_ATOMIC_16), \
scsi_opcode_name(ATA_12))
#define scsi_hostbyte_name(result) { result, #result }
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 45e4e64fd664..191a7e88a8ab 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -329,9 +329,12 @@ typedef int __bitwise __kernel_rwf_t;
/* per-IO negation of O_APPEND */
#define RWF_NOAPPEND ((__force __kernel_rwf_t)0x00000020)
+/* Atomic Write */
+#define RWF_ATOMIC ((__force __kernel_rwf_t)0x00000040)
+
/* mask of flags supported by the kernel */
#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
- RWF_APPEND | RWF_NOAPPEND)
+ RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC)
/* Pagemap ioctl */
#define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg)
diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
index 95770941ee2c..887a25286441 100644
--- a/include/uapi/linux/stat.h
+++ b/include/uapi/linux/stat.h
@@ -128,7 +128,13 @@ struct statx {
__u32 stx_dio_offset_align; /* File offset alignment for direct I/O */
/* 0xa0 */
__u64 stx_subvol; /* Subvolume identifier */
- __u64 __spare3[11]; /* Spare space for future expansion */
+ __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */
+ __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */
+ /* 0xb0 */
+ __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */
+ __u32 __spare1[1];
+ /* 0xb8 */
+ __u64 __spare3[9]; /* Spare space for future expansion */
/* 0x100 */
};
@@ -157,6 +163,7 @@ struct statx {
#define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */
#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */
#define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */
+#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
@@ -192,6 +199,7 @@ struct statx {
#define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */
#define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */
#define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */
+#define STATX_ATTR_WRITE_ATOMIC 0x00400000 /* File supports atomic write operations */
#endif /* _UAPI_LINUX_STAT_H */