diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-03-30 11:43:51 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-03-30 11:43:51 -0700 |
commit | 1592614838cb52f4313ceff64894e2ca78591498 (patch) | |
tree | cdb83d2edd58f274b458fc74ec2903436bc6963a /drivers | |
parent | 10f36b1e80a9f7afdaefe6f0b06dcdf89715eed7 (diff) | |
parent | 766c3297d7e1584394d4af0cc8368e838124b023 (diff) |
Merge tag 'for-5.7/drivers-2020-03-29' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe:
- floppy driver cleanup series from Willy
- NVMe updates and fixes (Various)
- null_blk trace improvements (Chaitanya)
- bcache fixes (Coly)
- md fixes (via Song)
- loop block size change optimizations (Martijn)
- scnprintf() use (Takashi)
* tag 'for-5.7/drivers-2020-03-29' of git://git.kernel.dk/linux-block: (81 commits)
null_blk: add trace in null_blk_zoned.c
null_blk: add tracepoint helpers for zoned mode
block: add a zone condition debug helper
nvme: cleanup namespace identifier reporting in nvme_init_ns_head
nvme: rename __nvme_find_ns_head to nvme_find_ns_head
nvme: refactor nvme_identify_ns_descs error handling
nvme-tcp: Add warning on state change failure at nvme_tcp_setup_ctrl
nvme-rdma: Add warning on state change failure at nvme_rdma_setup_ctrl
nvme: Fix controller creation races with teardown flow
nvme: Make nvme_uninit_ctrl symmetric to nvme_init_ctrl
nvme: Fix ctrl use-after-free during sysfs deletion
nvme-pci: Re-order nvme_pci_free_ctrl
nvme: Remove unused return code from nvme_delete_ctrl_sync
nvme: Use nvme_state_terminal helper
nvme: release ida resources
nvme: Add compat_ioctl handler for NVME_IOCTL_SUBMIT_IO
nvmet-tcp: optimize tcp stack TX when data digest is used
nvme-fabrics: Use scnprintf() for avoiding potential buffer overflow
nvme-multipath: do not reset on unknown status
nvmet-rdma: allocate RW ctxs according to mdts
...
Diffstat (limited to 'drivers')
34 files changed, 1717 insertions, 888 deletions
diff --git a/drivers/block/Makefile b/drivers/block/Makefile index a53cc1e3a2d3..795facd8cf19 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -6,6 +6,9 @@ # Rewritten to use lists instead of if-statements. # +# needed for trace events +ccflags-y += -I$(src) + obj-$(CONFIG_MAC_FLOPPY) += swim3.o obj-$(CONFIG_BLK_DEV_SWIM) += swim_mod.o obj-$(CONFIG_BLK_DEV_FD) += floppy.o @@ -39,6 +42,9 @@ obj-$(CONFIG_ZRAM) += zram/ obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o null_blk-objs := null_blk_main.o +ifeq ($(CONFIG_BLK_DEV_ZONED), y) +null_blk-$(CONFIG_TRACING) += null_blk_trace.o +endif null_blk-$(CONFIG_BLK_DEV_ZONED) += null_blk_zoned.o skd-y := skd_main.o diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 7b32fb673375..a27804d71e12 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -87,9 +87,9 @@ static ssize_t aoedisk_show_netif(struct device *dev, if (*nd == NULL) return snprintf(page, PAGE_SIZE, "none\n"); for (p = page; nd < ne; nd++) - p += snprintf(p, PAGE_SIZE - (p-page), "%s%s", + p += scnprintf(p, PAGE_SIZE - (p-page), "%s%s", p == page ? "" : ",", (*nd)->name); - p += snprintf(p, PAGE_SIZE - (p-page), "\n"); + p += scnprintf(p, PAGE_SIZE - (p-page), "\n"); return p-page; } /* firmware version */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 72a7c3ea2ce3..c094c3c2c5d4 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3413,22 +3413,11 @@ int drbd_md_read(struct drbd_device *device, struct drbd_backing_dev *bdev) * the meta-data super block. This function sets MD_DIRTY, and starts a * timer that ensures that within five seconds you have to call drbd_md_sync(). 
*/ -#ifdef DEBUG -void drbd_md_mark_dirty_(struct drbd_device *device, unsigned int line, const char *func) -{ - if (!test_and_set_bit(MD_DIRTY, &device->flags)) { - mod_timer(&device->md_sync_timer, jiffies + HZ); - device->last_md_mark_dirty.line = line; - device->last_md_mark_dirty.func = func; - } -} -#else void drbd_md_mark_dirty(struct drbd_device *device) { if (!test_and_set_bit(MD_DIRTY, &device->flags)) mod_timer(&device->md_sync_timer, jiffies + 5*HZ); } -#endif void drbd_uuid_move_history(struct drbd_device *device) __must_hold(local) { diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 8ef65c085640..c3daa64cb52c 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -171,7 +171,6 @@ static int print_unex = 1; #include <linux/kernel.h> #include <linux/timer.h> #include <linux/workqueue.h> -#define FDPATCHES #include <linux/fdreg.h> #include <linux/fd.h> #include <linux/hdreg.h> @@ -306,36 +305,26 @@ static bool initialized; /* reverse mapping from unit and fdc to drive */ #define REVDRIVE(fdc, unit) ((unit) + ((fdc) << 2)) -#define DP (&drive_params[current_drive]) -#define DRS (&drive_state[current_drive]) -#define DRWE (&write_errors[current_drive]) -#define FDCS (&fdc_state[fdc]) - -#define UDP (&drive_params[drive]) -#define UDRS (&drive_state[drive]) -#define UDRWE (&write_errors[drive]) -#define UFDCS (&fdc_state[FDC(drive)]) - #define PH_HEAD(floppy, head) (((((floppy)->stretch & 2) >> 1) ^ head) << 2) #define STRETCH(floppy) ((floppy)->stretch & FD_STRETCH) -/* read/write */ -#define COMMAND (raw_cmd->cmd[0]) -#define DR_SELECT (raw_cmd->cmd[1]) -#define TRACK (raw_cmd->cmd[2]) -#define HEAD (raw_cmd->cmd[3]) -#define SECTOR (raw_cmd->cmd[4]) -#define SIZECODE (raw_cmd->cmd[5]) -#define SECT_PER_TRACK (raw_cmd->cmd[6]) -#define GAP (raw_cmd->cmd[7]) -#define SIZECODE2 (raw_cmd->cmd[8]) +/* read/write commands */ +#define COMMAND 0 +#define DR_SELECT 1 +#define TRACK 2 +#define HEAD 3 +#define SECTOR 4 +#define 
SIZECODE 5 +#define SECT_PER_TRACK 6 +#define GAP 7 +#define SIZECODE2 8 #define NR_RW 9 -/* format */ -#define F_SIZECODE (raw_cmd->cmd[2]) -#define F_SECT_PER_TRACK (raw_cmd->cmd[3]) -#define F_GAP (raw_cmd->cmd[4]) -#define F_FILL (raw_cmd->cmd[5]) +/* format commands */ +#define F_SIZECODE 2 +#define F_SECT_PER_TRACK 3 +#define F_GAP 4 +#define F_FILL 5 #define NR_F 6 /* @@ -351,14 +340,14 @@ static bool initialized; #define MAX_REPLIES 16 static unsigned char reply_buffer[MAX_REPLIES]; static int inr; /* size of reply buffer, when called from interrupt */ -#define ST0 (reply_buffer[0]) -#define ST1 (reply_buffer[1]) -#define ST2 (reply_buffer[2]) -#define ST3 (reply_buffer[0]) /* result of GETSTATUS */ -#define R_TRACK (reply_buffer[3]) -#define R_HEAD (reply_buffer[4]) -#define R_SECTOR (reply_buffer[5]) -#define R_SIZECODE (reply_buffer[6]) +#define ST0 0 +#define ST1 1 +#define ST2 2 +#define ST3 0 /* result of GETSTATUS */ +#define R_TRACK 3 +#define R_HEAD 4 +#define R_SECTOR 5 +#define R_SIZECODE 6 #define SEL_DLY (2 * HZ / 100) @@ -593,7 +582,7 @@ static int buffer_max = -1; /* fdc related variables, should end up in a struct */ static struct floppy_fdc_state fdc_state[N_FDC]; -static int fdc; /* current fdc */ +static int current_fdc; /* current fdc */ static struct workqueue_struct *floppy_wq; @@ -604,9 +593,19 @@ static unsigned char fsector_t; /* sector in track */ static unsigned char in_sector_offset; /* offset within physical sector, * expressed in units of 512 bytes */ +static inline unsigned char fdc_inb(int fdc, int reg) +{ + return fd_inb(fdc_state[fdc].address + reg); +} + +static inline void fdc_outb(unsigned char value, int fdc, int reg) +{ + fd_outb(value, fdc_state[fdc].address + reg); +} + static inline bool drive_no_geom(int drive) { - return !current_type[drive] && !ITYPE(UDRS->fd_device); + return !current_type[drive] && !ITYPE(drive_state[drive].fd_device); } #ifndef fd_eject @@ -630,7 +629,7 @@ static inline void set_debugt(void) 
static inline void debugt(const char *func, const char *msg) { - if (DP->flags & DEBUGT) + if (drive_params[current_drive].flags & DEBUGT) pr_info("%s:%s dtime=%lu\n", func, msg, jiffies - debugtimer); } #else @@ -683,10 +682,10 @@ static void __reschedule_timeout(int drive, const char *message) delay = 20UL * HZ; drive = 0; } else - delay = UDP->timeout; + delay = drive_params[drive].timeout; mod_delayed_work(floppy_wq, &fd_timeout, delay); - if (UDP->flags & FD_DEBUG) + if (drive_params[drive].flags & FD_DEBUG) DPRINT("reschedule timeout %s\n", message); timeout_message = message; } @@ -740,33 +739,37 @@ static int disk_change(int drive) { int fdc = FDC(drive); - if (time_before(jiffies, UDRS->select_date + UDP->select_delay)) + if (time_before(jiffies, drive_state[drive].select_date + drive_params[drive].select_delay)) DPRINT("WARNING disk change called early\n"); - if (!(FDCS->dor & (0x10 << UNIT(drive))) || - (FDCS->dor & 3) != UNIT(drive) || fdc != FDC(drive)) { + if (!(fdc_state[fdc].dor & (0x10 << UNIT(drive))) || + (fdc_state[fdc].dor & 3) != UNIT(drive) || fdc != FDC(drive)) { DPRINT("probing disk change on unselected drive\n"); DPRINT("drive=%d fdc=%d dor=%x\n", drive, FDC(drive), - (unsigned int)FDCS->dor); + (unsigned int)fdc_state[fdc].dor); } - debug_dcl(UDP->flags, + debug_dcl(drive_params[drive].flags, "checking disk change line for drive %d\n", drive); - debug_dcl(UDP->flags, "jiffies=%lu\n", jiffies); - debug_dcl(UDP->flags, "disk change line=%x\n", fd_inb(FD_DIR) & 0x80); - debug_dcl(UDP->flags, "flags=%lx\n", UDRS->flags); - - if (UDP->flags & FD_BROKEN_DCL) - return test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags); - if ((fd_inb(FD_DIR) ^ UDP->flags) & 0x80) { - set_bit(FD_VERIFY_BIT, &UDRS->flags); + debug_dcl(drive_params[drive].flags, "jiffies=%lu\n", jiffies); + debug_dcl(drive_params[drive].flags, "disk change line=%x\n", + fdc_inb(fdc, FD_DIR) & 0x80); + debug_dcl(drive_params[drive].flags, "flags=%lx\n", + drive_state[drive].flags); + + if 
(drive_params[drive].flags & FD_BROKEN_DCL) + return test_bit(FD_DISK_CHANGED_BIT, + &drive_state[drive].flags); + if ((fdc_inb(fdc, FD_DIR) ^ drive_params[drive].flags) & 0x80) { + set_bit(FD_VERIFY_BIT, &drive_state[drive].flags); /* verify write protection */ - if (UDRS->maxblock) /* mark it changed */ - set_bit(FD_DISK_CHANGED_BIT, &UDRS->flags); + if (drive_state[drive].maxblock) /* mark it changed */ + set_bit(FD_DISK_CHANGED_BIT, + &drive_state[drive].flags); /* invalidate its geometry */ - if (UDRS->keep_data >= 0) { - if ((UDP->flags & FTD_MSG) && + if (drive_state[drive].keep_data >= 0) { + if ((drive_params[drive].flags & FTD_MSG) && current_type[drive] != NULL) DPRINT("Disk type is undefined after disk change\n"); current_type[drive] = NULL; @@ -775,8 +778,8 @@ static int disk_change(int drive) return 1; } else { - UDRS->last_checked = jiffies; - clear_bit(FD_DISK_NEWCHANGE_BIT, &UDRS->flags); + drive_state[drive].last_checked = jiffies; + clear_bit(FD_DISK_NEWCHANGE_BIT, &drive_state[drive].flags); } return 0; } @@ -799,26 +802,26 @@ static int set_dor(int fdc, char mask, char data) unsigned char newdor; unsigned char olddor; - if (FDCS->address == -1) + if (fdc_state[fdc].address == -1) return -1; - olddor = FDCS->dor; + olddor = fdc_state[fdc].dor; newdor = (olddor & mask) | data; if (newdor != olddor) { unit = olddor & 0x3; if (is_selected(olddor, unit) && !is_selected(newdor, unit)) { drive = REVDRIVE(fdc, unit); - debug_dcl(UDP->flags, + debug_dcl(drive_params[drive].flags, "calling disk change from set_dor\n"); disk_change(drive); } - FDCS->dor = newdor; - fd_outb(newdor, FD_DOR); + fdc_state[fdc].dor = newdor; + fdc_outb(newdor, fdc, FD_DOR); unit = newdor & 0x3; if (!is_selected(olddor, unit) && is_selected(newdor, unit)) { drive = REVDRIVE(fdc, unit); - UDRS->select_date = jiffies; + drive_state[drive].select_date = jiffies; } } return olddor; @@ -826,11 +829,12 @@ static int set_dor(int fdc, char mask, char data) static void twaddle(void) { - 
if (DP->select_delay) + if (drive_params[current_drive].select_delay) return; - fd_outb(FDCS->dor & ~(0x10 << UNIT(current_drive)), FD_DOR); - fd_outb(FDCS->dor, FD_DOR); - DRS->select_date = jiffies; + fdc_outb(fdc_state[current_fdc].dor & ~(0x10 << UNIT(current_drive)), + current_fdc, FD_DOR); + fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR); + drive_state[current_drive].select_date = jiffies; } /* @@ -841,19 +845,20 @@ static void reset_fdc_info(int mode) { int drive; - FDCS->spec1 = FDCS->spec2 = -1; - FDCS->need_configure = 1; - FDCS->perp_mode = 1; - FDCS->rawcmd = 0; + fdc_state[current_fdc].spec1 = fdc_state[current_fdc].spec2 = -1; + fdc_state[current_fdc].need_configure = 1; + fdc_state[current_fdc].perp_mode = 1; + fdc_state[current_fdc].rawcmd = 0; for (drive = 0; drive < N_DRIVE; drive++) - if (FDC(drive) == fdc && (mode || UDRS->track != NEED_1_RECAL)) - UDRS->track = NEED_2_RECAL; + if (FDC(drive) == current_fdc && + (mode || drive_state[drive].track != NEED_1_RECAL)) + drive_state[drive].track = NEED_2_RECAL; } /* selects the fdc and drive, and enables the fdc's input/dma. 
*/ static void set_fdc(int drive) { - unsigned int new_fdc = fdc; + unsigned int new_fdc = current_fdc; if (drive >= 0 && drive < N_DRIVE) { new_fdc = FDC(drive); @@ -863,15 +868,15 @@ static void set_fdc(int drive) pr_info("bad fdc value\n"); return; } - fdc = new_fdc; - set_dor(fdc, ~0, 8); + current_fdc = new_fdc; + set_dor(current_fdc, ~0, 8); #if N_FDC > 1 - set_dor(1 - fdc, ~8, 0); + set_dor(1 - current_fdc, ~8, 0); #endif - if (FDCS->rawcmd == 2) + if (fdc_state[current_fdc].rawcmd == 2) reset_fdc_info(1); - if (fd_inb(FD_STATUS) != STATUS_READY) - FDCS->reset = 1; + if (fdc_inb(current_fdc, FD_STATUS) != STATUS_READY) + fdc_state[current_fdc].reset = 1; } /* locks the driver */ @@ -924,19 +929,19 @@ static void floppy_off(unsigned int drive) unsigned long volatile delta; int fdc = FDC(drive); - if (!(FDCS->dor & (0x10 << UNIT(drive)))) + if (!(fdc_state[fdc].dor & (0x10 << UNIT(drive)))) return; del_timer(motor_off_timer + drive); /* make spindle stop in a position which minimizes spinup time * next time */ - if (UDP->rps) { - delta = jiffies - UDRS->first_read_date + HZ - - UDP->spindown_offset; - delta = ((delta * UDP->rps) % HZ) / UDP->rps; + if (drive_params[drive].rps) { + delta = jiffies - drive_state[drive].first_read_date + HZ - + drive_params[drive].spindown_offset; + delta = ((delta * drive_params[drive].rps) % HZ) / drive_params[drive].rps; motor_off_timer[drive].expires = - jiffies + UDP->spindown - delta; + jiffies + drive_params[drive].spindown - delta; } add_timer(motor_off_timer + drive); } @@ -952,20 +957,20 @@ static void scandrives(void) int drive; int saved_drive; - if (DP->select_delay) + if (drive_params[current_drive].select_delay) return; saved_drive = current_drive; for (i = 0; i < N_DRIVE; i++) { drive = (saved_drive + i + 1) % N_DRIVE; - if (UDRS->fd_ref == 0 || UDP->select_delay != 0) + if (drive_state[drive].fd_ref == 0 || drive_params[drive].select_delay != 0) continue; /* skip closed drives */ set_fdc(drive); - if 
(!(set_dor(fdc, ~3, UNIT(drive) | (0x10 << UNIT(drive))) & + if (!(set_dor(current_fdc, ~3, UNIT(drive) | (0x10 << UNIT(drive))) & (0x10 << UNIT(drive)))) /* switch the motor off again, if it was off to * begin with */ - set_dor(fdc, ~(0x10 << UNIT(drive)), 0); + set_dor(current_fdc, ~(0x10 << UNIT(drive)), 0); } set_fdc(saved_drive); } @@ -1011,7 +1016,8 @@ static void cancel_activity(void) * transfer */ static void fd_watchdog(void) { - debug_dcl(DP->flags, "calling disk change from watchdog\n"); + debug_dcl(drive_params[current_drive].flags, + "calling disk change from watchdog\n"); if (disk_change(current_drive)) { DPRINT("disk removed during i/o\n"); @@ -1035,7 +1041,7 @@ static void main_command_interrupt(void) static int fd_wait_for_completion(unsigned long expires, void (*function)(void)) { - if (FDCS->reset) { + if (fdc_state[current_fdc].reset) { reset_fdc(); /* do the reset during sleep to win time * if we don't need to sleep, it's a good * occasion anyways */ @@ -1063,13 +1069,13 @@ static void setup_DMA(void) pr_cont("%x,", raw_cmd->cmd[i]); pr_cont("\n"); cont->done(0); - FDCS->reset = 1; + fdc_state[current_fdc].reset = 1; return; } if (((unsigned long)raw_cmd->kernel_data) % 512) { pr_info("non aligned address: %p\n", raw_cmd->kernel_data); cont->done(0); - FDCS->reset = 1; + fdc_state[current_fdc].reset = 1; return; } f = claim_dma_lock(); @@ -1077,10 +1083,11 @@ static void setup_DMA(void) #ifdef fd_dma_setup if (fd_dma_setup(raw_cmd->kernel_data, raw_cmd->length, (raw_cmd->flags & FD_RAW_READ) ? 
- DMA_MODE_READ : DMA_MODE_WRITE, FDCS->address) < 0) { + DMA_MODE_READ : DMA_MODE_WRITE, + fdc_state[current_fdc].address) < 0) { release_dma_lock(f); cont->done(0); - FDCS->reset = 1; + fdc_state[current_fdc].reset = 1; return; } release_dma_lock(f); @@ -1091,7 +1098,7 @@ static void setup_DMA(void) DMA_MODE_READ : DMA_MODE_WRITE); fd_set_dma_addr(raw_cmd->kernel_data); fd_set_dma_count(raw_cmd->length); - virtual_dma_port = FDCS->address; + virtual_dma_port = fdc_state[current_fdc].address; fd_enable_dma(); release_dma_lock(f); #endif @@ -1105,18 +1112,18 @@ static int wait_til_ready(void) int status; int counter; - if (FDCS->reset) + if (fdc_state[current_fdc].reset) return -1; for (counter = 0; counter < 10000; counter++) { - status = fd_inb(FD_STATUS); + status = fdc_inb(current_fdc, FD_STATUS); if (status & STATUS_READY) return status; } if (initialized) { - DPRINT("Getstatus times out (%x) on fdc %d\n", status, fdc); + DPRINT("Getstatus times out (%x) on fdc %d\n", status, current_fdc); show_floppy(); } - FDCS->reset = 1; + fdc_state[current_fdc].reset = 1; return -1; } @@ -1129,17 +1136,17 @@ static int output_byte(char byte) return -1; if (is_ready_state(status)) { - fd_outb(byte, FD_DATA); + fdc_outb(byte, current_fdc, FD_DATA); output_log[output_log_pos].data = byte; output_log[output_log_pos].status = status; output_log[output_log_pos].jiffies = jiffies; output_log_pos = (output_log_pos + 1) % OLOGSIZE; return 0; } - FDCS->reset = 1; + fdc_state[current_fdc].reset = 1; if (initialized) { DPRINT("Unable to send byte %x to FDC. Fdc=%x Status=%x\n", - byte, fdc, status); + byte, current_fdc, status); show_floppy(); } return -1; @@ -1162,16 +1169,16 @@ static int result(void) return i; } if (status == (STATUS_DIR | STATUS_READY | STATUS_BUSY)) - reply_buffer[i] = fd_inb(FD_DATA); + reply_buffer[i] = fdc_inb(current_fdc, FD_DATA); else break; } if (initialized) { DPRINT("get result error. 
Fdc=%d Last status=%x Read bytes=%d\n", - fdc, status, i); + current_fdc, status, i); show_floppy(); } - FDCS->reset = 1; + fdc_state[current_fdc].reset = 1; return -1; } @@ -1208,7 +1215,7 @@ static void perpendicular_mode(void) default: DPRINT("Invalid data rate for perpendicular mode!\n"); cont->done(0); - FDCS->reset = 1; + fdc_state[current_fdc].reset = 1; /* * convenient way to return to * redo without too much hassle @@ -1219,12 +1226,12 @@ static void perpendicular_mode(void) } else perp_mode = 0; - if (FDCS->perp_mode == perp_mode) + if (fdc_state[current_fdc].perp_mode == perp_mode) return; - if (FDCS->version >= FDC_82077_ORIG) { + if (fdc_state[current_fdc].version >= FDC_82077_ORIG) { output_byte(FD_PERPENDICULAR); output_byte(perp_mode); - FDCS->perp_mode = perp_mode; + fdc_state[current_fdc].perp_mode = perp_mode; } else if (perp_mode) { DPRINT("perpendicular mode not supported by this FDC.\n"); } @@ -1279,9 +1286,10 @@ static void fdc_specify(void) int hlt_max_code = 0x7f; int hut_max_code = 0xf; - if (FDCS->need_configure && FDCS->version >= FDC_82072A) { + if (fdc_state[current_fdc].need_configure && + fdc_state[current_fdc].version >= FDC_82072A) { fdc_configure(); - FDCS->need_configure = 0; + fdc_state[current_fdc].need_configure = 0; } switch (raw_cmd->rate & 0x03) { @@ -1290,7 +1298,7 @@ static void fdc_specify(void) break; case 1: dtr = 300; - if (FDCS->version >= FDC_82078) { + if (fdc_state[current_fdc].version >= FDC_82078) { /* chose the default rate table, not the one * where 1 = 2 Mbps */ output_byte(FD_DRIVESPEC); @@ -1305,27 +1313,30 @@ static void fdc_specify(void) break; } - if (FDCS->version >= FDC_82072) { + if (fdc_state[current_fdc].version >= FDC_82072) { scale_dtr = dtr; hlt_max_code = 0x00; /* 0==256msec*dtr0/dtr (not linear!) */ hut_max_code = 0x0; /* 0==256msec*dtr0/dtr (not linear!) 
*/ } /* Convert step rate from microseconds to milliseconds and 4 bits */ - srt = 16 - DIV_ROUND_UP(DP->srt * scale_dtr / 1000, NOMINAL_DTR); + srt = 16 - DIV_ROUND_UP(drive_params[current_drive].srt * scale_dtr / 1000, + NOMINAL_DTR); if (slow_floppy) srt = srt / 4; SUPBOUND(srt, 0xf); INFBOUND(srt, 0); - hlt = DIV_ROUND_UP(DP->hlt * scale_dtr / 2, NOMINAL_DTR); + hlt = DIV_ROUND_UP(drive_params[current_drive].hlt * scale_dtr / 2, + NOMINAL_DTR); if (hlt < 0x01) hlt = 0x01; else if (hlt > 0x7f) hlt = hlt_max_code; - hut = DIV_ROUND_UP(DP->hut * scale_dtr / 16, NOMINAL_DTR); + hut = DIV_ROUND_UP(drive_params[current_drive].hut * scale_dtr / 16, + NOMINAL_DTR); if (hut < 0x1) hut = 0x1; else if (hut > 0xf) @@ -1335,11 +1346,12 @@ static void fdc_specify(void) spec2 = (hlt << 1) | (use_virtual_dma & 1); /* If these parameters did not change, just return with success */ - if (FDCS->spec1 != spec1 || FDCS->spec2 != spec2) { + if (fdc_state[current_fdc].spec1 != spec1 || + fdc_state[current_fdc].spec2 != spec2) { /* Go ahead and set spec1 and spec2 */ output_byte(FD_SPECIFY); - output_byte(FDCS->spec1 = spec1); - output_byte(FDCS->spec2 = spec2); + output_byte(fdc_state[current_fdc].spec1 = spec1); + output_byte(fdc_state[current_fdc].spec2 = spec2); } } /* fdc_specify */ @@ -1350,52 +1362,55 @@ static void fdc_specify(void) static int fdc_dtr(void) { /* If data rate not already set to desired value, set it. */ - if ((raw_cmd->rate & 3) == FDCS->dtr) + if ((raw_cmd->rate & 3) == fdc_state[current_fdc].dtr) return 0; /* Set dtr */ - fd_outb(raw_cmd->rate & 3, FD_DCR); + fdc_outb(raw_cmd->rate & 3, current_fdc, FD_DCR); /* TODO: some FDC/drive combinations (C&T 82C711 with TEAC 1.2MB) * need a stabilization period of several milliseconds to be * enforced after data rate changes before R/W operations. * Pause 5 msec to avoid trouble. 
(Needs to be 2 jiffies) */ - FDCS->dtr = raw_cmd->rate & 3; + fdc_state[current_fdc].dtr = raw_cmd->rate & 3; return fd_wait_for_completion(jiffies + 2UL * HZ / 100, floppy_ready); } /* fdc_dtr */ static void tell_sector(void) { pr_cont(": track %d, head %d, sector %d, size %d", - R_TRACK, R_HEAD, R_SECTOR, R_SIZECODE); + reply_buffer[R_TRACK], reply_buffer[R_HEAD], + reply_buffer[R_SECTOR], + reply_buffer[R_SIZECODE]); } /* tell_sector */ static void print_errors(void) { DPRINT(""); - if (ST0 & ST0_ECE) { + if (reply_buffer[ST0] & ST0_ECE) { pr_cont("Recalibrate failed!"); - } else if (ST2 & ST2_CRC) { + } else if (reply_buffer[ST2] & ST2_CRC) { pr_cont("data CRC error"); tell_sector(); - } else if (ST1 & ST1_CRC) { + } else if (reply_buffer[ST1] & ST1_CRC) { pr_cont("CRC error"); tell_sector(); - } else if ((ST1 & (ST1_MAM | ST1_ND)) || - (ST2 & ST2_MAM)) { + } else if ((reply_buffer[ST1] & (ST1_MAM | ST1_ND)) || + (reply_buffer[ST2] & ST2_MAM)) { if (!probing) { pr_cont("sector not found"); tell_sector(); } else pr_cont("probe failed..."); - } else if (ST2 & ST2_WC) { /* seek error */ + } else if (reply_buffer[ST2] & ST2_WC) { /* seek error */ pr_cont("wrong cylinder"); - } else if (ST2 & ST2_BC) { /* cylinder marked as bad */ + } else if (reply_buffer[ST2] & ST2_BC) { /* cylinder marked as bad */ pr_cont("bad cylinder"); } else { pr_cont("unknown error. 
ST[0..2] are: 0x%x 0x%x 0x%x", - ST0, ST1, ST2); + reply_buffer[ST0], reply_buffer[ST1], + reply_buffer[ST2]); tell_sector(); } pr_cont("\n"); @@ -1414,33 +1429,35 @@ static int interpret_errors(void) if (inr != 7) { DPRINT("-- FDC reply error\n"); - FDCS->reset = 1; + fdc_state[current_fdc].reset = 1; return 1; } /* check IC to find cause of interrupt */ - switch (ST0 & ST0_INTR) { + switch (reply_buffer[ST0] & ST0_INTR) { case 0x40: /* error occurred during command execution */ - if (ST1 & ST1_EOC) + if (reply_buffer[ST1] & ST1_EOC) return 0; /* occurs with pseudo-DMA */ bad = 1; - if (ST1 & ST1_WP) { + if (reply_buffer[ST1] & ST1_WP) { DPRINT("Drive is write protected\n"); - clear_bit(FD_DISK_WRITABLE_BIT, &DRS->flags); + clear_bit(FD_DISK_WRITABLE_BIT, + &drive_state[current_drive].flags); cont->done(0); bad = 2; - } else if (ST1 & ST1_ND) { - set_bit(FD_NEED_TWADDLE_BIT, &DRS->flags); - } else if (ST1 & ST1_OR) { - if (DP->flags & FTD_MSG) + } else if (reply_buffer[ST1] & ST1_ND) { + set_bit(FD_NEED_TWADDLE_BIT, + &drive_state[current_drive].flags); + } else if (reply_buffer[ST1] & ST1_OR) { + if (drive_params[current_drive].flags & FTD_MSG) DPRINT("Over/Underrun - retrying\n"); bad = 0; - } else if (*errors >= DP->max_errors.reporting) { + } else if (*errors >= drive_params[current_drive].max_errors.reporting) { print_errors(); } - if (ST2 & ST2_WC || ST2 & ST2_BC) + if (reply_buffer[ST2] & ST2_WC || reply_buffer[ST2] & ST2_BC) /* wrong cylinder => recal */ - DRS->track = NEED_2_RECAL; + drive_state[current_drive].track = NEED_2_RECAL; return bad; case 0x80: /* invalid command given */ DPRINT("Invalid FDC command given!\n"); @@ -1473,13 +1490,13 @@ static void setup_rw_floppy(void) flags |= FD_RAW_INTR; if ((flags & FD_RAW_SPIN) && !(flags & FD_RAW_NO_MOTOR)) { - ready_date = DRS->spinup_date + DP->spinup; + ready_date = drive_state[current_drive].spinup_date + drive_params[current_drive].spinup; /* If spinup will take a long time, rerun scandrives * again 
just before spinup completion. Beware that * after scandrives, we must again wait for selection. */ - if (time_after(ready_date, jiffies + DP->select_delay)) { - ready_date -= DP->select_delay; + if (time_after(ready_date, jiffies + drive_params[current_drive].select_delay)) { + ready_date -= drive_params[current_drive].select_delay; function = floppy_start; } else function = setup_rw_floppy; @@ -1522,44 +1539,52 @@ static int blind_seek; static void seek_interrupt(void) { debugt(__func__, ""); - if (inr != 2 || (ST0 & 0xF8) != 0x20) { + if (inr != 2 || (reply_buffer[ST0] & 0xF8) != 0x20) { DPRINT("seek failed\n"); - DRS->track = NEED_2_RECAL; + drive_state[current_drive].track = NEED_2_RECAL; cont->error(); cont->redo(); return; } - if (DRS->track >= 0 && DRS->track != ST1 && !blind_seek) { - debug_dcl(DP->flags, + if (drive_state[current_drive].track >= 0 && + drive_state[current_drive].track != reply_buffer[ST1] && + !blind_seek) { + debug_dcl(drive_params[current_drive].flags, "clearing NEWCHANGE flag because of effective seek\n"); - debug_dcl(DP->flags, "jiffies=%lu\n", jiffies); - clear_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags); + debug_dcl(drive_params[current_drive].flags, "jiffies=%lu\n", + jiffies); + clear_bit(FD_DISK_NEWCHANGE_BIT, + &drive_state[current_drive].flags); /* effective seek */ - DRS->select_date = jiffies; + drive_state[current_drive].select_date = jiffies; } - DRS->track = ST1; + drive_state[current_drive].track = reply_buffer[ST1]; floppy_ready(); } static void check_wp(void) { - if (test_bit(FD_VERIFY_BIT, &DRS->flags)) { + if (test_bit(FD_VERIFY_BIT, &drive_state[current_drive].flags)) { /* check write protection */ output_byte(FD_GETSTATUS); output_byte(UNIT(current_drive)); if (result() != 1) { - FDCS->reset = 1; + fdc_state[current_fdc].reset = 1; return; } - clear_bit(FD_VERIFY_BIT, &DRS->flags); - clear_bit(FD_NEED_TWADDLE_BIT, &DRS->flags); - debug_dcl(DP->flags, + clear_bit(FD_VERIFY_BIT, &drive_state[current_drive].flags); + 
clear_bit(FD_NEED_TWADDLE_BIT, + &drive_state[current_drive].flags); + debug_dcl(drive_params[current_drive].flags, "checking whether disk is write protected\n"); - debug_dcl(DP->flags, "wp=%x\n", ST3 & 0x40); - if (!(ST3 & 0x40)) - set_bit(FD_DISK_WRITABLE_BIT, &DRS->flags); + debug_dcl(drive_params[current_drive].flags, "wp=%x\n", + reply_buffer[ST3] & 0x40); + if (!(reply_buffer[ST3] & 0x40)) + set_bit(FD_DISK_WRITABLE_BIT, + &drive_state[current_drive].flags); else - clear_bit(FD_DISK_WRITABLE_BIT, &DRS->flags); + clear_bit(FD_DISK_WRITABLE_BIT, + &drive_state[current_drive].flags); } } @@ -1569,32 +1594,34 @@ static void seek_floppy(void) blind_seek = 0; - debug_dcl(DP->flags, "calling disk change from %s\n", __func__); + debug_dcl(drive_params[current_drive].flags, + "calling disk change from %s\n", __func__); - if (!test_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags) && + if (!test_bit(FD_DISK_NEWCHANGE_BIT, &drive_state[current_drive].flags) && disk_change(current_drive) && (raw_cmd->flags & FD_RAW_NEED_DISK)) { /* the media changed flag should be cleared after the seek. * If it isn't, this means that there is really no disk in * the drive. */ - set_bit(FD_DISK_CHANGED_BIT, &DRS->flags); + set_bit(FD_DISK_CHANGED_BIT, + &drive_state[current_drive].flags); cont->done(0); cont->redo(); return; } - if (DRS->track <= NEED_1_RECAL) { + if (drive_state[current_drive].track <= NEED_1_RECAL) { recalibrate_floppy(); return; - } else if (test_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags) && + } else if (test_bit(FD_DISK_NEWCHANGE_BIT, &drive_state[current_drive].flags) && (raw_cmd->flags & FD_RAW_NEED_DISK) && - (DRS->track <= NO_TRACK || DRS->track == raw_cmd->track)) { + (drive_state[current_drive].track <= NO_TRACK || drive_state[current_drive].track == raw_cmd->track)) { /* we seek to clear the media-changed condition. Does anybody * know a more elegant way, which works on all drives? 
*/ if (raw_cmd->track) track = raw_cmd->track - 1; else { - if (DP->flags & FD_SILENT_DCL_CLEAR) { - set_dor(fdc, ~(0x10 << UNIT(current_drive)), 0); + if (drive_params[current_drive].flags & FD_SILENT_DCL_CLEAR) { + set_dor(current_fdc, ~(0x10 << UNIT(current_drive)), 0); blind_seek = 1; raw_cmd->flags |= FD_RAW_NEED_SEEK; } @@ -1602,7 +1629,7 @@ static void seek_floppy(void) } } else { check_wp(); - if (raw_cmd->track != DRS->track && + if (raw_cmd->track != drive_state[current_drive].track && (raw_cmd->flags & FD_RAW_NEED_SEEK)) track = raw_cmd->track; else { @@ -1625,9 +1652,9 @@ static void recal_interrupt(void) { debugt(__func__, ""); if (inr != 2) - FDCS->reset = 1; - else if (ST0 & ST0_ECE) { - switch (DRS->track) { + fdc_state[current_fdc].reset = 1; + else if (reply_buffer[ST0] & ST0_ECE) { + switch (drive_state[current_drive].track) { case NEED_1_RECAL: debugt(__func__, "need 1 recal"); /* after a second recalibrate, we still haven't @@ -1645,11 +1672,12 @@ static void recal_interrupt(void) * not to move at recalibration is to * be already at track 0.) Clear the * new change flag */ - debug_dcl(DP->flags, + debug_dcl(drive_params[current_drive].flags, "clearing NEWCHANGE flag because of second recalibrate\n"); - clear_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags); - DRS->select_date = jiffies; + clear_bit(FD_DISK_NEWCHANGE_BIT, + &drive_state[current_drive].flags); + drive_state[current_drive].select_date = jiffies; /* fall through */ default: debugt(__func__, "default"); @@ -1659,11 +1687,11 @@ static void recal_interrupt(void) * track 0, this might mean that we * started beyond track 80. Try * again. 
*/ - DRS->track = NEED_1_RECAL; + drive_state[current_drive].track = NEED_1_RECAL; break; } } else - DRS->track = ST1; + drive_state[current_drive].track = reply_buffer[ST1]; floppy_ready(); } @@ -1693,20 +1721,20 @@ irqreturn_t floppy_interrupt(int irq, void *dev_id) release_dma_lock(f); do_floppy = NULL; - if (fdc >= N_FDC || FDCS->address == -1) { + if (current_fdc >= N_FDC || fdc_state[current_fdc].address == -1) { /* we don't even know which FDC is the culprit */ pr_info("DOR0=%x\n", fdc_state[0].dor); - pr_info("floppy interrupt on bizarre fdc %d\n", fdc); + pr_info("floppy interrupt on bizarre fdc %d\n", current_fdc); pr_info("handler=%ps\n", handler); is_alive(__func__, "bizarre fdc"); return IRQ_NONE; } - FDCS->reset = 0; + fdc_state[current_fdc].reset = 0; /* We have to clear the reset flag here, because apparently on boxes * with level triggered interrupts (PS/2, Sparc, ...), it is needed to - * emit SENSEI's to clear the interrupt line. And FDCS->reset blocks the - * emission of the SENSEI's. + * emit SENSEI's to clear the interrupt line. And fdc_state[fdc].reset + * blocks the emission of the SENSEI's. * It is OK to emit floppy commands because we are in an interrupt * handler here, and thus we have to fear no interference of other * activity. @@ -1725,11 +1753,11 @@ irqreturn_t floppy_interrupt(int irq, void *dev_id) if (do_print) print_result("sensei", inr); max_sensei--; - } while ((ST0 & 0x83) != UNIT(current_drive) && + } while ((reply_buffer[ST0] & 0x83) != UNIT(current_drive) && inr == 2 && max_sensei); } if (!handler) { - FDCS->reset = 1; + fdc_state[current_fdc].reset = 1; return IRQ_NONE; } schedule_bh(handler); @@ -1755,7 +1783,7 @@ static void reset_interrupt(void) { debugt(__func__, ""); result(); /* get the status ready for set_fdc */ - if (FDCS->reset) { + if (fdc_state[current_fdc].reset) { pr_info("reset set in interrupt, calling %ps\n", cont->error); cont->error(); /* a reset just after a reset. BAD! 
*/ } @@ -1771,7 +1799,7 @@ static void reset_fdc(void) unsigned long flags; do_floppy = reset_interrupt; - FDCS->reset = 0; + fdc_state[current_fdc].reset = 0; reset_fdc_info(0); /* Pseudo-DMA may intercept 'reset finished' interrupt. */ @@ -1781,12 +1809,13 @@ static void reset_fdc(void) fd_disable_dma(); release_dma_lock(flags); - if (FDCS->version >= FDC_82072A) - fd_outb(0x80 | (FDCS->dtr & 3), FD_STATUS); + if (fdc_state[current_fdc].version >= FDC_82072A) + fdc_outb(0x80 | (fdc_state[current_fdc].dtr & 3), + current_fdc, FD_STATUS); else { - fd_outb(FDCS->dor & ~0x04, FD_DOR); + fdc_outb(fdc_state[current_fdc].dor & ~0x04, current_fdc, FD_DOR); udelay(FD_RESET_DELAY); - fd_outb(FDCS->dor, FD_DOR); + fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR); } } @@ -1813,7 +1842,7 @@ static void show_floppy(void) print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1, reply_buffer, resultsize, true); - pr_info("status=%x\n", fd_inb(FD_STATUS)); + pr_info("status=%x\n", fdc_inb(current_fdc, FD_STATUS)); pr_info("fdc_busy=%lu\n", fdc_busy); if (do_floppy) pr_info("do_floppy=%ps\n", do_floppy); @@ -1850,7 +1879,7 @@ static void floppy_shutdown(struct work_struct *arg) if (initialized) DPRINT("floppy timeout called\n"); - FDCS->reset = 1; + fdc_state[current_fdc].reset = 1; if (cont) { cont->done(0); cont->redo(); /* this will recall reset when needed */ @@ -1870,29 +1899,29 @@ static int start_motor(void (*function)(void)) mask = 0xfc; data = UNIT(current_drive); if (!(raw_cmd->flags & FD_RAW_NO_MOTOR)) { - if (!(FDCS->dor & (0x10 << UNIT(current_drive)))) { + if (!(fdc_state[current_fdc].dor & (0x10 << UNIT(current_drive)))) { set_debugt(); /* no read since this drive is running */ - DRS->first_read_date = 0; + drive_state[current_drive].first_read_date = 0; /* note motor start time if motor is not yet running */ - DRS->spinup_date = jiffies; + drive_state[current_drive].spinup_date = jiffies; data |= (0x10 << UNIT(current_drive)); } - } else if (FDCS->dor & 
(0x10 << UNIT(current_drive))) + } else if (fdc_state[current_fdc].dor & (0x10 << UNIT(current_drive))) mask &= ~(0x10 << UNIT(current_drive)); /* starts motor and selects floppy */ del_timer(motor_off_timer + current_drive); - set_dor(fdc, mask, data); + set_dor(current_fdc, mask, data); /* wait_for_completion also schedules reset if needed. */ - return fd_wait_for_completion(DRS->select_date + DP->select_delay, + return fd_wait_for_completion(drive_state[current_drive].select_date + drive_params[current_drive].select_delay, function); } static void floppy_ready(void) { - if (FDCS->reset) { + if (fdc_state[current_fdc].reset) { reset_fdc(); return; } @@ -1901,9 +1930,10 @@ static void floppy_ready(void) if (fdc_dtr()) return; - debug_dcl(DP->flags, "calling disk change from floppy_ready\n"); + debug_dcl(drive_params[current_drive].flags, + "calling disk change from floppy_ready\n"); if (!(raw_cmd->flags & FD_RAW_NO_MOTOR) && - disk_change(current_drive) && !DP->select_delay) + disk_change(current_drive) && !drive_params[current_drive].select_delay) twaddle(); /* this clears the dcl on certain * drive/controller combinations */ @@ -1932,8 +1962,9 @@ static void floppy_start(void) reschedule_timeout(current_reqD, "floppy start"); scandrives(); - debug_dcl(DP->flags, "setting NEWCHANGE in floppy_start\n"); - set_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags); + debug_dcl(drive_params[current_drive].flags, + "setting NEWCHANGE in floppy_start\n"); + set_bit(FD_DISK_NEWCHANGE_BIT, &drive_state[current_drive].flags); floppy_ready(); } @@ -1991,7 +2022,7 @@ static int wait_til_done(void (*handler)(void), bool interruptible) return -EINTR; } - if (FDCS->reset) + if (fdc_state[current_fdc].reset) command_status = FD_COMMAND_ERROR; if (command_status == FD_COMMAND_OKAY) ret = 0; @@ -2032,14 +2063,14 @@ static int next_valid_format(void) { int probed_format; - probed_format = DRS->probed_format; + probed_format = drive_state[current_drive].probed_format; while (1) { - if 
(probed_format >= 8 || !DP->autodetect[probed_format]) { - DRS->probed_format = 0; + if (probed_format >= 8 || !drive_params[current_drive].autodetect[probed_format]) { + drive_state[current_drive].probed_format = 0; return 1; } - if (floppy_type[DP->autodetect[probed_format]].sect) { - DRS->probed_format = probed_format; + if (floppy_type[drive_params[current_drive].autodetect[probed_format]].sect) { + drive_state[current_drive].probed_format = probed_format; return 0; } probed_format++; @@ -2051,23 +2082,23 @@ static void bad_flp_intr(void) int err_count; if (probing) { - DRS->probed_format++; + drive_state[current_drive].probed_format++; if (!next_valid_format()) return; } err_count = ++(*errors); - INFBOUND(DRWE->badness, err_count); - if (err_count > DP->max_errors.abort) + INFBOUND(write_errors[current_drive].badness, err_count); + if (err_count > drive_params[current_drive].max_errors.abort) cont->done(0); - if (err_count > DP->max_errors.reset) - FDCS->reset = 1; - else if (err_count > DP->max_errors.recal) - DRS->track = NEED_2_RECAL; + if (err_count > drive_params[current_drive].max_errors.reset) + fdc_state[current_fdc].reset = 1; + else if (err_count > drive_params[current_drive].max_errors.recal) + drive_state[current_drive].track = NEED_2_RECAL; } static void set_floppy(int drive) { - int type = ITYPE(UDRS->fd_device); + int type = ITYPE(drive_state[drive].fd_device); if (type) _floppy = floppy_type + type; @@ -2113,28 +2144,28 @@ static void setup_format_params(int track) FD_RAW_NEED_DISK | FD_RAW_NEED_SEEK); raw_cmd->rate = _floppy->rate & 0x43; raw_cmd->cmd_count = NR_F; - COMMAND = FM_MODE(_floppy, FD_FORMAT); - DR_SELECT = UNIT(current_drive) + PH_HEAD(_floppy, format_req.head); - F_SIZECODE = FD_SIZECODE(_floppy); - F_SECT_PER_TRACK = _floppy->sect << 2 >> F_SIZECODE; - F_GAP = _floppy->fmt_gap; - F_FILL = FD_FILL_BYTE; + raw_cmd->cmd[COMMAND] = FM_MODE(_floppy, FD_FORMAT); + raw_cmd->cmd[DR_SELECT] = UNIT(current_drive) + PH_HEAD(_floppy, 
format_req.head); + raw_cmd->cmd[F_SIZECODE] = FD_SIZECODE(_floppy); + raw_cmd->cmd[F_SECT_PER_TRACK] = _floppy->sect << 2 >> raw_cmd->cmd[F_SIZECODE]; + raw_cmd->cmd[F_GAP] = _floppy->fmt_gap; + raw_cmd->cmd[F_FILL] = FD_FILL_BYTE; raw_cmd->kernel_data = floppy_track_buffer; - raw_cmd->length = 4 * F_SECT_PER_TRACK; + raw_cmd->length = 4 * raw_cmd->cmd[F_SECT_PER_TRACK]; - if (!F_SECT_PER_TRACK) + if (!raw_cmd->cmd[F_SECT_PER_TRACK]) return; /* allow for about 30ms for data transport per track */ - head_shift = (F_SECT_PER_TRACK + 5) / 6; + head_shift = (raw_cmd->cmd[F_SECT_PER_TRACK] + 5) / 6; /* a ``cylinder'' is two tracks plus a little stepping time */ track_shift = 2 * head_shift + 3; /* position of logical sector 1 on this track */ n = (track_shift * format_req.track + head_shift * format_req.head) - % F_SECT_PER_TRACK; + % raw_cmd->cmd[F_SECT_PER_TRACK]; /* determine interleave */ il = 1; @@ -2142,27 +2173,27 @@ static void setup_format_params(int track) il++; /* initialize field */ - for (count = 0; count < F_SECT_PER_TRACK; ++count) { + for (count = 0; count < raw_cmd->cmd[F_SECT_PER_TRACK]; ++count) { here[count].track = format_req.track; here[count].head = format_req.head; here[count].sect = 0; - here[count].size = F_SIZECODE; + here[count].size = raw_cmd->cmd[F_SIZECODE]; } /* place logical sectors */ - for (count = 1; count <= F_SECT_PER_TRACK; ++count) { + for (count = 1; count <= raw_cmd->cmd[F_SECT_PER_TRACK]; ++count) { here[n].sect = count; - n = (n + il) % F_SECT_PER_TRACK; + n = (n + il) % raw_cmd->cmd[F_SECT_PER_TRACK]; if (here[n].sect) { /* sector busy, find next free sector */ ++n; - if (n >= F_SECT_PER_TRACK) { - n -= F_SECT_PER_TRACK; + if (n >= raw_cmd->cmd[F_SECT_PER_TRACK]) { + n -= raw_cmd->cmd[F_SECT_PER_TRACK]; while (here[n].sect) ++n; } } } if (_floppy->stretch & FD_SECTBASEMASK) { - for (count = 0; count < F_SECT_PER_TRACK; count++) + for (count = 0; count < raw_cmd->cmd[F_SECT_PER_TRACK]; count++) here[count].sect += 
FD_SECTBASE(_floppy) - 1; } } @@ -2191,7 +2222,7 @@ static int do_format(int drive, struct format_descr *tmp_format_req) set_floppy(drive); if (!_floppy || - _floppy->track > DP->tracks || + _floppy->track > drive_params[current_drive].tracks || tmp_format_req->track >= _floppy->track || tmp_format_req->head >= _floppy->head || (_floppy->sect << 2) % (1 << FD_SIZECODE(_floppy)) || @@ -2253,21 +2284,21 @@ static void request_done(int uptodate) /* maintain values for invalidation on geometry * change */ block = current_count_sectors + blk_rq_pos(req); - INFBOUND(DRS->maxblock, block); + INFBOUND(drive_state[current_drive].maxblock, block); if (block > _floppy->sect) - DRS->maxtrack = 1; + drive_state[current_drive].maxtrack = 1; floppy_end_request(req, 0); } else { if (rq_data_dir(req) == WRITE) { /* record write error information */ - DRWE->write_errors++; - if (DRWE->write_errors == 1) { - DRWE->first_error_sector = blk_rq_pos(req); - DRWE->first_error_generation = DRS->generation; + write_errors[current_drive].write_errors++; + if (write_errors[current_drive].write_errors == 1) { + write_errors[current_drive].first_error_sector = blk_rq_pos(req); + write_errors[current_drive].first_error_generation = drive_state[current_drive].generation; } - DRWE->last_error_sector = blk_rq_pos(req); - DRWE->last_error_generation = DRS->generation; + write_errors[current_drive].last_error_sector = blk_rq_pos(req); + write_errors[current_drive].last_error_generation = drive_state[current_drive].generation; } floppy_end_request(req, BLK_STS_IOERR); } @@ -2281,43 +2312,46 @@ static void rw_interrupt(void) int heads; int nr_sectors; - if (R_HEAD >= 2) { + if (reply_buffer[R_HEAD] >= 2) { /* some Toshiba floppy controllers occasionnally seem to * return bogus interrupts after read/write operations, which * can be recognized by a bad head number (>= 2) */ return; } - if (!DRS->first_read_date) - DRS->first_read_date = jiffies; + if (!drive_state[current_drive].first_read_date) + 
drive_state[current_drive].first_read_date = jiffies; nr_sectors = 0; - ssize = DIV_ROUND_UP(1 << SIZECODE, 4); + ssize = DIV_ROUND_UP(1 << raw_cmd->cmd[SIZECODE], 4); - if (ST1 & ST1_EOC) + if (reply_buffer[ST1] & ST1_EOC) eoc = 1; else eoc = 0; - if (COMMAND & 0x80) + if (raw_cmd->cmd[COMMAND] & 0x80) heads = 2; else heads = 1; - nr_sectors = (((R_TRACK - TRACK) * heads + - R_HEAD - HEAD) * SECT_PER_TRACK + - R_SECTOR - SECTOR + eoc) << SIZECODE >> 2; + nr_sectors = (((reply_buffer[R_TRACK] - raw_cmd->cmd[TRACK]) * heads + + reply_buffer[R_HEAD] - raw_cmd->cmd[HEAD]) * raw_cmd->cmd[SECT_PER_TRACK] + + reply_buffer[R_SECTOR] - raw_cmd->cmd[SECTOR] + eoc) << raw_cmd->cmd[SIZECODE] >> 2; if (nr_sectors / ssize > DIV_ROUND_UP(in_sector_offset + current_count_sectors, ssize)) { DPRINT("long rw: %x instead of %lx\n", nr_sectors, current_count_sectors); - pr_info("rs=%d s=%d\n", R_SECTOR, SECTOR); - pr_info("rh=%d h=%d\n", R_HEAD, HEAD); - pr_info("rt=%d t=%d\n", R_TRACK, TRACK); + pr_info("rs=%d s=%d\n", reply_buffer[R_SECTOR], + raw_cmd->cmd[SECTOR]); + pr_info("rh=%d h=%d\n", reply_buffer[R_HEAD], + raw_cmd->cmd[HEAD]); + pr_info("rt=%d t=%d\n", reply_buffer[R_TRACK], + raw_cmd->cmd[TRACK]); pr_info("heads=%d eoc=%d\n", heads, eoc); pr_info("spt=%d st=%d ss=%d\n", - SECT_PER_TRACK, fsector_t, ssize); + raw_cmd->cmd[SECT_PER_TRACK], fsector_t, ssize); pr_info("in_sector_offset=%d\n", in_sector_offset); } @@ -2347,7 +2381,7 @@ static void rw_interrupt(void) } if (probing) { - if (DP->flags & FTD_MSG) + if (drive_params[current_drive].flags & FTD_MSG) DPRINT("Auto-detected floppy type %s in fd%d\n", _floppy->name, current_drive); current_type[current_drive] = _floppy; @@ -2355,11 +2389,11 @@ static void rw_interrupt(void) probing = 0; } - if (CT(COMMAND) != FD_READ || + if (CT(raw_cmd->cmd[COMMAND]) != FD_READ || raw_cmd->kernel_data == bio_data(current_req->bio)) { /* transfer directly from buffer */ cont->done(1); - } else if (CT(COMMAND) == FD_READ) { + } else if 
(CT(raw_cmd->cmd[COMMAND]) == FD_READ) { buffer_track = raw_cmd->track; buffer_drive = current_drive; INFBOUND(buffer_max, nr_sectors + fsector_t); @@ -2418,13 +2452,13 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2) min(max_sector, max_sector_2), blk_rq_sectors(current_req)); - if (current_count_sectors <= 0 && CT(COMMAND) == FD_WRITE && + if (current_count_sectors <= 0 && CT(raw_cmd->cmd[COMMAND]) == FD_WRITE && buffer_max > fsector_t + blk_rq_sectors(current_req)) current_count_sectors = min_t(int, buffer_max - fsector_t, blk_rq_sectors(current_req)); remaining = current_count_sectors << 9; - if (remaining > blk_rq_bytes(current_req) && CT(COMMAND) == FD_WRITE) { + if (remaining > blk_rq_bytes(current_req) && CT(raw_cmd->cmd[COMMAND]) == FD_WRITE) { DPRINT("in copy buffer\n"); pr_info("current_count_sectors=%ld\n", current_count_sectors); pr_info("remaining=%d\n", remaining >> 9); @@ -2459,16 +2493,16 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2) fsector_t, buffer_min); pr_info("current_count_sectors=%ld\n", current_count_sectors); - if (CT(COMMAND) == FD_READ) + if (CT(raw_cmd->cmd[COMMAND]) == FD_READ) pr_info("read\n"); - if (CT(COMMAND) == FD_WRITE) + if (CT(raw_cmd->cmd[COMMAND]) == FD_WRITE) pr_info("write\n"); break; } if (((unsigned long)buffer) % 512) DPRINT("%p buffer not aligned\n", buffer); - if (CT(COMMAND) == FD_READ) + if (CT(raw_cmd->cmd[COMMAND]) == FD_READ) memcpy(buffer, dma_buffer, size); else memcpy(dma_buffer, buffer, size); @@ -2486,7 +2520,7 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2) /* work around a bug in pseudo DMA * (on some FDCs) pseudo DMA does not stop when the CPU stops * sending data. Hence we need a different way to signal the - * transfer length: We use SECT_PER_TRACK. Unfortunately, this + * transfer length: We use raw_cmd->cmd[SECT_PER_TRACK]. 
Unfortunately, this * does not work with MT, hence we can only transfer one head at * a time */ @@ -2495,18 +2529,18 @@ static void virtualdmabug_workaround(void) int hard_sectors; int end_sector; - if (CT(COMMAND) == FD_WRITE) { - COMMAND &= ~0x80; /* switch off multiple track mode */ + if (CT(raw_cmd->cmd[COMMAND]) == FD_WRITE) { + raw_cmd->cmd[COMMAND] &= ~0x80; /* switch off multiple track mode */ - hard_sectors = raw_cmd->length >> (7 + SIZECODE); - end_sector = SECTOR + hard_sectors - 1; - if (end_sector > SECT_PER_TRACK) { + hard_sectors = raw_cmd->length >> (7 + raw_cmd->cmd[SIZECODE]); + end_sector = raw_cmd->cmd[SECTOR] + hard_sectors - 1; + if (end_sector > raw_cmd->cmd[SECT_PER_TRACK]) { pr_info("too many sectors %d > %d\n", - end_sector, SECT_PER_TRACK); + end_sector, raw_cmd->cmd[SECT_PER_TRACK]); return; } - SECT_PER_TRACK = end_sector; - /* make sure SECT_PER_TRACK + raw_cmd->cmd[SECT_PER_TRACK] = end_sector; + /* make sure raw_cmd->cmd[SECT_PER_TRACK] * points to end of transfer */ } } @@ -2539,10 +2573,10 @@ static int make_raw_rw_request(void) raw_cmd->cmd_count = NR_RW; if (rq_data_dir(current_req) == READ) { raw_cmd->flags |= FD_RAW_READ; - COMMAND = FM_MODE(_floppy, FD_READ); + raw_cmd->cmd[COMMAND] = FM_MODE(_floppy, FD_READ); } else if (rq_data_dir(current_req) == WRITE) { raw_cmd->flags |= FD_RAW_WRITE; - COMMAND = FM_MODE(_floppy, FD_WRITE); + raw_cmd->cmd[COMMAND] = FM_MODE(_floppy, FD_WRITE); } else { DPRINT("%s: unknown command\n", __func__); return 0; @@ -2550,24 +2584,24 @@ static int make_raw_rw_request(void) max_sector = _floppy->sect * _floppy->head; - TRACK = (int)blk_rq_pos(current_req) / max_sector; + raw_cmd->cmd[TRACK] = (int)blk_rq_pos(current_req) / max_sector; fsector_t = (int)blk_rq_pos(current_req) % max_sector; - if (_floppy->track && TRACK >= _floppy->track) { + if (_floppy->track && raw_cmd->cmd[TRACK] >= _floppy->track) { if (blk_rq_cur_sectors(current_req) & 1) { current_count_sectors = 1; return 1; } else return 0; 
} - HEAD = fsector_t / _floppy->sect; + raw_cmd->cmd[HEAD] = fsector_t / _floppy->sect; if (((_floppy->stretch & (FD_SWAPSIDES | FD_SECTBASEMASK)) || - test_bit(FD_NEED_TWADDLE_BIT, &DRS->flags)) && + test_bit(FD_NEED_TWADDLE_BIT, &drive_state[current_drive].flags)) && fsector_t < _floppy->sect) max_sector = _floppy->sect; /* 2M disks have phantom sectors on the first track */ - if ((_floppy->rate & FD_2M) && (!TRACK) && (!HEAD)) { + if ((_floppy->rate & FD_2M) && (!raw_cmd->cmd[TRACK]) && (!raw_cmd->cmd[HEAD])) { max_sector = 2 * _floppy->sect / 3; if (fsector_t >= max_sector) { current_count_sectors = @@ -2575,23 +2609,24 @@ static int make_raw_rw_request(void) blk_rq_sectors(current_req)); return 1; } - SIZECODE = 2; + raw_cmd->cmd[SIZECODE] = 2; } else - SIZECODE = FD_SIZECODE(_floppy); + raw_cmd->cmd[SIZECODE] = FD_SIZECODE(_floppy); raw_cmd->rate = _floppy->rate & 0x43; - if ((_floppy->rate & FD_2M) && (TRACK || HEAD) && raw_cmd->rate == 2) + if ((_floppy->rate & FD_2M) && + (raw_cmd->cmd[TRACK] || raw_cmd->cmd[HEAD]) && raw_cmd->rate == 2) raw_cmd->rate = 1; - if (SIZECODE) - SIZECODE2 = 0xff; + if (raw_cmd->cmd[SIZECODE]) + raw_cmd->cmd[SIZECODE2] = 0xff; else - SIZECODE2 = 0x80; - raw_cmd->track = TRACK << STRETCH(_floppy); - DR_SELECT = UNIT(current_drive) + PH_HEAD(_floppy, HEAD); - GAP = _floppy->gap; - ssize = DIV_ROUND_UP(1 << SIZECODE, 4); - SECT_PER_TRACK = _floppy->sect << 2 >> SIZECODE; - SECTOR = ((fsector_t % _floppy->sect) << 2 >> SIZECODE) + + raw_cmd->cmd[SIZECODE2] = 0x80; + raw_cmd->track = raw_cmd->cmd[TRACK] << STRETCH(_floppy); + raw_cmd->cmd[DR_SELECT] = UNIT(current_drive) + PH_HEAD(_floppy, raw_cmd->cmd[HEAD]); + raw_cmd->cmd[GAP] = _floppy->gap; + ssize = DIV_ROUND_UP(1 << raw_cmd->cmd[SIZECODE], 4); + raw_cmd->cmd[SECT_PER_TRACK] = _floppy->sect << 2 >> raw_cmd->cmd[SIZECODE]; + raw_cmd->cmd[SECTOR] = ((fsector_t % _floppy->sect) << 2 >> raw_cmd->cmd[SIZECODE]) + FD_SECTBASE(_floppy); /* tracksize describes the size which can be 
filled up with sectors @@ -2599,24 +2634,24 @@ static int make_raw_rw_request(void) */ tracksize = _floppy->sect - _floppy->sect % ssize; if (tracksize < _floppy->sect) { - SECT_PER_TRACK++; + raw_cmd->cmd[SECT_PER_TRACK]++; if (tracksize <= fsector_t % _floppy->sect) - SECTOR--; + raw_cmd->cmd[SECTOR]--; /* if we are beyond tracksize, fill up using smaller sectors */ while (tracksize <= fsector_t % _floppy->sect) { while (tracksize + ssize > _floppy->sect) { - SIZECODE--; + raw_cmd->cmd[SIZECODE]--; ssize >>= 1; } - SECTOR++; - SECT_PER_TRACK++; + raw_cmd->cmd[SECTOR]++; + raw_cmd->cmd[SECT_PER_TRACK]++; tracksize += ssize; } - max_sector = HEAD * _floppy->sect + tracksize; - } else if (!TRACK && !HEAD && !(_floppy->rate & FD_2M) && probing) { + max_sector = raw_cmd->cmd[HEAD] * _floppy->sect + tracksize; + } else if (!raw_cmd->cmd[TRACK] && !raw_cmd->cmd[HEAD] && !(_floppy->rate & FD_2M) && probing) { max_sector = _floppy->sect; - } else if (!HEAD && CT(COMMAND) == FD_WRITE) { + } else if (!raw_cmd->cmd[HEAD] && CT(raw_cmd->cmd[COMMAND]) == FD_WRITE) { /* for virtual DMA bug workaround */ max_sector = _floppy->sect; } @@ -2628,12 +2663,12 @@ static int make_raw_rw_request(void) (current_drive == buffer_drive) && (fsector_t >= buffer_min) && (fsector_t < buffer_max)) { /* data already in track buffer */ - if (CT(COMMAND) == FD_READ) { + if (CT(raw_cmd->cmd[COMMAND]) == FD_READ) { copy_buffer(1, max_sector, buffer_max); return 1; } } else if (in_sector_offset || blk_rq_sectors(current_req) < ssize) { - if (CT(COMMAND) == FD_WRITE) { + if (CT(raw_cmd->cmd[COMMAND]) == FD_WRITE) { unsigned int sectors; sectors = fsector_t + blk_rq_sectors(current_req); @@ -2644,7 +2679,7 @@ static int make_raw_rw_request(void) } raw_cmd->flags &= ~FD_RAW_WRITE; raw_cmd->flags |= FD_RAW_READ; - COMMAND = FM_MODE(_floppy, FD_READ); + raw_cmd->cmd[COMMAND] = FM_MODE(_floppy, FD_READ); } else if ((unsigned long)bio_data(current_req->bio) < MAX_DMA_ADDRESS) { unsigned long dma_limit; int 
direct, indirect; @@ -2677,9 +2712,9 @@ static int make_raw_rw_request(void) */ if (!direct || (indirect * 2 > direct * 3 && - *errors < DP->max_errors.read_track && + *errors < drive_params[current_drive].max_errors.read_track && ((!probing || - (DP->read_track & (1 << DRS->probed_format)))))) { + (drive_params[current_drive].read_track & (1 << drive_state[current_drive].probed_format)))))) { max_size = blk_rq_sectors(current_req); } else { raw_cmd->kernel_data = bio_data(current_req->bio); @@ -2695,7 +2730,7 @@ static int make_raw_rw_request(void) } } - if (CT(COMMAND) == FD_READ) + if (CT(raw_cmd->cmd[COMMAND]) == FD_READ) max_size = max_sector; /* unbounded */ /* claim buffer track if needed */ @@ -2703,7 +2738,7 @@ static int make_raw_rw_request(void) buffer_drive != current_drive || /* bad drive */ fsector_t > buffer_max || fsector_t < buffer_min || - ((CT(COMMAND) == FD_READ || + ((CT(raw_cmd->cmd[COMMAND]) == FD_READ || (!in_sector_offset && blk_rq_sectors(current_req) >= ssize)) && max_sector > 2 * max_buffer_sectors + buffer_min && max_size + fsector_t > 2 * max_buffer_sectors + buffer_min)) { @@ -2715,7 +2750,7 @@ static int make_raw_rw_request(void) raw_cmd->kernel_data = floppy_track_buffer + ((aligned_sector_t - buffer_min) << 9); - if (CT(COMMAND) == FD_WRITE) { + if (CT(raw_cmd->cmd[COMMAND]) == FD_WRITE) { /* copy write buffer to track buffer. 
* if we get here, we know that the write * is either aligned or the data already in the buffer @@ -2737,10 +2772,10 @@ static int make_raw_rw_request(void) raw_cmd->length <<= 9; if ((raw_cmd->length < current_count_sectors << 9) || (raw_cmd->kernel_data != bio_data(current_req->bio) && - CT(COMMAND) == FD_WRITE && + CT(raw_cmd->cmd[COMMAND]) == FD_WRITE && (aligned_sector_t + (raw_cmd->length >> 9) > buffer_max || aligned_sector_t < buffer_min)) || - raw_cmd->length % (128 << SIZECODE) || + raw_cmd->length % (128 << raw_cmd->cmd[SIZECODE]) || raw_cmd->length <= 0 || current_count_sectors <= 0) { DPRINT("fractionary current count b=%lx s=%lx\n", raw_cmd->length, current_count_sectors); @@ -2751,9 +2786,10 @@ static int make_raw_rw_request(void) current_count_sectors); pr_info("st=%d ast=%d mse=%d msi=%d\n", fsector_t, aligned_sector_t, max_sector, max_size); - pr_info("ssize=%x SIZECODE=%d\n", ssize, SIZECODE); + pr_info("ssize=%x SIZECODE=%d\n", ssize, raw_cmd->cmd[SIZECODE]); pr_info("command=%x SECTOR=%d HEAD=%d, TRACK=%d\n", - COMMAND, SECTOR, HEAD, TRACK); + raw_cmd->cmd[COMMAND], raw_cmd->cmd[SECTOR], + raw_cmd->cmd[HEAD], raw_cmd->cmd[TRACK]); pr_info("buffer drive=%d\n", buffer_drive); pr_info("buffer track=%d\n", buffer_track); pr_info("buffer_min=%d\n", buffer_min); @@ -2772,9 +2808,9 @@ static int make_raw_rw_request(void) fsector_t, buffer_min, raw_cmd->length >> 9); pr_info("current_count_sectors=%ld\n", current_count_sectors); - if (CT(COMMAND) == FD_READ) + if (CT(raw_cmd->cmd[COMMAND]) == FD_READ) pr_info("read\n"); - if (CT(COMMAND) == FD_WRITE) + if (CT(raw_cmd->cmd[COMMAND]) == FD_WRITE) pr_info("write\n"); return 0; } @@ -2841,14 +2877,14 @@ do_request: disk_change(current_drive); if (test_bit(current_drive, &fake_change) || - test_bit(FD_DISK_CHANGED_BIT, &DRS->flags)) { + test_bit(FD_DISK_CHANGED_BIT, &drive_state[current_drive].flags)) { DPRINT("disk absent or changed during operation\n"); request_done(0); goto do_request; } if (!_floppy) { 
/* Autodetection */ if (!probing) { - DRS->probed_format = 0; + drive_state[current_drive].probed_format = 0; if (next_valid_format()) { DPRINT("no autodetectable formats\n"); _floppy = NULL; @@ -2857,7 +2893,7 @@ do_request: } } probing = 1; - _floppy = floppy_type + DP->autodetect[DRS->probed_format]; + _floppy = floppy_type + drive_params[current_drive].autodetect[drive_state[current_drive].probed_format]; } else probing = 0; errors = &(current_req->error_count); @@ -2867,7 +2903,7 @@ do_request: goto do_request; } - if (test_bit(FD_NEED_TWADDLE_BIT, &DRS->flags)) + if (test_bit(FD_NEED_TWADDLE_BIT, &drive_state[current_drive].flags)) twaddle(); schedule_bh(floppy_start); debugt(__func__, "queue fd request"); @@ -2936,8 +2972,9 @@ static int poll_drive(bool interruptible, int flag) raw_cmd->track = 0; raw_cmd->cmd_count = 0; cont = &poll_cont; - debug_dcl(DP->flags, "setting NEWCHANGE in poll_drive\n"); - set_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags); + debug_dcl(drive_params[current_drive].flags, + "setting NEWCHANGE in poll_drive\n"); + set_bit(FD_DISK_NEWCHANGE_BIT, &drive_state[current_drive].flags); return wait_til_done(floppy_ready, interruptible); } @@ -2967,8 +3004,8 @@ static int user_reset_fdc(int drive, int arg, bool interruptible) return -EINTR; if (arg == FD_RESET_ALWAYS) - FDCS->reset = 1; - if (FDCS->reset) { + fdc_state[current_fdc].reset = 1; + if (fdc_state[current_fdc].reset) { cont = &reset_cont; ret = wait_til_done(reset_fdc, interruptible); if (ret == -EINTR) @@ -3001,8 +3038,8 @@ static const char *drive_name(int type, int drive) if (type) floppy = floppy_type + type; else { - if (UDP->native_format) - floppy = floppy_type + UDP->native_format; + if (drive_params[drive].native_format) + floppy = floppy_type + drive_params[drive].native_format; else return "(null)"; } @@ -3179,23 +3216,23 @@ static int raw_cmd_ioctl(int cmd, void __user *param) int ret2; int ret; - if (FDCS->rawcmd <= 1) - FDCS->rawcmd = 1; + if (fdc_state[current_fdc].rawcmd 
<= 1) + fdc_state[current_fdc].rawcmd = 1; for (drive = 0; drive < N_DRIVE; drive++) { - if (FDC(drive) != fdc) + if (FDC(drive) != current_fdc) continue; if (drive == current_drive) { - if (UDRS->fd_ref > 1) { - FDCS->rawcmd = 2; + if (drive_state[drive].fd_ref > 1) { + fdc_state[current_fdc].rawcmd = 2; break; } - } else if (UDRS->fd_ref) { - FDCS->rawcmd = 2; + } else if (drive_state[drive].fd_ref) { + fdc_state[current_fdc].rawcmd = 2; break; } } - if (FDCS->reset) + if (fdc_state[current_fdc].reset) return -EIO; ret = raw_cmd_copyin(cmd, param, &my_raw_cmd); @@ -3207,12 +3244,13 @@ static int raw_cmd_ioctl(int cmd, void __user *param) raw_cmd = my_raw_cmd; cont = &raw_cmd_cont; ret = wait_til_done(floppy_start, true); - debug_dcl(DP->flags, "calling disk change from raw_cmd ioctl\n"); + debug_dcl(drive_params[current_drive].flags, + "calling disk change from raw_cmd ioctl\n"); - if (ret != -EINTR && FDCS->reset) + if (ret != -EINTR && fdc_state[current_fdc].reset) ret = -EIO; - DRS->track = NO_TRACK; + drive_state[current_drive].track = NO_TRACK; ret2 = raw_cmd_copyout(cmd, param, my_raw_cmd); if (!ret) @@ -3240,9 +3278,9 @@ static int set_geometry(unsigned int cmd, struct floppy_struct *g, (int)g->head <= 0 || /* check for overflow in max_sector */ (int)(g->sect * g->head) <= 0 || - /* check for zero in F_SECT_PER_TRACK */ + /* check for zero in raw_cmd->cmd[F_SECT_PER_TRACK] */ (unsigned char)((g->sect << 2) >> FD_SIZECODE(g)) == 0 || - g->track <= 0 || g->track > UDP->tracks >> STRETCH(g) || + g->track <= 0 || g->track > drive_params[drive].tracks >> STRETCH(g) || /* check if reserved bits are set */ (g->stretch & ~(FD_STRETCH | FD_SWAPSIDES | FD_SECTBASEMASK)) != 0) return -EINVAL; @@ -3285,16 +3323,16 @@ static int set_geometry(unsigned int cmd, struct floppy_struct *g, current_type[drive] = &user_params[drive]; floppy_sizes[drive] = user_params[drive].size; if (cmd == FDDEFPRM) - DRS->keep_data = -1; + drive_state[current_drive].keep_data = -1; else - 
DRS->keep_data = 1; + drive_state[current_drive].keep_data = 1; /* invalidation. Invalidate only when needed, i.e. * when there are already sectors in the buffer cache * whose number will change. This is useful, because * mtools often changes the geometry of the disk after * looking at the boot block */ - if (DRS->maxblock > user_params[drive].sect || - DRS->maxtrack || + if (drive_state[current_drive].maxblock > user_params[drive].sect || + drive_state[current_drive].maxtrack || ((user_params[drive].sect ^ oldStretch) & (FD_SWAPSIDES | FD_SECTBASEMASK))) invalidate_drive(bdev); @@ -3407,7 +3445,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int unsigned long param) { int drive = (long)bdev->bd_disk->private_data; - int type = ITYPE(UDRS->fd_device); + int type = ITYPE(drive_state[drive].fd_device); int i; int ret; int size; @@ -3455,7 +3493,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int switch (cmd) { case FDEJECT: - if (UDRS->fd_ref != 1) + if (drive_state[drive].fd_ref != 1) /* somebody else has this drive open */ return -EBUSY; if (lock_fdc(drive)) @@ -3465,8 +3503,8 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int * non-Sparc architectures */ ret = fd_eject(UNIT(drive)); - set_bit(FD_DISK_CHANGED_BIT, &UDRS->flags); - set_bit(FD_VERIFY_BIT, &UDRS->flags); + set_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags); + set_bit(FD_VERIFY_BIT, &drive_state[drive].flags); process_fd_request(); return ret; case FDCLRPRM: @@ -3474,7 +3512,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int return -EINTR; current_type[drive] = NULL; floppy_sizes[drive] = MAX_DISK_SIZE << 1; - UDRS->keep_data = 0; + drive_state[drive].keep_data = 0; return invalidate_drive(bdev); case FDSETPRM: case FDDEFPRM: @@ -3489,17 +3527,17 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int outparam = &inparam.g; break; case 
FDMSGON: - UDP->flags |= FTD_MSG; + drive_params[drive].flags |= FTD_MSG; return 0; case FDMSGOFF: - UDP->flags &= ~FTD_MSG; + drive_params[drive].flags &= ~FTD_MSG; return 0; case FDFMTBEG: if (lock_fdc(drive)) return -EINTR; if (poll_drive(true, FD_RAW_NEED_DISK) == -EINTR) return -EINTR; - ret = UDRS->flags; + ret = drive_state[drive].flags; process_fd_request(); if (ret & FD_VERIFY) return -ENODEV; @@ -3507,7 +3545,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int return -EROFS; return 0; case FDFMTTRK: - if (UDRS->fd_ref != 1) + if (drive_state[drive].fd_ref != 1) return -EBUSY; return do_format(drive, &inparam.f); case FDFMTEND: @@ -3516,13 +3554,13 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int return -EINTR; return invalidate_drive(bdev); case FDSETEMSGTRESH: - UDP->max_errors.reporting = (unsigned short)(param & 0x0f); + drive_params[drive].max_errors.reporting = (unsigned short)(param & 0x0f); return 0; case FDGETMAXERRS: - outparam = &UDP->max_errors; + outparam = &drive_params[drive].max_errors; break; case FDSETMAXERRS: - UDP->max_errors = inparam.max_errors; + drive_params[drive].max_errors = inparam.max_errors; break; case FDGETDRVTYP: outparam = drive_name(type, drive); @@ -3532,10 +3570,10 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int if (!valid_floppy_drive_params(inparam.dp.autodetect, inparam.dp.native_format)) return -EINVAL; - *UDP = inparam.dp; + drive_params[drive] = inparam.dp; break; case FDGETDRVPRM: - outparam = UDP; + outparam = &drive_params[drive]; break; case FDPOLLDRVSTAT: if (lock_fdc(drive)) @@ -3545,18 +3583,18 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int process_fd_request(); /* fall through */ case FDGETDRVSTAT: - outparam = UDRS; + outparam = &drive_state[drive]; break; case FDRESET: return user_reset_fdc(drive, (int)param, true); case FDGETFDCSTAT: - outparam = UFDCS; + outparam 
= &fdc_state[FDC(drive)]; break; case FDWERRORCLR: - memset(UDRWE, 0, sizeof(*UDRWE)); + memset(&write_errors[drive], 0, sizeof(write_errors[drive])); return 0; case FDWERRORGET: - outparam = UDRWE; + outparam = &write_errors[drive]; break; case FDRAWCMD: if (type) @@ -3692,7 +3730,7 @@ static int compat_set_geometry(struct block_device *bdev, fmode_t mode, unsigned mutex_lock(&floppy_mutex); drive = (long)bdev->bd_disk->private_data; - type = ITYPE(UDRS->fd_device); + type = ITYPE(drive_state[drive].fd_device); err = set_geometry(cmd == FDSETPRM32 ? FDSETPRM : FDDEFPRM, &v, drive, type, bdev); mutex_unlock(&floppy_mutex); @@ -3708,7 +3746,8 @@ static int compat_get_prm(int drive, memset(&v, 0, sizeof(v)); mutex_lock(&floppy_mutex); - err = get_floppy_geometry(drive, ITYPE(UDRS->fd_device), &p); + err = get_floppy_geometry(drive, ITYPE(drive_state[drive].fd_device), + &p); if (err) { mutex_unlock(&floppy_mutex); return err; @@ -3732,25 +3771,26 @@ static int compat_setdrvprm(int drive, if (!valid_floppy_drive_params(v.autodetect, v.native_format)) return -EINVAL; mutex_lock(&floppy_mutex); - UDP->cmos = v.cmos; - UDP->max_dtr = v.max_dtr; - UDP->hlt = v.hlt; - UDP->hut = v.hut; - UDP->srt = v.srt; - UDP->spinup = v.spinup; - UDP->spindown = v.spindown; - UDP->spindown_offset = v.spindown_offset; - UDP->select_delay = v.select_delay; - UDP->rps = v.rps; - UDP->tracks = v.tracks; - UDP->timeout = v.timeout; - UDP->interleave_sect = v.interleave_sect; - UDP->max_errors = v.max_errors; - UDP->flags = v.flags; - UDP->read_track = v.read_track; - memcpy(UDP->autodetect, v.autodetect, sizeof(v.autodetect)); - UDP->checkfreq = v.checkfreq; - UDP->native_format = v.native_format; + drive_params[drive].cmos = v.cmos; + drive_params[drive].max_dtr = v.max_dtr; + drive_params[drive].hlt = v.hlt; + drive_params[drive].hut = v.hut; + drive_params[drive].srt = v.srt; + drive_params[drive].spinup = v.spinup; + drive_params[drive].spindown = v.spindown; + 
drive_params[drive].spindown_offset = v.spindown_offset; + drive_params[drive].select_delay = v.select_delay; + drive_params[drive].rps = v.rps; + drive_params[drive].tracks = v.tracks; + drive_params[drive].timeout = v.timeout; + drive_params[drive].interleave_sect = v.interleave_sect; + drive_params[drive].max_errors = v.max_errors; + drive_params[drive].flags = v.flags; + drive_params[drive].read_track = v.read_track; + memcpy(drive_params[drive].autodetect, v.autodetect, + sizeof(v.autodetect)); + drive_params[drive].checkfreq = v.checkfreq; + drive_params[drive].native_format = v.native_format; mutex_unlock(&floppy_mutex); return 0; } @@ -3762,25 +3802,26 @@ static int compat_getdrvprm(int drive, memset(&v, 0, sizeof(struct compat_floppy_drive_params)); mutex_lock(&floppy_mutex); - v.cmos = UDP->cmos; - v.max_dtr = UDP->max_dtr; - v.hlt = UDP->hlt; - v.hut = UDP->hut; - v.srt = UDP->srt; - v.spinup = UDP->spinup; - v.spindown = UDP->spindown; - v.spindown_offset = UDP->spindown_offset; - v.select_delay = UDP->select_delay; - v.rps = UDP->rps; - v.tracks = UDP->tracks; - v.timeout = UDP->timeout; - v.interleave_sect = UDP->interleave_sect; - v.max_errors = UDP->max_errors; - v.flags = UDP->flags; - v.read_track = UDP->read_track; - memcpy(v.autodetect, UDP->autodetect, sizeof(v.autodetect)); - v.checkfreq = UDP->checkfreq; - v.native_format = UDP->native_format; + v.cmos = drive_params[drive].cmos; + v.max_dtr = drive_params[drive].max_dtr; + v.hlt = drive_params[drive].hlt; + v.hut = drive_params[drive].hut; + v.srt = drive_params[drive].srt; + v.spinup = drive_params[drive].spinup; + v.spindown = drive_params[drive].spindown; + v.spindown_offset = drive_params[drive].spindown_offset; + v.select_delay = drive_params[drive].select_delay; + v.rps = drive_params[drive].rps; + v.tracks = drive_params[drive].tracks; + v.timeout = drive_params[drive].timeout; + v.interleave_sect = drive_params[drive].interleave_sect; + v.max_errors = drive_params[drive].max_errors; 
+ v.flags = drive_params[drive].flags; + v.read_track = drive_params[drive].read_track; + memcpy(v.autodetect, drive_params[drive].autodetect, + sizeof(v.autodetect)); + v.checkfreq = drive_params[drive].checkfreq; + v.native_format = drive_params[drive].native_format; mutex_unlock(&floppy_mutex); if (copy_to_user(arg, &v, sizeof(struct compat_floppy_drive_params))) @@ -3803,20 +3844,20 @@ static int compat_getdrvstat(int drive, bool poll, goto Eintr; process_fd_request(); } - v.spinup_date = UDRS->spinup_date; - v.select_date = UDRS->select_date; - v.first_read_date = UDRS->first_read_date; - v.probed_format = UDRS->probed_format; - v.track = UDRS->track; - v.maxblock = UDRS->maxblock; - v.maxtrack = UDRS->maxtrack; - v.generation = UDRS->generation; - v.keep_data = UDRS->keep_data; - v.fd_ref = UDRS->fd_ref; - v.fd_device = UDRS->fd_device; - v.last_checked = UDRS->last_checked; - v.dmabuf = (uintptr_t)UDRS->dmabuf; - v.bufblocks = UDRS->bufblocks; + v.spinup_date = drive_state[drive].spinup_date; + v.select_date = drive_state[drive].select_date; + v.first_read_date = drive_state[drive].first_read_date; + v.probed_format = drive_state[drive].probed_format; + v.track = drive_state[drive].track; + v.maxblock = drive_state[drive].maxblock; + v.maxtrack = drive_state[drive].maxtrack; + v.generation = drive_state[drive].generation; + v.keep_data = drive_state[drive].keep_data; + v.fd_ref = drive_state[drive].fd_ref; + v.fd_device = drive_state[drive].fd_device; + v.last_checked = drive_state[drive].last_checked; + v.dmabuf = (uintptr_t) drive_state[drive].dmabuf; + v.bufblocks = drive_state[drive].bufblocks; mutex_unlock(&floppy_mutex); if (copy_to_user(arg, &v, sizeof(struct compat_floppy_drive_struct))) @@ -3834,7 +3875,7 @@ static int compat_getfdcstat(int drive, struct floppy_fdc_state v; mutex_lock(&floppy_mutex); - v = *UFDCS; + v = fdc_state[FDC(drive)]; mutex_unlock(&floppy_mutex); memset(&v32, 0, sizeof(struct compat_floppy_fdc_state)); @@ -3864,7 +3905,7 @@ 
static int compat_werrorget(int drive, memset(&v32, 0, sizeof(struct compat_floppy_write_errors)); mutex_lock(&floppy_mutex); - v = *UDRWE; + v = write_errors[drive]; mutex_unlock(&floppy_mutex); v32.write_errors = v.write_errors; v32.first_error_sector = v.first_error_sector; @@ -3933,16 +3974,16 @@ static void __init config_types(void) /* read drive info out of physical CMOS */ drive = 0; - if (!UDP->cmos) - UDP->cmos = FLOPPY0_TYPE; + if (!drive_params[drive].cmos) + drive_params[drive].cmos = FLOPPY0_TYPE; drive = 1; - if (!UDP->cmos) - UDP->cmos = FLOPPY1_TYPE; + if (!drive_params[drive].cmos) + drive_params[drive].cmos = FLOPPY1_TYPE; /* FIXME: additional physical CMOS drive detection should go here */ for (drive = 0; drive < N_DRIVE; drive++) { - unsigned int type = UDP->cmos; + unsigned int type = drive_params[drive].cmos; struct floppy_drive_params *params; const char *name = NULL; char temparea[32]; @@ -3972,7 +4013,7 @@ static void __init config_types(void) pr_cont("%s fd%d is %s", prepend, drive, name); } - *UDP = *params; + drive_params[drive] = *params; } if (has_drive) @@ -3985,11 +4026,11 @@ static void floppy_release(struct gendisk *disk, fmode_t mode) mutex_lock(&floppy_mutex); mutex_lock(&open_lock); - if (!UDRS->fd_ref--) { + if (!drive_state[drive].fd_ref--) { DPRINT("floppy_release with fd_ref == 0"); - UDRS->fd_ref = 0; + drive_state[drive].fd_ref = 0; } - if (!UDRS->fd_ref) + if (!drive_state[drive].fd_ref) opened_bdev[drive] = NULL; mutex_unlock(&open_lock); mutex_unlock(&floppy_mutex); @@ -4010,16 +4051,16 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) mutex_lock(&floppy_mutex); mutex_lock(&open_lock); - old_dev = UDRS->fd_device; + old_dev = drive_state[drive].fd_device; if (opened_bdev[drive] && opened_bdev[drive] != bdev) goto out2; - if (!UDRS->fd_ref && (UDP->flags & FD_BROKEN_DCL)) { - set_bit(FD_DISK_CHANGED_BIT, &UDRS->flags); - set_bit(FD_VERIFY_BIT, &UDRS->flags); + if (!drive_state[drive].fd_ref && 
(drive_params[drive].flags & FD_BROKEN_DCL)) { + set_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags); + set_bit(FD_VERIFY_BIT, &drive_state[drive].flags); } - UDRS->fd_ref++; + drive_state[drive].fd_ref++; opened_bdev[drive] = bdev; @@ -4028,7 +4069,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) if (!floppy_track_buffer) { /* if opening an ED drive, reserve a big buffer, * else reserve a small one */ - if ((UDP->cmos == 6) || (UDP->cmos == 5)) + if ((drive_params[drive].cmos == 6) || (drive_params[drive].cmos == 5)) try = 64; /* Only 48 actually useful */ else try = 32; /* Only 24 actually useful */ @@ -4056,38 +4097,39 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) } new_dev = MINOR(bdev->bd_dev); - UDRS->fd_device = new_dev; + drive_state[drive].fd_device = new_dev; set_capacity(disks[drive], floppy_sizes[new_dev]); if (old_dev != -1 && old_dev != new_dev) { if (buffer_drive == drive) buffer_track = -1; } - if (UFDCS->rawcmd == 1) - UFDCS->rawcmd = 2; + if (fdc_state[FDC(drive)].rawcmd == 1) + fdc_state[FDC(drive)].rawcmd = 2; if (!(mode & FMODE_NDELAY)) { if (mode & (FMODE_READ|FMODE_WRITE)) { - UDRS->last_checked = 0; - clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags); + drive_state[drive].last_checked = 0; + clear_bit(FD_OPEN_SHOULD_FAIL_BIT, + &drive_state[drive].flags); check_disk_change(bdev); - if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags)) + if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags)) goto out; - if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags)) + if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags)) goto out; } res = -EROFS; if ((mode & FMODE_WRITE) && - !test_bit(FD_DISK_WRITABLE_BIT, &UDRS->flags)) + !test_bit(FD_DISK_WRITABLE_BIT, &drive_state[drive].flags)) goto out; } mutex_unlock(&open_lock); mutex_unlock(&floppy_mutex); return 0; out: - UDRS->fd_ref--; + drive_state[drive].fd_ref--; - if (!UDRS->fd_ref) + if (!drive_state[drive].fd_ref) opened_bdev[drive] = NULL; 
out2: mutex_unlock(&open_lock); @@ -4103,19 +4145,19 @@ static unsigned int floppy_check_events(struct gendisk *disk, { int drive = (long)disk->private_data; - if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) || - test_bit(FD_VERIFY_BIT, &UDRS->flags)) + if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags) || + test_bit(FD_VERIFY_BIT, &drive_state[drive].flags)) return DISK_EVENT_MEDIA_CHANGE; - if (time_after(jiffies, UDRS->last_checked + UDP->checkfreq)) { + if (time_after(jiffies, drive_state[drive].last_checked + drive_params[drive].checkfreq)) { if (lock_fdc(drive)) return 0; poll_drive(false, 0); process_fd_request(); } - if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) || - test_bit(FD_VERIFY_BIT, &UDRS->flags) || + if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags) || + test_bit(FD_VERIFY_BIT, &drive_state[drive].flags) || test_bit(drive, &fake_change) || drive_no_geom(drive)) return DISK_EVENT_MEDIA_CHANGE; @@ -4141,7 +4183,7 @@ static void floppy_rb0_cb(struct bio *bio) if (bio->bi_status) { pr_info("floppy: error %d while reading block 0\n", bio->bi_status); - set_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags); + set_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags); } complete(&cbdata->complete); } @@ -4198,8 +4240,8 @@ static int floppy_revalidate(struct gendisk *disk) int cf; int res = 0; - if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) || - test_bit(FD_VERIFY_BIT, &UDRS->flags) || + if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags) || + test_bit(FD_VERIFY_BIT, &drive_state[drive].flags) || test_bit(drive, &fake_change) || drive_no_geom(drive)) { if (WARN(atomic_read(&usage_count) == 0, @@ -4209,20 +4251,20 @@ static int floppy_revalidate(struct gendisk *disk) res = lock_fdc(drive); if (res) return res; - cf = (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) || - test_bit(FD_VERIFY_BIT, &UDRS->flags)); + cf = (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags) || + test_bit(FD_VERIFY_BIT, &drive_state[drive].flags)); if 
(!(cf || test_bit(drive, &fake_change) || drive_no_geom(drive))) { process_fd_request(); /*already done by another thread */ return 0; } - UDRS->maxblock = 0; - UDRS->maxtrack = 0; + drive_state[drive].maxblock = 0; + drive_state[drive].maxtrack = 0; if (buffer_drive == drive) buffer_track = -1; clear_bit(drive, &fake_change); - clear_bit(FD_DISK_CHANGED_BIT, &UDRS->flags); + clear_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags); if (cf) - UDRS->generation++; + drive_state[drive].generation++; if (drive_no_geom(drive)) { /* auto-sensing */ res = __floppy_read_block_0(opened_bdev[drive], drive); @@ -4232,7 +4274,7 @@ static int floppy_revalidate(struct gendisk *disk) process_fd_request(); } } - set_capacity(disk, floppy_sizes[UDRS->fd_device]); + set_capacity(disk, floppy_sizes[drive_state[drive].fd_device]); return res; } @@ -4261,23 +4303,23 @@ static char __init get_fdc_version(void) int r; output_byte(FD_DUMPREGS); /* 82072 and better know DUMPREGS */ - if (FDCS->reset) + if (fdc_state[current_fdc].reset) return FDC_NONE; r = result(); if (r <= 0x00) return FDC_NONE; /* No FDC present ??? */ if ((r == 1) && (reply_buffer[0] == 0x80)) { - pr_info("FDC %d is an 8272A\n", fdc); + pr_info("FDC %d is an 8272A\n", current_fdc); return FDC_8272A; /* 8272a/765 don't know DUMPREGS */ } if (r != 10) { pr_info("FDC %d init: DUMPREGS: unexpected return of %d bytes.\n", - fdc, r); + current_fdc, r); return FDC_UNKNOWN; } if (!fdc_configure()) { - pr_info("FDC %d is an 82072\n", fdc); + pr_info("FDC %d is an 82072\n", current_fdc); return FDC_82072; /* 82072 doesn't know CONFIGURE */ } @@ -4285,50 +4327,50 @@ static char __init get_fdc_version(void) if (need_more_output() == MORE_OUTPUT) { output_byte(0); } else { - pr_info("FDC %d is an 82072A\n", fdc); + pr_info("FDC %d is an 82072A\n", current_fdc); return FDC_82072A; /* 82072A as found on Sparcs. 
*/ } output_byte(FD_UNLOCK); r = result(); if ((r == 1) && (reply_buffer[0] == 0x80)) { - pr_info("FDC %d is a pre-1991 82077\n", fdc); + pr_info("FDC %d is a pre-1991 82077\n", current_fdc); return FDC_82077_ORIG; /* Pre-1991 82077, doesn't know * LOCK/UNLOCK */ } if ((r != 1) || (reply_buffer[0] != 0x00)) { pr_info("FDC %d init: UNLOCK: unexpected return of %d bytes.\n", - fdc, r); + current_fdc, r); return FDC_UNKNOWN; } output_byte(FD_PARTID); r = result(); if (r != 1) { pr_info("FDC %d init: PARTID: unexpected return of %d bytes.\n", - fdc, r); + current_fdc, r); return FDC_UNKNOWN; } if (reply_buffer[0] == 0x80) { - pr_info("FDC %d is a post-1991 82077\n", fdc); + pr_info("FDC %d is a post-1991 82077\n", current_fdc); return FDC_82077; /* Revised 82077AA passes all the tests */ } switch (reply_buffer[0] >> 5) { case 0x0: /* Either a 82078-1 or a 82078SL running at 5Volt */ - pr_info("FDC %d is an 82078.\n", fdc); + pr_info("FDC %d is an 82078.\n", current_fdc); return FDC_82078; case 0x1: - pr_info("FDC %d is a 44pin 82078\n", fdc); + pr_info("FDC %d is a 44pin 82078\n", current_fdc); return FDC_82078; case 0x2: - pr_info("FDC %d is a S82078B\n", fdc); + pr_info("FDC %d is a S82078B\n", current_fdc); return FDC_S82078B; case 0x3: - pr_info("FDC %d is a National Semiconductor PC87306\n", fdc); + pr_info("FDC %d is a National Semiconductor PC87306\n", current_fdc); return FDC_87306; default: pr_info("FDC %d init: 82078 variant with unknown PARTID=%d.\n", - fdc, reply_buffer[0] >> 5); + current_fdc, reply_buffer[0] >> 5); return FDC_82078_UNKN; } } /* get_fdc_version */ @@ -4384,7 +4426,7 @@ static void __init set_cmos(int *ints, int dummy, int dummy2) if (current_drive >= 4 && !FDC2) FDC2 = 0x370; #endif - DP->cmos = ints[2]; + drive_params[current_drive].cmos = ints[2]; DPRINT("setting CMOS code to %d\n", ints[2]); } @@ -4473,7 +4515,7 @@ static ssize_t floppy_cmos_show(struct device *dev, int drive; drive = p->id; - return sprintf(buf, "%X\n", UDP->cmos); + 
return sprintf(buf, "%X\n", drive_params[drive].cmos); } static DEVICE_ATTR(cmos, 0444, floppy_cmos_show, NULL); @@ -4494,7 +4536,7 @@ static int floppy_resume(struct device *dev) int fdc; for (fdc = 0; fdc < N_FDC; fdc++) - if (FDCS->address != -1) + if (fdc_state[fdc].address != -1) user_reset_fdc(-1, FD_RESET_ALWAYS, false); return 0; @@ -4604,16 +4646,16 @@ static int __init do_floppy_init(void) config_types(); for (i = 0; i < N_FDC; i++) { - fdc = i; - memset(FDCS, 0, sizeof(*FDCS)); - FDCS->dtr = -1; - FDCS->dor = 0x4; + current_fdc = i; + memset(&fdc_state[current_fdc], 0, sizeof(*fdc_state)); + fdc_state[current_fdc].dtr = -1; + fdc_state[current_fdc].dor = 0x4; #if defined(__sparc__) || defined(__mc68000__) /*sparcs/sun3x don't have a DOR reset which we can fall back on to */ #ifdef __mc68000__ if (MACH_IS_SUN3X) #endif - FDCS->version = FDC_82072A; + fdc_state[current_fdc].version = FDC_82072A; #endif } @@ -4628,7 +4670,7 @@ static int __init do_floppy_init(void) fdc_state[1].address = FDC2; #endif - fdc = 0; /* reset fdc in case of unexpected interrupt */ + current_fdc = 0; /* reset fdc in case of unexpected interrupt */ err = floppy_grab_irq_and_dma(); if (err) { cancel_delayed_work(&fd_timeout); @@ -4638,12 +4680,12 @@ static int __init do_floppy_init(void) /* initialise drive state */ for (drive = 0; drive < N_DRIVE; drive++) { - memset(UDRS, 0, sizeof(*UDRS)); - memset(UDRWE, 0, sizeof(*UDRWE)); - set_bit(FD_DISK_NEWCHANGE_BIT, &UDRS->flags); - set_bit(FD_DISK_CHANGED_BIT, &UDRS->flags); - set_bit(FD_VERIFY_BIT, &UDRS->flags); - UDRS->fd_device = -1; + memset(&drive_state[drive], 0, sizeof(drive_state[drive])); + memset(&write_errors[drive], 0, sizeof(write_errors[drive])); + set_bit(FD_DISK_NEWCHANGE_BIT, &drive_state[drive].flags); + set_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags); + set_bit(FD_VERIFY_BIT, &drive_state[drive].flags); + drive_state[drive].fd_device = -1; floppy_track_buffer = NULL; max_buffer_sectors = 0; } @@ -4655,29 
+4697,30 @@ static int __init do_floppy_init(void) msleep(10); for (i = 0; i < N_FDC; i++) { - fdc = i; - FDCS->driver_version = FD_DRIVER_VERSION; + current_fdc = i; + fdc_state[current_fdc].driver_version = FD_DRIVER_VERSION; for (unit = 0; unit < 4; unit++) - FDCS->track[unit] = 0; - if (FDCS->address == -1) + fdc_state[current_fdc].track[unit] = 0; + if (fdc_state[current_fdc].address == -1) continue; - FDCS->rawcmd = 2; + fdc_state[current_fdc].rawcmd = 2; if (user_reset_fdc(-1, FD_RESET_ALWAYS, false)) { /* free ioports reserved by floppy_grab_irq_and_dma() */ - floppy_release_regions(fdc); - FDCS->address = -1; - FDCS->version = FDC_NONE; + floppy_release_regions(current_fdc); + fdc_state[current_fdc].address = -1; + fdc_state[current_fdc].version = FDC_NONE; continue; } /* Try to determine the floppy controller type */ - FDCS->version = get_fdc_version(); - if (FDCS->version == FDC_NONE) { + fdc_state[current_fdc].version = get_fdc_version(); + if (fdc_state[current_fdc].version == FDC_NONE) { /* free ioports reserved by floppy_grab_irq_and_dma() */ - floppy_release_regions(fdc); - FDCS->address = -1; + floppy_release_regions(current_fdc); + fdc_state[current_fdc].address = -1; continue; } - if (can_use_virtual_dma == 2 && FDCS->version < FDC_82072A) + if (can_use_virtual_dma == 2 && + fdc_state[current_fdc].version < FDC_82072A) can_use_virtual_dma = 0; have_no_fdc = 0; @@ -4687,7 +4730,7 @@ static int __init do_floppy_init(void) */ user_reset_fdc(-1, FD_RESET_ALWAYS, false); } - fdc = 0; + current_fdc = 0; cancel_delayed_work(&fd_timeout); current_drive = 0; initialized = true; @@ -4783,7 +4826,7 @@ static void floppy_release_allocated_regions(int fdc, const struct io_region *p) { while (p != io_regions) { p--; - release_region(FDCS->address + p->offset, p->size); + release_region(fdc_state[fdc].address + p->offset, p->size); } } @@ -4794,10 +4837,10 @@ static int floppy_request_regions(int fdc) const struct io_region *p; for (p = io_regions; p < 
ARRAY_END(io_regions); p++) { - if (!request_region(FDCS->address + p->offset, + if (!request_region(fdc_state[fdc].address + p->offset, p->size, "floppy")) { DPRINT("Floppy io-port 0x%04lx in use\n", - FDCS->address + p->offset); + fdc_state[fdc].address + p->offset); floppy_release_allocated_regions(fdc, p); return -EBUSY; } @@ -4839,36 +4882,36 @@ static int floppy_grab_irq_and_dma(void) } } - for (fdc = 0; fdc < N_FDC; fdc++) { - if (FDCS->address != -1) { - if (floppy_request_regions(fdc)) + for (current_fdc = 0; current_fdc < N_FDC; current_fdc++) { + if (fdc_state[current_fdc].address != -1) { + if (floppy_request_regions(current_fdc)) goto cleanup; } } - for (fdc = 0; fdc < N_FDC; fdc++) { - if (FDCS->address != -1) { + for (current_fdc = 0; current_fdc < N_FDC; current_fdc++) { + if (fdc_state[current_fdc].address != -1) { reset_fdc_info(1); - fd_outb(FDCS->dor, FD_DOR); + fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR); } } - fdc = 0; + current_fdc = 0; set_dor(0, ~0, 8); /* avoid immediate interrupt */ - for (fdc = 0; fdc < N_FDC; fdc++) - if (FDCS->address != -1) - fd_outb(FDCS->dor, FD_DOR); + for (current_fdc = 0; current_fdc < N_FDC; current_fdc++) + if (fdc_state[current_fdc].address != -1) + fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR); /* * The driver will try and free resources and relies on us * to know if they were allocated or not. 
*/ - fdc = 0; + current_fdc = 0; irqdma_allocated = 1; return 0; cleanup: fd_free_irq(); fd_free_dma(); - while (--fdc >= 0) - floppy_release_regions(fdc); + while (--current_fdc >= 0) + floppy_release_regions(current_fdc); atomic_dec(&usage_count); return -1; } @@ -4916,11 +4959,11 @@ static void floppy_release_irq_and_dma(void) pr_info("auxiliary floppy timer still active\n"); if (work_pending(&floppy_work)) pr_info("work still pending\n"); - old_fdc = fdc; - for (fdc = 0; fdc < N_FDC; fdc++) - if (FDCS->address != -1) - floppy_release_regions(fdc); - fdc = old_fdc; + old_fdc = current_fdc; + for (current_fdc = 0; current_fdc < N_FDC; current_fdc++) + if (fdc_state[current_fdc].address != -1) + floppy_release_regions(current_fdc); + current_fdc = old_fdc; } #ifdef MODULE diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 739b372a5112..a42c49e04954 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -214,7 +214,8 @@ static void __loop_update_dio(struct loop_device *lo, bool dio) * LO_FLAGS_READ_ONLY, both are set from kernel, and losetup * will get updated by ioctl(LOOP_GET_STATUS) */ - blk_mq_freeze_queue(lo->lo_queue); + if (lo->lo_state == Lo_bound) + blk_mq_freeze_queue(lo->lo_queue); lo->use_dio = use_dio; if (use_dio) { blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, lo->lo_queue); @@ -223,7 +224,8 @@ static void __loop_update_dio(struct loop_device *lo, bool dio) blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue); lo->lo_flags &= ~LO_FLAGS_DIRECT_IO; } - blk_mq_unfreeze_queue(lo->lo_queue); + if (lo->lo_state == Lo_bound) + blk_mq_unfreeze_queue(lo->lo_queue); } static int @@ -1539,16 +1541,16 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg)) return -EINVAL; - if (lo->lo_queue->limits.logical_block_size != arg) { - sync_blockdev(lo->lo_device); - kill_bdev(lo->lo_device); - } + if (lo->lo_queue->limits.logical_block_size == arg) + return 0; + + 
sync_blockdev(lo->lo_device); + kill_bdev(lo->lo_device); blk_mq_freeze_queue(lo->lo_queue); /* kill_bdev should have truncated all the pages */ - if (lo->lo_queue->limits.logical_block_size != arg && - lo->lo_device->bd_inode->i_mapping->nrpages) { + if (lo->lo_device->bd_inode->i_mapping->nrpages) { err = -EAGAIN; pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", __func__, lo->lo_number, lo->lo_file_name, diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 78181908f0df..43cff01a5a67 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -395,16 +395,19 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, } config = nbd->config; - if (config->num_connections > 1) { + if (config->num_connections > 1 || + (config->num_connections == 1 && nbd->tag_set.timeout)) { dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out, retrying (%d/%d alive)\n", atomic_read(&config->live_connections), config->num_connections); /* * Hooray we have more connections, requeue this IO, the submit - * path will put it on a real connection. + * path will put it on a real connection. Or if only one + * connection is configured, the submit path will wait util + * a new connection is reconfigured or util dead timeout. */ - if (config->socks && config->num_connections > 1) { + if (config->socks) { if (cmd->index < config->num_connections) { struct nbd_sock *nsock = config->socks[cmd->index]; @@ -431,12 +434,22 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, * Userspace sets timeout=0 to disable socket disconnection, * so just warn and reset the timer. */ + struct nbd_sock *nsock = config->socks[cmd->index]; cmd->retries++; dev_info(nbd_to_dev(nbd), "Possible stuck request %p: control (%s@%llu,%uB). 
Runtime %u seconds\n", req, nbdcmd_to_ascii(req_to_nbd_cmd_type(req)), (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req), (req->timeout / HZ) * cmd->retries); + mutex_lock(&nsock->tx_lock); + if (cmd->cookie != nsock->cookie) { + nbd_requeue_cmd(cmd); + mutex_unlock(&nsock->tx_lock); + mutex_unlock(&cmd->lock); + nbd_config_put(nbd); + return BLK_EH_DONE; + } + mutex_unlock(&nsock->tx_lock); mutex_unlock(&cmd->lock); nbd_config_put(nbd); return BLK_EH_RESET_TIMER; @@ -741,14 +754,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", result); /* - * If we've disconnected or we only have 1 - * connection then we need to make sure we + * If we've disconnected, we need to make sure we * complete this request, otherwise error out * and let the timeout stuff handle resubmitting * this request onto another connection. */ - if (nbd_disconnected(config) || - config->num_connections <= 1) { + if (nbd_disconnected(config)) { cmd->status = BLK_STS_IOERR; goto out; } @@ -825,7 +836,7 @@ static int find_fallback(struct nbd_device *nbd, int index) if (config->num_connections <= 1) { dev_err_ratelimited(disk_to_dev(nbd->disk), - "Attempted send on invalid socket\n"); + "Dead connection, failed to find a fallback\n"); return new_index; } diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index 5f13793d35ee..4e1c0712278e 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -97,14 +97,21 @@ module_param_named(home_node, g_home_node, int, 0444); MODULE_PARM_DESC(home_node, "Home node for the device"); #ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION +/* + * For more details about fault injection, please refer to + * Documentation/fault-injection/fault-injection.rst. + */ static char g_timeout_str[80]; module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), 0444); +MODULE_PARM_DESC(timeout, "Fault injection. 
timeout=<interval>,<probability>,<space>,<times>"); static char g_requeue_str[80]; module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), 0444); +MODULE_PARM_DESC(requeue, "Fault injection. requeue=<interval>,<probability>,<space>,<times>"); static char g_init_hctx_str[80]; module_param_string(init_hctx, g_init_hctx_str, sizeof(g_init_hctx_str), 0444); +MODULE_PARM_DESC(init_hctx, "Fault injection to fail hctx init. init_hctx=<interval>,<probability>,<space>,<times>"); #endif static int g_queue_mode = NULL_Q_MQ; @@ -615,6 +622,7 @@ static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq) if (tag != -1U) { cmd = &nq->cmds[tag]; cmd->tag = tag; + cmd->error = BLK_STS_OK; cmd->nq = nq; if (nq->dev->irqmode == NULL_IRQ_TIMER) { hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, @@ -1395,6 +1403,7 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, cmd->timer.function = null_cmd_timer_expired; } cmd->rq = bd->rq; + cmd->error = BLK_STS_OK; cmd->nq = nq; blk_mq_start_request(bd->rq); diff --git a/drivers/block/null_blk_trace.c b/drivers/block/null_blk_trace.c new file mode 100644 index 000000000000..f246e7bff698 --- /dev/null +++ b/drivers/block/null_blk_trace.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * null_blk trace related helpers. + * + * Copyright (C) 2020 Western Digital Corporation or its affiliates. + */ +#include "null_blk_trace.h" + +/* + * Helper to use for all null_blk traces to extract disk name. + */ +const char *nullb_trace_disk_name(struct trace_seq *p, char *name) +{ + const char *ret = trace_seq_buffer_ptr(p); + + if (name && *name) + trace_seq_printf(p, "disk=%s, ", name); + trace_seq_putc(p, 0); + + return ret; +} diff --git a/drivers/block/null_blk_trace.h b/drivers/block/null_blk_trace.h new file mode 100644 index 000000000000..4f83032eb544 --- /dev/null +++ b/drivers/block/null_blk_trace.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * null_blk device driver tracepoints. 
+ * + * Copyright (C) 2020 Western Digital Corporation or its affiliates. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM nullb + +#if !defined(_TRACE_NULLB_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NULLB_H + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +#include "null_blk.h" + +const char *nullb_trace_disk_name(struct trace_seq *p, char *name); + +#define __print_disk_name(name) nullb_trace_disk_name(p, name) + +#ifndef TRACE_HEADER_MULTI_READ +static inline void __assign_disk_name(char *name, struct gendisk *disk) +{ + if (disk) + memcpy(name, disk->disk_name, DISK_NAME_LEN); + else + memset(name, 0, DISK_NAME_LEN); +} +#endif + +TRACE_EVENT(nullb_zone_op, + TP_PROTO(struct nullb_cmd *cmd, unsigned int zone_no, + unsigned int zone_cond), + TP_ARGS(cmd, zone_no, zone_cond), + TP_STRUCT__entry( + __array(char, disk, DISK_NAME_LEN) + __field(enum req_opf, op) + __field(unsigned int, zone_no) + __field(unsigned int, zone_cond) + ), + TP_fast_assign( + __entry->op = req_op(cmd->rq); + __entry->zone_no = zone_no; + __entry->zone_cond = zone_cond; + __assign_disk_name(__entry->disk, cmd->rq->rq_disk); + ), + TP_printk("%s req=%-15s zone_no=%u zone_cond=%-10s", + __print_disk_name(__entry->disk), + blk_op_str(__entry->op), + __entry->zone_no, + blk_zone_cond_str(__entry->zone_cond)) +); + +TRACE_EVENT(nullb_report_zones, + TP_PROTO(struct nullb *nullb, unsigned int nr_zones), + TP_ARGS(nullb, nr_zones), + TP_STRUCT__entry( + __array(char, disk, DISK_NAME_LEN) + __field(unsigned int, nr_zones) + ), + TP_fast_assign( + __entry->nr_zones = nr_zones; + __assign_disk_name(__entry->disk, nullb->disk); + ), + TP_printk("%s nr_zones=%u", + __print_disk_name(__entry->disk), __entry->nr_zones) +); + +#endif /* _TRACE_NULLB_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . 
+#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE null_blk_trace + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c index ed34785dd64b..673618d8222a 100644 --- a/drivers/block/null_blk_zoned.c +++ b/drivers/block/null_blk_zoned.c @@ -2,6 +2,9 @@ #include <linux/vmalloc.h> #include "null_blk.h" +#define CREATE_TRACE_POINTS +#include "null_blk_trace.h" + /* zone_size in MBs to sectors. */ #define ZONE_SIZE_SHIFT 11 @@ -80,6 +83,8 @@ int null_report_zones(struct gendisk *disk, sector_t sector, return 0; nr_zones = min(nr_zones, dev->nr_zones - first_zone); + trace_nullb_report_zones(nullb, nr_zones); + for (i = 0; i < nr_zones; i++) { /* * Stacked DM target drivers will remap the zone information by @@ -148,6 +153,8 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, /* Invalid zone condition */ return BLK_STS_IOERR; } + + trace_nullb_zone_op(cmd, zno, zone->cond); return BLK_STS_OK; } @@ -155,7 +162,8 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, sector_t sector) { struct nullb_device *dev = cmd->nq->dev; - struct blk_zone *zone = &dev->zones[null_zone_no(dev, sector)]; + unsigned int zone_no = null_zone_no(dev, sector); + struct blk_zone *zone = &dev->zones[zone_no]; size_t i; switch (op) { @@ -203,6 +211,8 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, default: return BLK_STS_NOTSUPP; } + + trace_nullb_zone_op(cmd, zone_no, zone->cond); return BLK_STS_OK; } diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c index 111eb659e66d..1914f5488b22 100644 --- a/drivers/block/rsxx/dma.c +++ b/drivers/block/rsxx/dma.c @@ -80,7 +80,7 @@ struct dma_tracker { struct dma_tracker_list { spinlock_t lock; int head; - struct dma_tracker list[0]; + struct dma_tracker list[]; }; diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c index 
7d8958df9472..6387302b03f2 100644 --- a/drivers/lightnvm/pblk-sysfs.c +++ b/drivers/lightnvm/pblk-sysfs.c @@ -37,7 +37,7 @@ static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page) active = 0; up(&rlun->wr_sem); } - sz += snprintf(page + sz, PAGE_SIZE - sz, + sz += scnprintf(page + sz, PAGE_SIZE - sz, "pblk: pos:%d, ch:%d, lun:%d - %d\n", i, rlun->bppa.a.ch, @@ -120,7 +120,7 @@ static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page) struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf; struct nvm_addrf_12 *gppaf = (struct nvm_addrf_12 *)&geo->addrf; - sz = snprintf(page, PAGE_SIZE, + sz = scnprintf(page, PAGE_SIZE, "g:(b:%d)blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n", pblk->addrf_len, ppaf->blk_offset, ppaf->blk_len, @@ -130,7 +130,7 @@ static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page) ppaf->pln_offset, ppaf->pln_len, ppaf->sec_offset, ppaf->sec_len); - sz += snprintf(page + sz, PAGE_SIZE - sz, + sz += scnprintf(page + sz, PAGE_SIZE - sz, "d:blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n", gppaf->blk_offset, gppaf->blk_len, gppaf->pg_offset, gppaf->pg_len, @@ -142,7 +142,7 @@ static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page) struct nvm_addrf *ppaf = &pblk->addrf; struct nvm_addrf *gppaf = &geo->addrf; - sz = snprintf(page, PAGE_SIZE, + sz = scnprintf(page, PAGE_SIZE, "pblk:(s:%d)ch:%d/%d,lun:%d/%d,chk:%d/%d/sec:%d/%d\n", pblk->addrf_len, ppaf->ch_offset, ppaf->ch_len, @@ -150,7 +150,7 @@ static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page) ppaf->chk_offset, ppaf->chk_len, ppaf->sec_offset, ppaf->sec_len); - sz += snprintf(page + sz, PAGE_SIZE - sz, + sz += scnprintf(page + sz, PAGE_SIZE - sz, "device:ch:%d/%d,lun:%d/%d,chk:%d/%d,sec:%d/%d\n", gppaf->ch_offset, gppaf->ch_len, gppaf->lun_offset, gppaf->lun_len, @@ -278,11 +278,11 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page) pblk_err(pblk, "corrupted free line list:%d/%d\n", nr_free_lines, free_line_cnt); - sz 
= snprintf(page, PAGE_SIZE - sz, + sz = scnprintf(page, PAGE_SIZE - sz, "line: nluns:%d, nblks:%d, nsecs:%d\n", geo->all_luns, lm->blk_per_line, lm->sec_per_line); - sz += snprintf(page + sz, PAGE_SIZE - sz, + sz += scnprintf(page + sz, PAGE_SIZE - sz, "lines:d:%d,l:%d-f:%d,m:%d/%d,c:%d,b:%d,co:%d(d:%d,l:%d)t:%d\n", cur_data, cur_log, nr_free_lines, @@ -292,12 +292,12 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page) d_line_cnt, l_line_cnt, l_mg->nr_lines); - sz += snprintf(page + sz, PAGE_SIZE - sz, + sz += scnprintf(page + sz, PAGE_SIZE - sz, "GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, werr: %d, queue:%d\n", gc_full, gc_high, gc_mid, gc_low, gc_empty, gc_werr, atomic_read(&pblk->gc.read_inflight_gc)); - sz += snprintf(page + sz, PAGE_SIZE - sz, + sz += scnprintf(page + sz, PAGE_SIZE - sz, "data (%d) cur:%d, left:%d, vsc:%d, s:%d, map:%d/%d (%d)\n", cur_data, cur_sec, msecs, vsc, sec_in_line, map_weight, lm->sec_per_line, @@ -313,19 +313,19 @@ static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page) struct pblk_line_meta *lm = &pblk->lm; ssize_t sz = 0; - sz = snprintf(page, PAGE_SIZE - sz, + sz = scnprintf(page, PAGE_SIZE - sz, "smeta - len:%d, secs:%d\n", lm->smeta_len, lm->smeta_sec); - sz += snprintf(page + sz, PAGE_SIZE - sz, + sz += scnprintf(page + sz, PAGE_SIZE - sz, "emeta - len:%d, sec:%d, bb_start:%d\n", lm->emeta_len[0], lm->emeta_sec[0], lm->emeta_bb); - sz += snprintf(page + sz, PAGE_SIZE - sz, + sz += scnprintf(page + sz, PAGE_SIZE - sz, "bitmap lengths: sec:%d, blk:%d, lun:%d\n", lm->sec_bitmap_len, lm->blk_bitmap_len, lm->lun_bitmap_len); - sz += snprintf(page + sz, PAGE_SIZE - sz, + sz += scnprintf(page + sz, PAGE_SIZE - sz, "blk_line:%d, sec_line:%d, sec_blk:%d\n", lm->blk_per_line, lm->sec_per_line, @@ -344,12 +344,12 @@ static ssize_t pblk_get_write_amp(u64 user, u64 gc, u64 pad, { int sz; - sz = snprintf(page, PAGE_SIZE, + sz = scnprintf(page, PAGE_SIZE, "user:%lld gc:%lld pad:%lld WA:", user, gc, pad); if 
(!user) { - sz += snprintf(page + sz, PAGE_SIZE - sz, "NaN\n"); + sz += scnprintf(page + sz, PAGE_SIZE - sz, "NaN\n"); } else { u64 wa_int; u32 wa_frac; @@ -358,7 +358,7 @@ static ssize_t pblk_get_write_amp(u64 user, u64 gc, u64 pad, wa_int = div64_u64(wa_int, user); wa_int = div_u64_rem(wa_int, 100000, &wa_frac); - sz += snprintf(page + sz, PAGE_SIZE - sz, "%llu.%05u\n", + sz += scnprintf(page + sz, PAGE_SIZE - sz, "%llu.%05u\n", wa_int, wa_frac); } @@ -401,9 +401,9 @@ static ssize_t pblk_sysfs_get_padding_dist(struct pblk *pblk, char *page) total = atomic64_read(&pblk->nr_flush) - pblk->nr_flush_rst; if (!total) { for (i = 0; i < (buckets + 1); i++) - sz += snprintf(page + sz, PAGE_SIZE - sz, + sz += scnprintf(page + sz, PAGE_SIZE - sz, "%d:0 ", i); - sz += snprintf(page + sz, PAGE_SIZE - sz, "\n"); + sz += scnprintf(page + sz, PAGE_SIZE - sz, "\n"); return sz; } @@ -411,7 +411,7 @@ static ssize_t pblk_sysfs_get_padding_dist(struct pblk *pblk, char *page) for (i = 0; i < buckets; i++) total_buckets += atomic64_read(&pblk->pad_dist[i]); - sz += snprintf(page + sz, PAGE_SIZE - sz, "0:%lld%% ", + sz += scnprintf(page + sz, PAGE_SIZE - sz, "0:%lld%% ", bucket_percentage(total - total_buckets, total)); for (i = 0; i < buckets; i++) { @@ -419,10 +419,10 @@ static ssize_t pblk_sysfs_get_padding_dist(struct pblk *pblk, char *page) p = bucket_percentage(atomic64_read(&pblk->pad_dist[i]), total); - sz += snprintf(page + sz, PAGE_SIZE - sz, "%d:%lld%% ", + sz += scnprintf(page + sz, PAGE_SIZE - sz, "%d:%lld%% ", i + 1, p); } - sz += snprintf(page + sz, PAGE_SIZE - sz, "\n"); + sz += scnprintf(page + sz, PAGE_SIZE - sz, "\n"); return sz; } diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index fa872df4e770..72856e5f23a3 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -101,64 +101,6 @@ #define insert_lock(s, b) ((b)->level <= (s)->lock) -/* - * These macros are for recursing down the btree - they handle the details of - * locking 
and looking up nodes in the cache for you. They're best treated as - * mere syntax when reading code that uses them. - * - * op->lock determines whether we take a read or a write lock at a given depth. - * If you've got a read lock and find that you need a write lock (i.e. you're - * going to have to split), set op->lock and return -EINTR; btree_root() will - * call you again and you'll have the correct lock. - */ - -/** - * btree - recurse down the btree on a specified key - * @fn: function to call, which will be passed the child node - * @key: key to recurse on - * @b: parent btree node - * @op: pointer to struct btree_op - */ -#define btree(fn, key, b, op, ...) \ -({ \ - int _r, l = (b)->level - 1; \ - bool _w = l <= (op)->lock; \ - struct btree *_child = bch_btree_node_get((b)->c, op, key, l, \ - _w, b); \ - if (!IS_ERR(_child)) { \ - _r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__); \ - rw_unlock(_w, _child); \ - } else \ - _r = PTR_ERR(_child); \ - _r; \ -}) - -/** - * btree_root - call a function on the root of the btree - * @fn: function to call, which will be passed the child node - * @c: cache set - * @op: pointer to struct btree_op - */ -#define btree_root(fn, c, op, ...) 
\ -({ \ - int _r = -EINTR; \ - do { \ - struct btree *_b = (c)->root; \ - bool _w = insert_lock(op, _b); \ - rw_lock(_w, _b, _b->level); \ - if (_b == (c)->root && \ - _w == insert_lock(op, _b)) { \ - _r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \ - } \ - rw_unlock(_w, _b); \ - bch_cannibalize_unlock(c); \ - if (_r == -EINTR) \ - schedule(); \ - } while (_r == -EINTR); \ - \ - finish_wait(&(c)->btree_cache_wait, &(op)->wait); \ - _r; \ -}) static inline struct bset *write_block(struct btree *b) { @@ -1848,7 +1790,7 @@ static void bch_btree_gc(struct cache_set *c) /* if CACHE_SET_IO_DISABLE set, gc thread should stop too */ do { - ret = btree_root(gc_root, c, &op, &writes, &stats); + ret = bcache_btree_root(gc_root, c, &op, &writes, &stats); closure_sync(&writes); cond_resched(); @@ -1946,7 +1888,7 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op) } if (p) - ret = btree(check_recurse, p, b, op); + ret = bcache_btree(check_recurse, p, b, op); p = k; } while (p && !ret); @@ -1955,13 +1897,176 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op) return ret; } + +static int bch_btree_check_thread(void *arg) +{ + int ret; + struct btree_check_info *info = arg; + struct btree_check_state *check_state = info->state; + struct cache_set *c = check_state->c; + struct btree_iter iter; + struct bkey *k, *p; + int cur_idx, prev_idx, skip_nr; + int i, n; + + k = p = NULL; + i = n = 0; + cur_idx = prev_idx = 0; + ret = 0; + + /* root node keys are checked before thread created */ + bch_btree_iter_init(&c->root->keys, &iter, NULL); + k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad); + BUG_ON(!k); + + p = k; + while (k) { + /* + * Fetch a root node key index, skip the keys which + * should be fetched by other threads, then check the + * sub-tree indexed by the fetched key. 
+ */ + spin_lock(&check_state->idx_lock); + cur_idx = check_state->key_idx; + check_state->key_idx++; + spin_unlock(&check_state->idx_lock); + + skip_nr = cur_idx - prev_idx; + + while (skip_nr) { + k = bch_btree_iter_next_filter(&iter, + &c->root->keys, + bch_ptr_bad); + if (k) + p = k; + else { + /* + * No more keys to check in root node, + * current checking threads are enough, + * stop creating more. + */ + atomic_set(&check_state->enough, 1); + /* Update check_state->enough earlier */ + smp_mb__after_atomic(); + goto out; + } + skip_nr--; + cond_resched(); + } + + if (p) { + struct btree_op op; + + btree_node_prefetch(c->root, p); + c->gc_stats.nodes++; + bch_btree_op_init(&op, 0); + ret = bcache_btree(check_recurse, p, c->root, &op); + if (ret) + goto out; + } + p = NULL; + prev_idx = cur_idx; + cond_resched(); + } + +out: + info->result = ret; + /* update check_state->started among all CPUs */ + smp_mb__before_atomic(); + if (atomic_dec_and_test(&check_state->started)) + wake_up(&check_state->wait); + + return ret; +} + + + +static int bch_btree_chkthread_nr(void) +{ + int n = num_online_cpus()/2; + + if (n == 0) + n = 1; + else if (n > BCH_BTR_CHKTHREAD_MAX) + n = BCH_BTR_CHKTHREAD_MAX; + + return n; +} + int bch_btree_check(struct cache_set *c) { - struct btree_op op; + int ret = 0; + int i; + struct bkey *k = NULL; + struct btree_iter iter; + struct btree_check_state *check_state; + char name[32]; - bch_btree_op_init(&op, SHRT_MAX); + /* check and mark root node keys */ + for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid) + bch_initial_mark_key(c, c->root->level, k); + + bch_initial_mark_key(c, c->root->level + 1, &c->root->key); + + if (c->root->level == 0) + return 0; + + check_state = kzalloc(sizeof(struct btree_check_state), GFP_KERNEL); + if (!check_state) + return -ENOMEM; - return btree_root(check_recurse, c, &op); + check_state->c = c; + check_state->total_threads = bch_btree_chkthread_nr(); + check_state->key_idx = 0; + 
spin_lock_init(&check_state->idx_lock); + atomic_set(&check_state->started, 0); + atomic_set(&check_state->enough, 0); + init_waitqueue_head(&check_state->wait); + + /* + * Run multiple threads to check btree nodes in parallel, + * if check_state->enough is non-zero, it means current + * running check threads are enough, unncessary to create + * more. + */ + for (i = 0; i < check_state->total_threads; i++) { + /* fetch latest check_state->enough earlier */ + smp_mb__before_atomic(); + if (atomic_read(&check_state->enough)) + break; + + check_state->infos[i].result = 0; + check_state->infos[i].state = check_state; + snprintf(name, sizeof(name), "bch_btrchk[%u]", i); + atomic_inc(&check_state->started); + + check_state->infos[i].thread = + kthread_run(bch_btree_check_thread, + &check_state->infos[i], + name); + if (IS_ERR(check_state->infos[i].thread)) { + pr_err("fails to run thread bch_btrchk[%d]", i); + for (--i; i >= 0; i--) + kthread_stop(check_state->infos[i].thread); + ret = -ENOMEM; + goto out; + } + } + + wait_event_interruptible(check_state->wait, + atomic_read(&check_state->started) == 0 || + test_bit(CACHE_SET_IO_DISABLE, &c->flags)); + + for (i = 0; i < check_state->total_threads; i++) { + if (check_state->infos[i].result) { + ret = check_state->infos[i].result; + goto out; + } + } + +out: + kfree(check_state); + return ret; } void bch_initial_gc_finish(struct cache_set *c) @@ -2401,7 +2506,7 @@ static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op, while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) { - ret = btree(map_nodes_recurse, k, b, + ret = bcache_btree(map_nodes_recurse, k, b, op, from, fn, flags); from = NULL; @@ -2419,10 +2524,10 @@ static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op, int __bch_btree_map_nodes(struct btree_op *op, struct cache_set *c, struct bkey *from, btree_map_nodes_fn *fn, int flags) { - return btree_root(map_nodes_recurse, c, op, from, fn, flags); + return 
bcache_btree_root(map_nodes_recurse, c, op, from, fn, flags); } -static int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op, +int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op, struct bkey *from, btree_map_keys_fn *fn, int flags) { @@ -2435,7 +2540,8 @@ static int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op, while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) { ret = !b->level ? fn(op, b, k) - : btree(map_keys_recurse, k, b, op, from, fn, flags); + : bcache_btree(map_keys_recurse, k, + b, op, from, fn, flags); from = NULL; if (ret != MAP_CONTINUE) @@ -2452,7 +2558,7 @@ static int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op, int bch_btree_map_keys(struct btree_op *op, struct cache_set *c, struct bkey *from, btree_map_keys_fn *fn, int flags) { - return btree_root(map_keys_recurse, c, op, from, fn, flags); + return bcache_btree_root(map_keys_recurse, c, op, from, fn, flags); } /* Keybuf code */ diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index f4dcca449391..257969980c49 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -145,6 +145,9 @@ struct btree { struct bio *bio; }; + + + #define BTREE_FLAG(flag) \ static inline bool btree_node_ ## flag(struct btree *b) \ { return test_bit(BTREE_NODE_ ## flag, &b->flags); } \ @@ -216,6 +219,25 @@ struct btree_op { unsigned int insert_collision:1; }; +struct btree_check_state; +struct btree_check_info { + struct btree_check_state *state; + struct task_struct *thread; + int result; +}; + +#define BCH_BTR_CHKTHREAD_MAX 64 +struct btree_check_state { + struct cache_set *c; + int total_threads; + int key_idx; + spinlock_t idx_lock; + atomic_t started; + atomic_t enough; + wait_queue_head_t wait; + struct btree_check_info infos[BCH_BTR_CHKTHREAD_MAX]; +}; + static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level) { memset(op, 0, sizeof(struct btree_op)); @@ -284,6 +306,65 @@ 
static inline void force_wake_up_gc(struct cache_set *c) wake_up_gc(c); } +/* + * These macros are for recursing down the btree - they handle the details of + * locking and looking up nodes in the cache for you. They're best treated as + * mere syntax when reading code that uses them. + * + * op->lock determines whether we take a read or a write lock at a given depth. + * If you've got a read lock and find that you need a write lock (i.e. you're + * going to have to split), set op->lock and return -EINTR; btree_root() will + * call you again and you'll have the correct lock. + */ + +/** + * btree - recurse down the btree on a specified key + * @fn: function to call, which will be passed the child node + * @key: key to recurse on + * @b: parent btree node + * @op: pointer to struct btree_op + */ +#define bcache_btree(fn, key, b, op, ...) \ +({ \ + int _r, l = (b)->level - 1; \ + bool _w = l <= (op)->lock; \ + struct btree *_child = bch_btree_node_get((b)->c, op, key, l, \ + _w, b); \ + if (!IS_ERR(_child)) { \ + _r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__); \ + rw_unlock(_w, _child); \ + } else \ + _r = PTR_ERR(_child); \ + _r; \ +}) + +/** + * btree_root - call a function on the root of the btree + * @fn: function to call, which will be passed the child node + * @c: cache set + * @op: pointer to struct btree_op + */ +#define bcache_btree_root(fn, c, op, ...) 
\ +({ \ + int _r = -EINTR; \ + do { \ + struct btree *_b = (c)->root; \ + bool _w = insert_lock(op, _b); \ + rw_lock(_w, _b, _b->level); \ + if (_b == (c)->root && \ + _w == insert_lock(op, _b)) { \ + _r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \ + } \ + rw_unlock(_w, _b); \ + bch_cannibalize_unlock(c); \ + if (_r == -EINTR) \ + schedule(); \ + } while (_r == -EINTR); \ + \ + finish_wait(&(c)->btree_cache_wait, &(op)->wait); \ + _r; \ +}) + #define MAP_DONE 0 #define MAP_CONTINUE 1 @@ -314,6 +395,9 @@ typedef int (btree_map_keys_fn)(struct btree_op *op, struct btree *b, struct bkey *k); int bch_btree_map_keys(struct btree_op *op, struct cache_set *c, struct bkey *from, btree_map_keys_fn *fn, int flags); +int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op, + struct bkey *from, btree_map_keys_fn *fn, + int flags); typedef bool (keybuf_pred_fn)(struct keybuf *buf, struct bkey *k); diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 3470fae4eabc..323276994aab 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -154,7 +154,7 @@ static ssize_t bch_snprint_string_list(char *buf, size_t i; for (i = 0; list[i]; i++) - out += snprintf(out, buf + size - out, + out += scnprintf(out, buf + size - out, i == selected ? 
"[%s] " : "%s ", list[i]); out[-1] = '\n'; diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 4a40f9eadeaf..3f7641fb28d5 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -183,7 +183,7 @@ static void update_writeback_rate(struct work_struct *work) */ set_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags); /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */ - smp_mb(); + smp_mb__after_atomic(); /* * CACHE_SET_IO_DISABLE might be set via sysfs interface, @@ -193,7 +193,7 @@ static void update_writeback_rate(struct work_struct *work) test_bit(CACHE_SET_IO_DISABLE, &c->flags)) { clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags); /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */ - smp_mb(); + smp_mb__after_atomic(); return; } @@ -229,7 +229,7 @@ static void update_writeback_rate(struct work_struct *work) */ clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags); /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */ - smp_mb(); + smp_mb__after_atomic(); } static unsigned int writeback_delay(struct cached_dev *dc, @@ -785,7 +785,9 @@ static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b, return MAP_CONTINUE; } -void bch_sectors_dirty_init(struct bcache_device *d) +static int bch_root_node_dirty_init(struct cache_set *c, + struct bcache_device *d, + struct bkey *k) { struct sectors_dirty_init op; int ret; @@ -796,8 +798,13 @@ void bch_sectors_dirty_init(struct bcache_device *d) op.start = KEY(op.inode, 0, 0); do { - ret = bch_btree_map_keys(&op.op, d->c, &op.start, - sectors_dirty_init_fn, 0); + ret = bcache_btree(map_keys_recurse, + k, + c->root, + &op.op, + &op.start, + sectors_dirty_init_fn, + 0); if (ret == -EAGAIN) schedule_timeout_interruptible( msecs_to_jiffies(INIT_KEYS_SLEEP_MS)); @@ -806,6 +813,151 @@ void bch_sectors_dirty_init(struct bcache_device *d) break; } } while (ret == -EAGAIN); + + return ret; +} + +static int bch_dirty_init_thread(void *arg) +{ 
+ struct dirty_init_thrd_info *info = arg; + struct bch_dirty_init_state *state = info->state; + struct cache_set *c = state->c; + struct btree_iter iter; + struct bkey *k, *p; + int cur_idx, prev_idx, skip_nr; + int i; + + k = p = NULL; + i = 0; + cur_idx = prev_idx = 0; + + bch_btree_iter_init(&c->root->keys, &iter, NULL); + k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad); + BUG_ON(!k); + + p = k; + + while (k) { + spin_lock(&state->idx_lock); + cur_idx = state->key_idx; + state->key_idx++; + spin_unlock(&state->idx_lock); + + skip_nr = cur_idx - prev_idx; + + while (skip_nr) { + k = bch_btree_iter_next_filter(&iter, + &c->root->keys, + bch_ptr_bad); + if (k) + p = k; + else { + atomic_set(&state->enough, 1); + /* Update state->enough earlier */ + smp_mb__after_atomic(); + goto out; + } + skip_nr--; + cond_resched(); + } + + if (p) { + if (bch_root_node_dirty_init(c, state->d, p) < 0) + goto out; + } + + p = NULL; + prev_idx = cur_idx; + cond_resched(); + } + +out: + /* In order to wake up state->wait in time */ + smp_mb__before_atomic(); + if (atomic_dec_and_test(&state->started)) + wake_up(&state->wait); + + return 0; +} + +static int bch_btre_dirty_init_thread_nr(void) +{ + int n = num_online_cpus()/2; + + if (n == 0) + n = 1; + else if (n > BCH_DIRTY_INIT_THRD_MAX) + n = BCH_DIRTY_INIT_THRD_MAX; + + return n; +} + +void bch_sectors_dirty_init(struct bcache_device *d) +{ + int i; + struct bkey *k = NULL; + struct btree_iter iter; + struct sectors_dirty_init op; + struct cache_set *c = d->c; + struct bch_dirty_init_state *state; + char name[32]; + + /* Just count root keys if no leaf node */ + if (c->root->level == 0) { + bch_btree_op_init(&op.op, -1); + op.inode = d->id; + op.count = 0; + op.start = KEY(op.inode, 0, 0); + + for_each_key_filter(&c->root->keys, + k, &iter, bch_ptr_invalid) + sectors_dirty_init_fn(&op.op, c->root, k); + return; + } + + state = kzalloc(sizeof(struct bch_dirty_init_state), GFP_KERNEL); + if (!state) { + 
pr_warn("sectors dirty init failed: cannot allocate memory"); + return; + } + + state->c = c; + state->d = d; + state->total_threads = bch_btre_dirty_init_thread_nr(); + state->key_idx = 0; + spin_lock_init(&state->idx_lock); + atomic_set(&state->started, 0); + atomic_set(&state->enough, 0); + init_waitqueue_head(&state->wait); + + for (i = 0; i < state->total_threads; i++) { + /* Fetch latest state->enough earlier */ + smp_mb__before_atomic(); + if (atomic_read(&state->enough)) + break; + + state->infos[i].state = state; + atomic_inc(&state->started); + snprintf(name, sizeof(name), "bch_dirty_init[%d]", i); + + state->infos[i].thread = + kthread_run(bch_dirty_init_thread, + &state->infos[i], + name); + if (IS_ERR(state->infos[i].thread)) { + pr_err("fails to run thread bch_dirty_init[%d]", i); + for (--i; i >= 0; i--) + kthread_stop(state->infos[i].thread); + goto out; + } + } + + wait_event_interruptible(state->wait, + atomic_read(&state->started) == 0 || + test_bit(CACHE_SET_IO_DISABLE, &c->flags)); + +out: + kfree(state); } void bch_cached_dev_writeback_init(struct cached_dev *dc) diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index 4e4c6810dc3c..b029843ce5b6 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -16,6 +16,7 @@ #define BCH_AUTO_GC_DIRTY_THRESHOLD 50 +#define BCH_DIRTY_INIT_THRD_MAX 64 /* * 14 (16384ths) is chosen here as something that each backing device * should be a reasonable fraction of the share, and not to blow up @@ -23,6 +24,24 @@ */ #define WRITEBACK_SHARE_SHIFT 14 +struct bch_dirty_init_state; +struct dirty_init_thrd_info { + struct bch_dirty_init_state *state; + struct task_struct *thread; +}; + +struct bch_dirty_init_state { + struct cache_set *c; + struct bcache_device *d; + int total_threads; + int key_idx; + spinlock_t idx_lock; + atomic_t started; + atomic_t enough; + wait_queue_head_t wait; + struct dirty_init_thrd_info infos[BCH_DIRTY_INIT_THRD_MAX]; +}; + static inline 
uint64_t bcache_dev_sectors_dirty(struct bcache_device *d) { uint64_t i, ret = 0; diff --git a/drivers/md/md.c b/drivers/md/md.c index cd1210a0d957..271e8a587354 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6185,7 +6185,7 @@ EXPORT_SYMBOL_GPL(md_stop_writes); static void mddev_detach(struct mddev *mddev) { md_bitmap_wait_behind_writes(mddev); - if (mddev->pers && mddev->pers->quiesce) { + if (mddev->pers && mddev->pers->quiesce && !mddev->suspended) { mddev->pers->quiesce(mddev, 1); mddev->pers->quiesce(mddev, 0); } diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index b9358db83e96..9c17ed32be64 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -32,8 +32,6 @@ config NVME_HWMON a hardware monitoring device will be created for each NVMe drive in the system. - If unsure, say N. - config NVME_FABRICS tristate diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 41ad07f6a564..4f907e3beda1 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -171,7 +171,6 @@ static void nvme_do_delete_ctrl(struct nvme_ctrl *ctrl) nvme_remove_namespaces(ctrl); ctrl->ops->delete_ctrl(ctrl); nvme_uninit_ctrl(ctrl); - nvme_put_ctrl(ctrl); } static void nvme_delete_ctrl_work(struct work_struct *work) @@ -192,21 +191,16 @@ int nvme_delete_ctrl(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_delete_ctrl); -static int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl) +static void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl) { - int ret = 0; - /* * Keep a reference until nvme_do_delete_ctrl() complete, * since ->delete_ctrl can free the controller. 
*/ nvme_get_ctrl(ctrl); - if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING)) - ret = -EBUSY; - if (!ret) + if (nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING)) nvme_do_delete_ctrl(ctrl); nvme_put_ctrl(ctrl); - return ret; } static inline bool nvme_ns_has_pi(struct nvme_ns *ns) @@ -291,11 +285,8 @@ void nvme_complete_rq(struct request *req) nvme_req(req)->ctrl->comp_seen = true; if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) { - if ((req->cmd_flags & REQ_NVME_MPATH) && - blk_path_error(status)) { - nvme_failover_req(req); + if ((req->cmd_flags & REQ_NVME_MPATH) && nvme_failover_req(req)) return; - } if (!blk_queue_dying(req->q)) { nvme_retry_req(req); @@ -1055,6 +1046,43 @@ static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id) return error; } +static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids, + struct nvme_ns_id_desc *cur) +{ + const char *warn_str = "ctrl returned bogus length:"; + void *data = cur; + + switch (cur->nidt) { + case NVME_NIDT_EUI64: + if (cur->nidl != NVME_NIDT_EUI64_LEN) { + dev_warn(ctrl->device, "%s %d for NVME_NIDT_EUI64\n", + warn_str, cur->nidl); + return -1; + } + memcpy(ids->eui64, data + sizeof(*cur), NVME_NIDT_EUI64_LEN); + return NVME_NIDT_EUI64_LEN; + case NVME_NIDT_NGUID: + if (cur->nidl != NVME_NIDT_NGUID_LEN) { + dev_warn(ctrl->device, "%s %d for NVME_NIDT_NGUID\n", + warn_str, cur->nidl); + return -1; + } + memcpy(ids->nguid, data + sizeof(*cur), NVME_NIDT_NGUID_LEN); + return NVME_NIDT_NGUID_LEN; + case NVME_NIDT_UUID: + if (cur->nidl != NVME_NIDT_UUID_LEN) { + dev_warn(ctrl->device, "%s %d for NVME_NIDT_UUID\n", + warn_str, cur->nidl); + return -1; + } + uuid_copy(&ids->uuid, data + sizeof(*cur)); + return NVME_NIDT_UUID_LEN; + default: + /* Skip unknown types */ + return cur->nidl; + } +} + static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid, struct nvme_ns_ids *ids) { @@ -1074,8 +1102,17 @@ static int nvme_identify_ns_descs(struct 
nvme_ctrl *ctrl, unsigned nsid, status = nvme_submit_sync_cmd(ctrl->admin_q, &c, data, NVME_IDENTIFY_DATA_SIZE); - if (status) + if (status) { + dev_warn(ctrl->device, + "Identify Descriptors failed (%d)\n", status); + /* + * Don't treat an error as fatal, as we potentially already + * have a NGUID or EUI-64. + */ + if (status > 0) + status = 0; goto free_data; + } for (pos = 0; pos < NVME_IDENTIFY_DATA_SIZE; pos += len) { struct nvme_ns_id_desc *cur = data + pos; @@ -1083,42 +1120,9 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid, if (cur->nidl == 0) break; - switch (cur->nidt) { - case NVME_NIDT_EUI64: - if (cur->nidl != NVME_NIDT_EUI64_LEN) { - dev_warn(ctrl->device, - "ctrl returned bogus length: %d for NVME_NIDT_EUI64\n", - cur->nidl); - goto free_data; - } - len = NVME_NIDT_EUI64_LEN; - memcpy(ids->eui64, data + pos + sizeof(*cur), len); - break; - case NVME_NIDT_NGUID: - if (cur->nidl != NVME_NIDT_NGUID_LEN) { - dev_warn(ctrl->device, - "ctrl returned bogus length: %d for NVME_NIDT_NGUID\n", - cur->nidl); - goto free_data; - } - len = NVME_NIDT_NGUID_LEN; - memcpy(ids->nguid, data + pos + sizeof(*cur), len); - break; - case NVME_NIDT_UUID: - if (cur->nidl != NVME_NIDT_UUID_LEN) { - dev_warn(ctrl->device, - "ctrl returned bogus length: %d for NVME_NIDT_UUID\n", - cur->nidl); - goto free_data; - } - len = NVME_NIDT_UUID_LEN; - uuid_copy(&ids->uuid, data + pos + sizeof(*cur)); - break; - default: - /* Skip unknown types */ - len = cur->nidl; - break; - } + len = nvme_process_ns_desc(ctrl, ids, cur); + if (len < 0) + goto free_data; len += sizeof(*cur); } @@ -1584,6 +1588,47 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, return ret; } +#ifdef CONFIG_COMPAT +struct nvme_user_io32 { + __u8 opcode; + __u8 flags; + __u16 control; + __u16 nblocks; + __u16 rsvd; + __u64 metadata; + __u64 addr; + __u64 slba; + __u32 dsmgmt; + __u32 reftag; + __u16 apptag; + __u16 appmask; +} __attribute__((__packed__)); + +#define 
NVME_IOCTL_SUBMIT_IO32 _IOW('N', 0x42, struct nvme_user_io32) + +static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + /* + * Corresponds to the difference of NVME_IOCTL_SUBMIT_IO + * between 32 bit programs and 64 bit kernel. + * The cause is that the results of sizeof(struct nvme_user_io), + * which is used to define NVME_IOCTL_SUBMIT_IO, + * are not same between 32 bit compiler and 64 bit compiler. + * NVME_IOCTL_SUBMIT_IO32 is for 64 bit kernel handling + * NVME_IOCTL_SUBMIT_IO issued from 32 bit programs. + * Other IOCTL numbers are same between 32 bit and 64 bit. + * So there is nothing to do regarding to other IOCTL numbers. + */ + if (cmd == NVME_IOCTL_SUBMIT_IO32) + return nvme_ioctl(bdev, mode, NVME_IOCTL_SUBMIT_IO, arg); + + return nvme_ioctl(bdev, mode, cmd, arg); +} +#else +#define nvme_compat_ioctl NULL +#endif /* CONFIG_COMPAT */ + static int nvme_open(struct block_device *bdev, fmode_t mode) { struct nvme_ns *ns = bdev->bd_disk->private_data; @@ -1721,26 +1766,15 @@ static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns) static int nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid, struct nvme_id_ns *id, struct nvme_ns_ids *ids) { - int ret = 0; - memset(ids, 0, sizeof(*ids)); if (ctrl->vs >= NVME_VS(1, 1, 0)) memcpy(ids->eui64, id->eui64, sizeof(id->eui64)); if (ctrl->vs >= NVME_VS(1, 2, 0)) memcpy(ids->nguid, id->nguid, sizeof(id->nguid)); - if (ctrl->vs >= NVME_VS(1, 3, 0)) { - /* Don't treat error as fatal we potentially - * already have a NGUID or EUI-64 - */ - ret = nvme_identify_ns_descs(ctrl, nsid, ids); - if (ret) - dev_warn(ctrl->device, - "Identify Descriptors failed (%d)\n", ret); - if (ret > 0) - ret = 0; - } - return ret; + if (ctrl->vs >= NVME_VS(1, 3, 0)) + return nvme_identify_ns_descs(ctrl, nsid, ids); + return 0; } static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids) @@ -2027,7 +2061,7 @@ EXPORT_SYMBOL_GPL(nvme_sec_submit); static const 
struct block_device_operations nvme_fops = { .owner = THIS_MODULE, .ioctl = nvme_ioctl, - .compat_ioctl = nvme_ioctl, + .compat_ioctl = nvme_compat_ioctl, .open = nvme_open, .release = nvme_release, .getgeo = nvme_getgeo, @@ -2055,7 +2089,7 @@ const struct block_device_operations nvme_ns_head_ops = { .open = nvme_ns_head_open, .release = nvme_ns_head_release, .ioctl = nvme_ioctl, - .compat_ioctl = nvme_ioctl, + .compat_ioctl = nvme_compat_ioctl, .getgeo = nvme_getgeo, .pr_ops = &nvme_pr_ops, }; @@ -2074,13 +2108,13 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled) if ((csts & NVME_CSTS_RDY) == bit) break; - msleep(100); + usleep_range(1000, 2000); if (fatal_signal_pending(current)) return -EINTR; if (time_after(jiffies, timeout)) { dev_err(ctrl->device, - "Device not ready; aborting %s\n", enabled ? - "initialisation" : "reset"); + "Device not ready; aborting %s, CSTS=0x%x\n", + enabled ? "initialisation" : "reset", csts); return -ENODEV; } } @@ -2591,8 +2625,7 @@ static bool nvme_validate_cntlid(struct nvme_subsystem *subsys, lockdep_assert_held(&nvme_subsystems_lock); list_for_each_entry(tmp, &subsys->ctrls, subsys_entry) { - if (tmp->state == NVME_CTRL_DELETING || - tmp->state == NVME_CTRL_DEAD) + if (nvme_state_terminal(tmp)) continue; if (tmp->cntlid == ctrl->cntlid) { @@ -3193,6 +3226,10 @@ static ssize_t nvme_sysfs_delete(struct device *dev, { struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + /* Can't delete non-created controllers */ + if (!ctrl->created) + return -EBUSY; + if (device_remove_file_self(dev, attr)) nvme_delete_ctrl_sync(ctrl); return count; @@ -3242,6 +3279,26 @@ static ssize_t nvme_sysfs_show_subsysnqn(struct device *dev, } static DEVICE_ATTR(subsysnqn, S_IRUGO, nvme_sysfs_show_subsysnqn, NULL); +static ssize_t nvme_sysfs_show_hostnqn(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + return snprintf(buf, PAGE_SIZE, "%s\n", 
ctrl->opts->host->nqn); +} +static DEVICE_ATTR(hostnqn, S_IRUGO, nvme_sysfs_show_hostnqn, NULL); + +static ssize_t nvme_sysfs_show_hostid(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + return snprintf(buf, PAGE_SIZE, "%pU\n", &ctrl->opts->host->id); +} +static DEVICE_ATTR(hostid, S_IRUGO, nvme_sysfs_show_hostid, NULL); + static ssize_t nvme_sysfs_show_address(struct device *dev, struct device_attribute *attr, char *buf) @@ -3267,6 +3324,8 @@ static struct attribute *nvme_dev_attrs[] = { &dev_attr_numa_node.attr, &dev_attr_queue_count.attr, &dev_attr_sqsize.attr, + &dev_attr_hostnqn.attr, + &dev_attr_hostid.attr, NULL }; @@ -3280,6 +3339,10 @@ static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj, return 0; if (a == &dev_attr_address.attr && !ctrl->ops->get_address) return 0; + if (a == &dev_attr_hostnqn.attr && !ctrl->opts) + return 0; + if (a == &dev_attr_hostid.attr && !ctrl->opts) + return 0; return a->mode; } @@ -3294,7 +3357,7 @@ static const struct attribute_group *nvme_dev_attr_groups[] = { NULL, }; -static struct nvme_ns_head *__nvme_find_ns_head(struct nvme_subsystem *subsys, +static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys, unsigned nsid) { struct nvme_ns_head *h; @@ -3327,7 +3390,8 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys, } static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, - unsigned nsid, struct nvme_id_ns *id) + unsigned nsid, struct nvme_id_ns *id, + struct nvme_ns_ids *ids) { struct nvme_ns_head *head; size_t size = sizeof(*head); @@ -3350,12 +3414,9 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, goto out_ida_remove; head->subsys = ctrl->subsys; head->ns_id = nsid; + head->ids = *ids; kref_init(&head->ref); - ret = nvme_report_ns_ids(ctrl, nsid, id, &head->ids); - if (ret) - goto out_cleanup_srcu; - ret = __nvme_check_ids(ctrl->subsys, head); if (ret) { 
dev_err(ctrl->device, @@ -3390,24 +3451,23 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, struct nvme_ctrl *ctrl = ns->ctrl; bool is_shared = id->nmic & (1 << 0); struct nvme_ns_head *head = NULL; + struct nvme_ns_ids ids; int ret = 0; + ret = nvme_report_ns_ids(ctrl, nsid, id, &ids); + if (ret) + goto out; + mutex_lock(&ctrl->subsys->lock); if (is_shared) - head = __nvme_find_ns_head(ctrl->subsys, nsid); + head = nvme_find_ns_head(ctrl->subsys, nsid); if (!head) { - head = nvme_alloc_ns_head(ctrl, nsid, id); + head = nvme_alloc_ns_head(ctrl, nsid, id, &ids); if (IS_ERR(head)) { ret = PTR_ERR(head); goto out_unlock; } } else { - struct nvme_ns_ids ids; - - ret = nvme_report_ns_ids(ctrl, nsid, id, &ids); - if (ret) - goto out_unlock; - if (!nvme_ns_ids_equal(&head->ids, &ids)) { dev_err(ctrl->device, "IDs don't match for shared namespace %d\n", @@ -3422,6 +3482,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, out_unlock: mutex_unlock(&ctrl->subsys->lock); +out: if (ret > 0) ret = blk_status_to_errno(nvme_error_status(ret)); return ret; @@ -3480,7 +3541,7 @@ static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns) return 0; } -static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) +static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns *ns; struct gendisk *disk; @@ -3490,13 +3551,11 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node); if (!ns) - return -ENOMEM; + return; ns->queue = blk_mq_init_queue(ctrl->tagset); - if (IS_ERR(ns->queue)) { - ret = PTR_ERR(ns->queue); + if (IS_ERR(ns->queue)) goto out_free_ns; - } if (ctrl->opts && ctrl->opts->data_digest) ns->queue->backing_dev_info->capabilities @@ -3519,10 +3578,8 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) if (ret) goto out_free_queue; - if (id->ncap == 0) { - ret = -EINVAL; + if (id->ncap == 0) /* no namespace (legacy quirk) */ 
goto out_free_id; - } ret = nvme_init_ns_head(ns, nsid, id); if (ret) @@ -3531,10 +3588,8 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) nvme_set_disk_name(disk_name, ns, ctrl, &flags); disk = alloc_disk_node(0, node); - if (!disk) { - ret = -ENOMEM; + if (!disk) goto out_unlink_ns; - } disk->fops = &nvme_fops; disk->private_data = ns; @@ -3565,7 +3620,7 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name); kfree(id); - return 0; + return; out_put_disk: put_disk(ns->disk); out_unlink_ns: @@ -3579,9 +3634,6 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) blk_cleanup_queue(ns->queue); out_free_ns: kfree(ns); - if (ret > 0) - ret = blk_status_to_errno(nvme_error_status(ret)); - return ret; } static void nvme_ns_remove(struct nvme_ns *ns) @@ -3987,6 +4039,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl) nvme_queue_scan(ctrl); nvme_start_queues(ctrl); } + ctrl->created = true; } EXPORT_SYMBOL_GPL(nvme_start_ctrl); @@ -3995,6 +4048,7 @@ void nvme_uninit_ctrl(struct nvme_ctrl *ctrl) nvme_fault_inject_fini(&ctrl->fault_inject); dev_pm_qos_hide_latency_tolerance(ctrl->device); cdev_device_del(&ctrl->cdev, ctrl->device); + nvme_put_ctrl(ctrl); } EXPORT_SYMBOL_GPL(nvme_uninit_ctrl); @@ -4077,6 +4131,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, if (ret) goto out_release_instance; + nvme_get_ctrl(ctrl); cdev_init(&ctrl->cdev, &nvme_dev_fops); ctrl->cdev.owner = ops->module; ret = cdev_device_add(&ctrl->cdev, ctrl->device); @@ -4095,6 +4150,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, return 0; out_free_name: + nvme_put_ctrl(ctrl); kfree_const(ctrl->device->kobj.name); out_release_instance: ida_simple_remove(&nvme_instance_ida, ctrl->instance); @@ -4299,6 +4355,7 @@ static void __exit nvme_core_exit(void) destroy_workqueue(nvme_delete_wq); destroy_workqueue(nvme_reset_wq); destroy_workqueue(nvme_wq); + 
ida_destroy(&nvme_instance_ida); } MODULE_LICENSE("GPL"); diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 74b8818ac9a1..2a6c8190eeb7 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -105,14 +105,14 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size) int len = 0; if (ctrl->opts->mask & NVMF_OPT_TRADDR) - len += snprintf(buf, size, "traddr=%s", ctrl->opts->traddr); + len += scnprintf(buf, size, "traddr=%s", ctrl->opts->traddr); if (ctrl->opts->mask & NVMF_OPT_TRSVCID) - len += snprintf(buf + len, size - len, "%strsvcid=%s", + len += scnprintf(buf + len, size - len, "%strsvcid=%s", (len) ? "," : "", ctrl->opts->trsvcid); if (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR) - len += snprintf(buf + len, size - len, "%shost_traddr=%s", + len += scnprintf(buf + len, size - len, "%shost_traddr=%s", (len) ? "," : "", ctrl->opts->host_traddr); - len += snprintf(buf + len, size - len, "\n"); + len += scnprintf(buf + len, size - len, "\n"); return len; } diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 5a70ac395d53..a8bf2fb1287b 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3181,10 +3181,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, goto fail_ctrl; } - nvme_get_ctrl(&ctrl->ctrl); - if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) { - nvme_put_ctrl(&ctrl->ctrl); dev_err(ctrl->ctrl.device, "NVME-FC{%d}: failed to schedule initial connect\n", ctrl->cnum); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index a38d7f196aba..61bf87592570 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -64,17 +64,12 @@ void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, } } -void nvme_failover_req(struct request *req) +bool nvme_failover_req(struct request *req) { struct nvme_ns *ns = req->q->queuedata; u16 status = nvme_req(req)->status; unsigned long flags; - 
spin_lock_irqsave(&ns->head->requeue_lock, flags); - blk_steal_bios(&ns->head->requeue_list, req); - spin_unlock_irqrestore(&ns->head->requeue_lock, flags); - blk_mq_end_request(req, 0); - switch (status & 0x7ff) { case NVME_SC_ANA_TRANSITION: case NVME_SC_ANA_INACCESSIBLE: @@ -103,15 +98,17 @@ void nvme_failover_req(struct request *req) nvme_mpath_clear_current_path(ns); break; default: - /* - * Reset the controller for any non-ANA error as we don't know - * what caused the error. - */ - nvme_reset_ctrl(ns->ctrl); - break; + /* This was a non-ANA error so follow the normal error path. */ + return false; } + spin_lock_irqsave(&ns->head->requeue_lock, flags); + blk_steal_bios(&ns->head->requeue_list, req); + spin_unlock_irqrestore(&ns->head->requeue_lock, flags); + blk_mq_end_request(req, 0); + kblockd_schedule_work(&ns->head->requeue_work); + return true; } void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 1024fec7914c..2e04a36296d9 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -259,6 +259,7 @@ struct nvme_ctrl { struct nvme_command ka_cmd; struct work_struct fw_act_work; unsigned long events; + bool created; #ifdef CONFIG_NVME_MULTIPATH /* asymmetric namespace access: */ @@ -550,7 +551,7 @@ void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys); void nvme_mpath_start_freeze(struct nvme_subsystem *subsys); void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, struct nvme_ctrl *ctrl, int *flags); -void nvme_failover_req(struct request *req); +bool nvme_failover_req(struct request *req); void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl); int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head); void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id); @@ -599,8 +600,9 @@ static inline void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance); } -static 
inline void nvme_failover_req(struct request *req) +static inline bool nvme_failover_req(struct request *req) { + return false; } static inline void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) { diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d3f23d6254e4..4e79e412b276 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -971,39 +971,25 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) nvme_end_request(req, cqe->status, cqe->result); } -static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end) -{ - while (start != end) { - nvme_handle_cqe(nvmeq, start); - if (++start == nvmeq->q_depth) - start = 0; - } -} - static inline void nvme_update_cq_head(struct nvme_queue *nvmeq) { - if (nvmeq->cq_head == nvmeq->q_depth - 1) { + if (++nvmeq->cq_head == nvmeq->q_depth) { nvmeq->cq_head = 0; - nvmeq->cq_phase = !nvmeq->cq_phase; - } else { - nvmeq->cq_head++; + nvmeq->cq_phase ^= 1; } } -static inline int nvme_process_cq(struct nvme_queue *nvmeq, u16 *start, - u16 *end, unsigned int tag) +static inline int nvme_process_cq(struct nvme_queue *nvmeq) { int found = 0; - *start = nvmeq->cq_head; while (nvme_cqe_pending(nvmeq)) { - if (tag == -1U || nvmeq->cqes[nvmeq->cq_head].command_id == tag) - found++; + found++; + nvme_handle_cqe(nvmeq, nvmeq->cq_head); nvme_update_cq_head(nvmeq); } - *end = nvmeq->cq_head; - if (*start != *end) + if (found) nvme_ring_cq_doorbell(nvmeq); return found; } @@ -1012,21 +998,16 @@ static irqreturn_t nvme_irq(int irq, void *data) { struct nvme_queue *nvmeq = data; irqreturn_t ret = IRQ_NONE; - u16 start, end; /* * The rmb/wmb pair ensures we see all updates from a previous run of * the irq handler, even if that was on another CPU. 
*/ rmb(); - nvme_process_cq(nvmeq, &start, &end, -1); + if (nvme_process_cq(nvmeq)) + ret = IRQ_HANDLED; wmb(); - if (start != end) { - nvme_complete_cqes(nvmeq, start, end); - return IRQ_HANDLED; - } - return ret; } @@ -1039,46 +1020,30 @@ static irqreturn_t nvme_irq_check(int irq, void *data) } /* - * Poll for completions any queue, including those not dedicated to polling. + * Poll for completions for any interrupt driven queue * Can be called from any context. */ -static int nvme_poll_irqdisable(struct nvme_queue *nvmeq, unsigned int tag) +static void nvme_poll_irqdisable(struct nvme_queue *nvmeq) { struct pci_dev *pdev = to_pci_dev(nvmeq->dev->dev); - u16 start, end; - int found; - /* - * For a poll queue we need to protect against the polling thread - * using the CQ lock. For normal interrupt driven threads we have - * to disable the interrupt to avoid racing with it. - */ - if (test_bit(NVMEQ_POLLED, &nvmeq->flags)) { - spin_lock(&nvmeq->cq_poll_lock); - found = nvme_process_cq(nvmeq, &start, &end, tag); - spin_unlock(&nvmeq->cq_poll_lock); - } else { - disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); - found = nvme_process_cq(nvmeq, &start, &end, tag); - enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); - } + WARN_ON_ONCE(test_bit(NVMEQ_POLLED, &nvmeq->flags)); - nvme_complete_cqes(nvmeq, start, end); - return found; + disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); + nvme_process_cq(nvmeq); + enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); } static int nvme_poll(struct blk_mq_hw_ctx *hctx) { struct nvme_queue *nvmeq = hctx->driver_data; - u16 start, end; bool found; if (!nvme_cqe_pending(nvmeq)) return 0; spin_lock(&nvmeq->cq_poll_lock); - found = nvme_process_cq(nvmeq, &start, &end, -1); - nvme_complete_cqes(nvmeq, start, end); + found = nvme_process_cq(nvmeq); spin_unlock(&nvmeq->cq_poll_lock); return found; @@ -1255,7 +1220,12 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) /* * Did we miss an 
interrupt? */ - if (nvme_poll_irqdisable(nvmeq, req->tag)) { + if (test_bit(NVMEQ_POLLED, &nvmeq->flags)) + nvme_poll(req->mq_hctx); + else + nvme_poll_irqdisable(nvmeq); + + if (blk_mq_request_completed(req)) { dev_warn(dev->ctrl.device, "I/O %d QID %d timeout, completion polled\n", req->tag, nvmeq->qid); @@ -1398,7 +1368,7 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) else nvme_disable_ctrl(&dev->ctrl); - nvme_poll_irqdisable(nvmeq, -1); + nvme_poll_irqdisable(nvmeq); } /* @@ -1409,13 +1379,10 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) */ static void nvme_reap_pending_cqes(struct nvme_dev *dev) { - u16 start, end; int i; - for (i = dev->ctrl.queue_count - 1; i > 0; i--) { - nvme_process_cq(&dev->queues[i], &start, &end, -1); - nvme_complete_cqes(&dev->queues[i], start, end); - } + for (i = dev->ctrl.queue_count - 1; i > 0; i--) + nvme_process_cq(&dev->queues[i]); } static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues, @@ -2503,13 +2470,13 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) struct nvme_dev *dev = to_nvme_dev(ctrl); nvme_dbbuf_dma_free(dev); - put_device(dev->dev); nvme_free_tagset(dev); if (dev->ctrl.admin_q) blk_put_queue(dev->ctrl.admin_q); - kfree(dev->queues); free_opal_dev(dev->ctrl.opal_dev); mempool_destroy(dev->iod_mempool); + put_device(dev->dev); + kfree(dev->queues); kfree(dev); } @@ -2689,7 +2656,7 @@ static int nvme_pci_get_address(struct nvme_ctrl *ctrl, char *buf, int size) { struct pci_dev *pdev = to_pci_dev(to_nvme_dev(ctrl)->dev); - return snprintf(buf, size, "%s", dev_name(&pdev->dev)); + return snprintf(buf, size, "%s\n", dev_name(&pdev->dev)); } static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { @@ -2835,7 +2802,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); nvme_reset_ctrl(&dev->ctrl); - nvme_get_ctrl(&dev->ctrl); 
async_schedule(nvme_async_probe, dev); return 0; @@ -2907,10 +2873,9 @@ static void nvme_remove(struct pci_dev *pdev) nvme_free_host_mem(dev); nvme_dev_remove_admin(dev); nvme_free_queues(dev, 0); - nvme_uninit_ctrl(&dev->ctrl); nvme_release_prp_pools(dev); nvme_dev_unmap(dev); - nvme_put_ctrl(&dev->ctrl); + nvme_uninit_ctrl(&dev->ctrl); } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 0fe08c4dfd2f..86603d9b0cef 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1024,8 +1024,13 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new) changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); if (!changed) { - /* state change failure is ok if we're in DELETING state */ + /* + * state change failure is ok if we're in DELETING state, + * unless we're during creation of a new controller to + * avoid races with teardown flow. + */ WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING); + WARN_ON_ONCE(new); ret = -EINVAL; goto destroy_io; } @@ -2045,8 +2050,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n", ctrl->ctrl.opts->subsysnqn, &ctrl->addr); - nvme_get_ctrl(&ctrl->ctrl); - mutex_lock(&nvme_rdma_ctrl_mutex); list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list); mutex_unlock(&nvme_rdma_ctrl_mutex); diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 49d4373b84eb..0ef14f0fad86 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -20,6 +20,16 @@ struct nvme_tcp_queue; +/* Define the socket priority to use for connections were it is desirable + * that the NIC consider performing optimized packet processing or filtering. + * A non-zero value being sufficient to indicate general consideration of any + * possible optimization. Making it a module param allows for alternative + * values that may be unique for some NIC implementations. 
+ */ +static int so_priority; +module_param(so_priority, int, 0644); +MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority"); + enum nvme_tcp_send_state { NVME_TCP_SEND_CMD_PDU = 0, NVME_TCP_SEND_H2C_PDU, @@ -1017,8 +1027,15 @@ static int nvme_tcp_try_send(struct nvme_tcp_queue *queue) if (req->state == NVME_TCP_SEND_DDGST) ret = nvme_tcp_try_send_ddgst(req); done: - if (ret == -EAGAIN) + if (ret == -EAGAIN) { ret = 0; + } else if (ret < 0) { + dev_err(queue->ctrl->ctrl.device, + "failed to send request %d\n", ret); + if (ret != -EPIPE && ret != -ECONNRESET) + nvme_tcp_fail_request(queue->request); + nvme_tcp_done_send_req(queue); + } return ret; } @@ -1049,25 +1066,16 @@ static void nvme_tcp_io_work(struct work_struct *w) int result; result = nvme_tcp_try_send(queue); - if (result > 0) { + if (result > 0) pending = true; - } else if (unlikely(result < 0)) { - dev_err(queue->ctrl->ctrl.device, - "failed to send request %d\n", result); - - /* - * Fail the request unless peer closed the connection, - * in which case error recovery flow will complete all. 
- */ - if ((result != -EPIPE) && (result != -ECONNRESET)) - nvme_tcp_fail_request(queue->request); - nvme_tcp_done_send_req(queue); - return; - } + else if (unlikely(result < 0)) + break; result = nvme_tcp_try_recv(queue); if (result > 0) pending = true; + else if (unlikely(result < 0)) + break; if (!pending) return; @@ -1248,13 +1256,67 @@ free_icreq: return ret; } +static bool nvme_tcp_admin_queue(struct nvme_tcp_queue *queue) +{ + return nvme_tcp_queue_id(queue) == 0; +} + +static bool nvme_tcp_default_queue(struct nvme_tcp_queue *queue) +{ + struct nvme_tcp_ctrl *ctrl = queue->ctrl; + int qid = nvme_tcp_queue_id(queue); + + return !nvme_tcp_admin_queue(queue) && + qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT]; +} + +static bool nvme_tcp_read_queue(struct nvme_tcp_queue *queue) +{ + struct nvme_tcp_ctrl *ctrl = queue->ctrl; + int qid = nvme_tcp_queue_id(queue); + + return !nvme_tcp_admin_queue(queue) && + !nvme_tcp_default_queue(queue) && + qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] + + ctrl->io_queues[HCTX_TYPE_READ]; +} + +static bool nvme_tcp_poll_queue(struct nvme_tcp_queue *queue) +{ + struct nvme_tcp_ctrl *ctrl = queue->ctrl; + int qid = nvme_tcp_queue_id(queue); + + return !nvme_tcp_admin_queue(queue) && + !nvme_tcp_default_queue(queue) && + !nvme_tcp_read_queue(queue) && + qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] + + ctrl->io_queues[HCTX_TYPE_READ] + + ctrl->io_queues[HCTX_TYPE_POLL]; +} + +static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue) +{ + struct nvme_tcp_ctrl *ctrl = queue->ctrl; + int qid = nvme_tcp_queue_id(queue); + int n = 0; + + if (nvme_tcp_default_queue(queue)) + n = qid - 1; + else if (nvme_tcp_read_queue(queue)) + n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] - 1; + else if (nvme_tcp_poll_queue(queue)) + n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] - + ctrl->io_queues[HCTX_TYPE_READ] - 1; + queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false); +} + static int nvme_tcp_alloc_queue(struct nvme_ctrl 
*nctrl, int qid, size_t queue_size) { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); struct nvme_tcp_queue *queue = &ctrl->queues[qid]; struct linger sol = { .l_onoff = 1, .l_linger = 0 }; - int ret, opt, rcv_pdu_size, n; + int ret, opt, rcv_pdu_size; queue->ctrl = ctrl; INIT_LIST_HEAD(&queue->send_list); @@ -1309,6 +1371,17 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, goto err_sock; } + if (so_priority > 0) { + ret = kernel_setsockopt(queue->sock, SOL_SOCKET, SO_PRIORITY, + (char *)&so_priority, sizeof(so_priority)); + if (ret) { + dev_err(ctrl->ctrl.device, + "failed to set SO_PRIORITY sock opt, ret %d\n", + ret); + goto err_sock; + } + } + /* Set socket type of service */ if (nctrl->opts->tos >= 0) { opt = nctrl->opts->tos; @@ -1322,11 +1395,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, } queue->sock->sk->sk_allocation = GFP_ATOMIC; - if (!qid) - n = 0; - else - n = (qid - 1) % num_online_cpus(); - queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false); + nvme_tcp_set_queue_io_cpu(queue); queue->request = NULL; queue->data_remaining = 0; queue->ddgst_remaining = 0; @@ -1861,8 +1930,13 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new) } if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) { - /* state change failure is ok if we're in DELETING state */ + /* + * state change failure is ok if we're in DELETING state, + * unless we're during creation of a new controller to + * avoid races with teardown flow. 
+ */ WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING); + WARN_ON_ONCE(new); ret = -EINVAL; goto destroy_io; } @@ -2359,8 +2433,6 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev, dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n", ctrl->ctrl.opts->subsysnqn, &ctrl->addr); - nvme_get_ctrl(&ctrl->ctrl); - mutex_lock(&nvme_tcp_ctrl_mutex); list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list); mutex_unlock(&nvme_tcp_ctrl_mutex); diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index cca759c918a4..9d6f75cfa77c 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -323,12 +323,25 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req) nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR); } +static void nvmet_id_set_model_number(struct nvme_id_ctrl *id, + struct nvmet_subsys *subsys) +{ + const char *model = NVMET_DEFAULT_CTRL_MODEL; + struct nvmet_subsys_model *subsys_model; + + rcu_read_lock(); + subsys_model = rcu_dereference(subsys->model); + if (subsys_model) + model = subsys_model->number; + memcpy_and_pad(id->mn, sizeof(id->mn), model, strlen(model), ' '); + rcu_read_unlock(); +} + static void nvmet_execute_identify_ctrl(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; struct nvme_id_ctrl *id; u16 status = 0; - const char model[] = "Linux"; id = kzalloc(sizeof(*id), GFP_KERNEL); if (!id) { @@ -343,7 +356,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) memset(id->sn, ' ', sizeof(id->sn)); bin2hex(id->sn, &ctrl->subsys->serial, min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2)); - memcpy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1, ' '); + nvmet_id_set_model_number(id, ctrl->subsys); memcpy_and_pad(id->fr, sizeof(id->fr), UTS_RELEASE, strlen(UTS_RELEASE), ' '); @@ -357,8 +370,12 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) /* we support multiple ports, multiples hosts and ANA: */ id->cmic 
= (1 << 0) | (1 << 1) | (1 << 3); - /* no limit on data transfer sizes for now */ - id->mdts = 0; + /* Limit MDTS according to transport capability */ + if (ctrl->ops->get_mdts) + id->mdts = ctrl->ops->get_mdts(ctrl); + else + id->mdts = 0; + id->cntlid = cpu_to_le16(ctrl->cntlid); id->ver = cpu_to_le32(ctrl->subsys->ver); @@ -721,13 +738,22 @@ static void nvmet_execute_set_features(struct nvmet_req *req) { struct nvmet_subsys *subsys = req->sq->ctrl->subsys; u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); + u32 cdw11 = le32_to_cpu(req->cmd->common.cdw11); u16 status = 0; + u16 nsqr; + u16 ncqr; if (!nvmet_check_data_len(req, 0)) return; switch (cdw10 & 0xff) { case NVME_FEAT_NUM_QUEUES: + ncqr = (cdw11 >> 16) & 0xffff; + nsqr = cdw11 & 0xffff; + if (ncqr == 0xffff || nsqr == 0xffff) { + status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; + break; + } nvmet_set_result(req, (subsys->max_qid - 1) | ((subsys->max_qid - 1) << 16)); break; diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 98613a45bd3b..7aa10788b7c8 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -395,14 +395,12 @@ static ssize_t nvmet_ns_device_uuid_store(struct config_item *item, struct nvmet_subsys *subsys = ns->subsys; int ret = 0; - mutex_lock(&subsys->lock); if (ns->enabled) { ret = -EBUSY; goto out_unlock; } - if (uuid_parse(page, &ns->uuid)) ret = -EINVAL; @@ -815,10 +813,10 @@ static ssize_t nvmet_subsys_attr_version_show(struct config_item *item, (int)NVME_MAJOR(subsys->ver), (int)NVME_MINOR(subsys->ver), (int)NVME_TERTIARY(subsys->ver)); - else - return snprintf(page, PAGE_SIZE, "%d.%d\n", - (int)NVME_MAJOR(subsys->ver), - (int)NVME_MINOR(subsys->ver)); + + return snprintf(page, PAGE_SIZE, "%d.%d\n", + (int)NVME_MAJOR(subsys->ver), + (int)NVME_MINOR(subsys->ver)); } static ssize_t nvmet_subsys_attr_version_store(struct config_item *item, @@ -828,7 +826,6 @@ static ssize_t nvmet_subsys_attr_version_store(struct config_item *item, 
int major, minor, tertiary = 0; int ret; - ret = sscanf(page, "%d.%d.%d\n", &major, &minor, &tertiary); if (ret != 2 && ret != 3) return -EINVAL; @@ -852,20 +849,151 @@ static ssize_t nvmet_subsys_attr_serial_show(struct config_item *item, static ssize_t nvmet_subsys_attr_serial_store(struct config_item *item, const char *page, size_t count) { - struct nvmet_subsys *subsys = to_subsys(item); + u64 serial; + + if (sscanf(page, "%llx\n", &serial) != 1) + return -EINVAL; down_write(&nvmet_config_sem); - sscanf(page, "%llx\n", &subsys->serial); + to_subsys(item)->serial = serial; up_write(&nvmet_config_sem); return count; } CONFIGFS_ATTR(nvmet_subsys_, attr_serial); +static ssize_t nvmet_subsys_attr_cntlid_min_show(struct config_item *item, + char *page) +{ + return snprintf(page, PAGE_SIZE, "%u\n", to_subsys(item)->cntlid_min); +} + +static ssize_t nvmet_subsys_attr_cntlid_min_store(struct config_item *item, + const char *page, size_t cnt) +{ + u16 cntlid_min; + + if (sscanf(page, "%hu\n", &cntlid_min) != 1) + return -EINVAL; + + if (cntlid_min == 0) + return -EINVAL; + + down_write(&nvmet_config_sem); + if (cntlid_min >= to_subsys(item)->cntlid_max) + goto out_unlock; + to_subsys(item)->cntlid_min = cntlid_min; + up_write(&nvmet_config_sem); + return cnt; + +out_unlock: + up_write(&nvmet_config_sem); + return -EINVAL; +} +CONFIGFS_ATTR(nvmet_subsys_, attr_cntlid_min); + +static ssize_t nvmet_subsys_attr_cntlid_max_show(struct config_item *item, + char *page) +{ + return snprintf(page, PAGE_SIZE, "%u\n", to_subsys(item)->cntlid_max); +} + +static ssize_t nvmet_subsys_attr_cntlid_max_store(struct config_item *item, + const char *page, size_t cnt) +{ + u16 cntlid_max; + + if (sscanf(page, "%hu\n", &cntlid_max) != 1) + return -EINVAL; + + if (cntlid_max == 0) + return -EINVAL; + + down_write(&nvmet_config_sem); + if (cntlid_max <= to_subsys(item)->cntlid_min) + goto out_unlock; + to_subsys(item)->cntlid_max = cntlid_max; + up_write(&nvmet_config_sem); + return cnt; + 
+out_unlock: + up_write(&nvmet_config_sem); + return -EINVAL; +} +CONFIGFS_ATTR(nvmet_subsys_, attr_cntlid_max); + +static ssize_t nvmet_subsys_attr_model_show(struct config_item *item, + char *page) +{ + struct nvmet_subsys *subsys = to_subsys(item); + struct nvmet_subsys_model *subsys_model; + char *model = NVMET_DEFAULT_CTRL_MODEL; + int ret; + + rcu_read_lock(); + subsys_model = rcu_dereference(subsys->model); + if (subsys_model) + model = subsys_model->number; + ret = snprintf(page, PAGE_SIZE, "%s\n", model); + rcu_read_unlock(); + + return ret; +} + +/* See Section 1.5 of NVMe 1.4 */ +static bool nvmet_is_ascii(const char c) +{ + return c >= 0x20 && c <= 0x7e; +} + +static ssize_t nvmet_subsys_attr_model_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_subsys *subsys = to_subsys(item); + struct nvmet_subsys_model *new_model; + char *new_model_number; + int pos = 0, len; + + len = strcspn(page, "\n"); + if (!len) + return -EINVAL; + + for (pos = 0; pos < len; pos++) { + if (!nvmet_is_ascii(page[pos])) + return -EINVAL; + } + + new_model_number = kstrndup(page, len, GFP_KERNEL); + if (!new_model_number) + return -ENOMEM; + + new_model = kzalloc(sizeof(*new_model) + len + 1, GFP_KERNEL); + if (!new_model) { + kfree(new_model_number); + return -ENOMEM; + } + memcpy(new_model->number, new_model_number, len); + + down_write(&nvmet_config_sem); + mutex_lock(&subsys->lock); + new_model = rcu_replace_pointer(subsys->model, new_model, + mutex_is_locked(&subsys->lock)); + mutex_unlock(&subsys->lock); + up_write(&nvmet_config_sem); + + kfree_rcu(new_model, rcuhead); + + return count; +} +CONFIGFS_ATTR(nvmet_subsys_, attr_model); + static struct configfs_attribute *nvmet_subsys_attrs[] = { &nvmet_subsys_attr_attr_allow_any_host, &nvmet_subsys_attr_attr_version, &nvmet_subsys_attr_attr_serial, + &nvmet_subsys_attr_attr_cntlid_min, + &nvmet_subsys_attr_attr_cntlid_max, + &nvmet_subsys_attr_attr_model, NULL, }; diff --git 
a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 576de773b4db..b685f99d56a1 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1289,8 +1289,11 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, if (!ctrl->sqs) goto out_free_cqs; + if (subsys->cntlid_min > subsys->cntlid_max) + goto out_free_cqs; + ret = ida_simple_get(&cntlid_ida, - NVME_CNTLID_MIN, NVME_CNTLID_MAX, + subsys->cntlid_min, subsys->cntlid_max, GFP_KERNEL); if (ret < 0) { status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR; @@ -1438,7 +1441,8 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, kfree(subsys); return ERR_PTR(-ENOMEM); } - + subsys->cntlid_min = NVME_CNTLID_MIN; + subsys->cntlid_max = NVME_CNTLID_MAX; kref_init(&subsys->ref); mutex_init(&subsys->lock); @@ -1457,6 +1461,7 @@ static void nvmet_subsys_free(struct kref *ref) WARN_ON_ONCE(!list_empty(&subsys->namespaces)); kfree(subsys->subsysnqn); + kfree_rcu(subsys->model, rcuhead); kfree(subsys); } diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 4df4ebde208a..0d54e730cbf2 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -485,7 +485,6 @@ out_destroy_admin: out_disable: dev_warn(ctrl->ctrl.device, "Removing after reset failure\n"); nvme_uninit_ctrl(&ctrl->ctrl); - nvme_put_ctrl(&ctrl->ctrl); } static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = { @@ -618,8 +617,6 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, dev_info(ctrl->ctrl.device, "new ctrl: \"%s\"\n", ctrl->ctrl.opts->subsysnqn); - nvme_get_ctrl(&ctrl->ctrl); - changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); WARN_ON_ONCE(!changed); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index eda28b22a2c8..421dff3ea143 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -23,6 +23,7 @@ #define NVMET_ASYNC_EVENTS 4 #define NVMET_ERROR_LOG_SLOTS 128 #define NVMET_NO_ERROR_LOC ((u16)-1) 
+#define NVMET_DEFAULT_CTRL_MODEL "Linux" /* * Supported optional AENs: @@ -202,6 +203,11 @@ struct nvmet_ctrl { struct nvme_error_slot slots[NVMET_ERROR_LOG_SLOTS]; }; +struct nvmet_subsys_model { + struct rcu_head rcuhead; + char number[]; +}; + struct nvmet_subsys { enum nvme_subsys_type type; @@ -211,6 +217,8 @@ struct nvmet_subsys { struct list_head namespaces; unsigned int nr_namespaces; unsigned int max_nsid; + u16 cntlid_min; + u16 cntlid_max; struct list_head ctrls; @@ -227,6 +235,8 @@ struct nvmet_subsys { struct config_group namespaces_group; struct config_group allowed_hosts_group; + + struct nvmet_subsys_model __rcu *model; }; static inline struct nvmet_subsys *to_subsys(struct config_item *item) @@ -279,6 +289,7 @@ struct nvmet_fabrics_ops { struct nvmet_port *port, char *traddr); u16 (*install_queue)(struct nvmet_sq *nvme_sq); void (*discovery_chg)(struct nvmet_port *port); + u8 (*get_mdts)(const struct nvmet_ctrl *ctrl); }; #define NVMET_MAX_INLINE_BIOVEC 8 diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 37d262a65877..9e1b8c61f54e 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -31,6 +31,9 @@ #define NVMET_RDMA_MAX_INLINE_SGE 4 #define NVMET_RDMA_MAX_INLINE_DATA_SIZE max_t(int, SZ_16K, PAGE_SIZE) +/* Assume mpsmin == device_page_size == 4KB */ +#define NVMET_RDMA_MAX_MDTS 8 + struct nvmet_rdma_cmd { struct ib_sge sge[NVMET_RDMA_MAX_INLINE_SGE + 1]; struct ib_cqe cqe; @@ -975,7 +978,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) { struct ib_qp_init_attr qp_attr; struct nvmet_rdma_device *ndev = queue->dev; - int comp_vector, nr_cqe, ret, i; + int comp_vector, nr_cqe, ret, i, factor; /* * Spread the io queues across completion vectors, @@ -1008,7 +1011,9 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) qp_attr.qp_type = IB_QPT_RC; /* +1 for drain */ qp_attr.cap.max_send_wr = queue->send_queue_size + 1; - qp_attr.cap.max_rdma_ctxs = 
queue->send_queue_size; + factor = rdma_rw_mr_factor(ndev->device, queue->cm_id->port_num, + 1 << NVMET_RDMA_MAX_MDTS); + qp_attr.cap.max_rdma_ctxs = queue->send_queue_size * factor; qp_attr.cap.max_send_sge = max(ndev->device->attrs.max_sge_rd, ndev->device->attrs.max_send_sge); @@ -1602,6 +1607,11 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req, } } +static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl) +{ + return NVMET_RDMA_MAX_MDTS; +} + static const struct nvmet_fabrics_ops nvmet_rdma_ops = { .owner = THIS_MODULE, .type = NVMF_TRTYPE_RDMA, @@ -1612,6 +1622,7 @@ static const struct nvmet_fabrics_ops nvmet_rdma_ops = { .queue_response = nvmet_rdma_queue_response, .delete_ctrl = nvmet_rdma_delete_ctrl, .disc_traddr = nvmet_rdma_disc_port_addr, + .get_mdts = nvmet_rdma_get_mdts, }; static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 5bb5342b8d0c..f0da04e960f4 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -19,6 +19,16 @@ #define NVMET_TCP_DEF_INLINE_DATA_SIZE (4 * PAGE_SIZE) +/* Define the socket priority to use for connections where it is desirable + * that the NIC consider performing optimized packet processing or filtering. + * A non-zero value being sufficient to indicate general consideration of any + * possible optimization. Making it a module param allows for alternative + * values that may be unique for some NIC implementations. 
+ */ +static int so_priority; +module_param(so_priority, int, 0644); +MODULE_PARM_DESC(so_priority, "nvmet tcp socket optimize priority"); + #define NVMET_TCP_RECV_BUDGET 8 #define NVMET_TCP_SEND_BUDGET 8 #define NVMET_TCP_IO_WORK_BUDGET 64 @@ -622,7 +632,7 @@ static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch) return 1; } -static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd) +static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch) { struct nvmet_tcp_queue *queue = cmd->queue; struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; @@ -632,6 +642,9 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd) }; int ret; + if (!last_in_batch && cmd->queue->send_list_len) + msg.msg_flags |= MSG_MORE; + ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); if (unlikely(ret <= 0)) return ret; @@ -672,7 +685,7 @@ static int nvmet_tcp_try_send_one(struct nvmet_tcp_queue *queue, } if (cmd->state == NVMET_TCP_SEND_DDGST) { - ret = nvmet_try_send_ddgst(cmd); + ret = nvmet_try_send_ddgst(cmd, last_in_batch); if (ret <= 0) goto done_send; } @@ -794,7 +807,7 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) icresp->hdr.pdo = 0; icresp->hdr.plen = cpu_to_le32(icresp->hdr.hlen); icresp->pfv = cpu_to_le16(NVME_TCP_PFV_1_0); - icresp->maxdata = cpu_to_le32(0xffff); /* FIXME: support r2t */ + icresp->maxdata = cpu_to_le32(0x400000); /* 16M arbitrary limit */ icresp->cpda = 0; if (queue->hdr_digest) icresp->digest |= NVME_TCP_HDR_DIGEST_ENABLE; @@ -1439,6 +1452,13 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue) if (ret) return ret; + if (so_priority > 0) { + ret = kernel_setsockopt(sock, SOL_SOCKET, SO_PRIORITY, + (char *)&so_priority, sizeof(so_priority)); + if (ret) + return ret; + } + /* Set socket type of service */ if (inet->rcv_tos > 0) { int tos = inet->rcv_tos; @@ -1628,6 +1648,15 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport) goto err_sock; } + if (so_priority > 0) 
{ + ret = kernel_setsockopt(port->sock, SOL_SOCKET, SO_PRIORITY, + (char *)&so_priority, sizeof(so_priority)); + if (ret) { + pr_err("failed to set SO_PRIORITY sock opt %d\n", ret); + goto err_sock; + } + } + ret = kernel_bind(port->sock, (struct sockaddr *)&port->addr, sizeof(port->addr)); if (ret) { |