diff options
author | Doug Ledford <dledford@redhat.com> | 2016-12-14 14:44:08 -0500 |
---|---|---|
committer | Doug Ledford <dledford@redhat.com> | 2016-12-14 14:44:08 -0500 |
commit | 253f8b22e0ad643edafd75e831e5c765732877f5 (patch) | |
tree | c0e682e339f287a70606927863b9cc622c9952f1 /drivers | |
parent | 884fa4f3048c4c43facfa6ba3b710169f7ee162c (diff) | |
parent | 22dccc5454a39427de7b87a080d026b6bf66a7b9 (diff) |
Merge branch 'hfi1' into merge-test
Diffstat (limited to 'drivers')
42 files changed, 1677 insertions, 648 deletions
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index a26a9a0bfc41..4962b6ef1f34 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -125,6 +125,7 @@ int node_affinity_init(void) cpumask_weight(topology_sibling_cpumask( cpumask_first(&node_affinity.proc.mask) )); + node_affinity.num_possible_nodes = num_possible_nodes(); node_affinity.num_online_nodes = num_online_nodes(); node_affinity.num_online_cpus = num_online_cpus(); @@ -135,7 +136,7 @@ int node_affinity_init(void) */ init_real_cpu_mask(); - hfi1_per_node_cntr = kcalloc(num_possible_nodes(), + hfi1_per_node_cntr = kcalloc(node_affinity.num_possible_nodes, sizeof(*hfi1_per_node_cntr), GFP_KERNEL); if (!hfi1_per_node_cntr) return -ENOMEM; diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h index b89ea3c0ee1a..c9453b3d47b4 100644 --- a/drivers/infiniband/hw/hfi1/affinity.h +++ b/drivers/infiniband/hw/hfi1/affinity.h @@ -70,14 +70,6 @@ struct cpu_mask_set { uint gen; }; -struct hfi1_affinity { - struct cpu_mask_set def_intr; - struct cpu_mask_set rcv_intr; - struct cpumask real_cpu_mask; - /* spin lock to protect affinity struct */ - spinlock_t lock; -}; - struct hfi1_msix_entry; /* Initialize non-HT cpu cores mask */ @@ -119,6 +111,7 @@ struct hfi1_affinity_node_list { struct cpumask real_cpu_mask; struct cpu_mask_set proc; int num_core_siblings; + int num_possible_nodes; int num_online_nodes; int num_online_cpus; struct mutex lock; /* protects affinity nodes */ diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 9bf5f23544d4..37d8af50cc13 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -8488,7 +8488,10 @@ static int do_8051_command( */ if (type == HCMD_WRITE_LCB_CSR) { in_data |= ((*out_data) & 0xffffffffffull) << 8; - reg = ((((*out_data) >> 40) & 0xff) << + /* must preserve COMPLETED - it is tied to 
hardware */ + reg = read_csr(dd, DC_DC8051_CFG_EXT_DEV_0); + reg &= DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK; + reg |= ((((*out_data) >> 40) & 0xff) << DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT) | ((((*out_data) >> 48) & 0xffff) << DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT); @@ -9567,11 +9570,11 @@ int bringup_serdes(struct hfi1_pportdata *ppd) if (HFI1_CAP_IS_KSET(EXTENDED_PSN)) add_rcvctrl(dd, RCV_CTRL_RCV_EXTENDED_PSN_ENABLE_SMASK); - guid = ppd->guid; + guid = ppd->guids[HFI1_PORT_GUID_INDEX]; if (!guid) { if (dd->base_guid) guid = dd->base_guid + ppd->port - 1; - ppd->guid = guid; + ppd->guids[HFI1_PORT_GUID_INDEX] = guid; } /* Set linkinit_reason on power up per OPA spec */ diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h index 5b9993899789..5bfa839d1c48 100644 --- a/drivers/infiniband/hw/hfi1/chip_registers.h +++ b/drivers/infiniband/hw/hfi1/chip_registers.h @@ -415,6 +415,9 @@ #define ASIC_CFG_SBUS_REQUEST_DATA_IN_SHIFT 32 #define ASIC_CFG_SBUS_REQUEST_RECEIVER_ADDR_SHIFT 0 #define ASIC_CFG_SCRATCH (ASIC + 0x000000000020) +#define ASIC_CFG_SCRATCH_1 (ASIC_CFG_SCRATCH + 0x08) +#define ASIC_CFG_SCRATCH_2 (ASIC_CFG_SCRATCH + 0x10) +#define ASIC_CFG_SCRATCH_3 (ASIC_CFG_SCRATCH + 0x18) #define ASIC_CFG_THERM_POLL_EN (ASIC + 0x000000000050) #define ASIC_EEP_ADDR_CMD (ASIC + 0x000000000308) #define ASIC_EEP_ADDR_CMD_EP_ADDR_MASK 0xFFFFFFull diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 632ba21759ab..8725f4c086cf 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -541,6 +541,114 @@ static ssize_t asic_flags_write(struct file *file, const char __user *buf, return ret; } +/* read the dc8051 memory */ +static ssize_t dc8051_memory_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct hfi1_pportdata *ppd = private2ppd(file); + ssize_t rval; + void *tmp; + loff_t start, end; + + /* the checks below 
expect the position to be positive */ + if (*ppos < 0) + return -EINVAL; + + tmp = kzalloc(DC8051_DATA_MEM_SIZE, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + /* + * Fill in the requested portion of the temporary buffer from the + * 8051 memory. The 8051 memory read is done in terms of 8 bytes. + * Adjust start and end to fit. Skip reading anything if out of + * range. + */ + start = *ppos & ~0x7; /* round down */ + if (start < DC8051_DATA_MEM_SIZE) { + end = (*ppos + count + 7) & ~0x7; /* round up */ + if (end > DC8051_DATA_MEM_SIZE) + end = DC8051_DATA_MEM_SIZE; + rval = read_8051_data(ppd->dd, start, end - start, + (u64 *)(tmp + start)); + if (rval) + goto done; + } + + rval = simple_read_from_buffer(buf, count, ppos, tmp, + DC8051_DATA_MEM_SIZE); +done: + kfree(tmp); + return rval; +} + +static ssize_t debugfs_lcb_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct hfi1_pportdata *ppd = private2ppd(file); + struct hfi1_devdata *dd = ppd->dd; + unsigned long total, csr_off; + u64 data; + + if (*ppos < 0) + return -EINVAL; + /* only read 8 byte quantities */ + if ((count % 8) != 0) + return -EINVAL; + /* offset must be 8-byte aligned */ + if ((*ppos % 8) != 0) + return -EINVAL; + /* do nothing if out of range or zero count */ + if (*ppos >= (LCB_END - LCB_START) || !count) + return 0; + /* reduce count if needed */ + if (*ppos + count > LCB_END - LCB_START) + count = (LCB_END - LCB_START) - *ppos; + + csr_off = LCB_START + *ppos; + for (total = 0; total < count; total += 8, csr_off += 8) { + if (read_lcb_csr(dd, csr_off, (u64 *)&data)) + break; /* failed */ + if (put_user(data, (unsigned long __user *)(buf + total))) + break; + } + *ppos += total; + return total; +} + +static ssize_t debugfs_lcb_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hfi1_pportdata *ppd = private2ppd(file); + struct hfi1_devdata *dd = ppd->dd; + unsigned long total, csr_off, data; + + if (*ppos < 0) + return 
-EINVAL; + /* only write 8 byte quantities */ + if ((count % 8) != 0) + return -EINVAL; + /* offset must be 8-byte aligned */ + if ((*ppos % 8) != 0) + return -EINVAL; + /* do nothing if out of range or zero count */ + if (*ppos >= (LCB_END - LCB_START) || !count) + return 0; + /* reduce count if needed */ + if (*ppos + count > LCB_END - LCB_START) + count = (LCB_END - LCB_START) - *ppos; + + csr_off = LCB_START + *ppos; + for (total = 0; total < count; total += 8, csr_off += 8) { + if (get_user(data, (unsigned long __user *)(buf + total))) + break; + if (write_lcb_csr(dd, csr_off, data)) + break; /* failed */ + } + *ppos += total; + return total; +} + /* * read the per-port QSFP data for ppd */ @@ -931,6 +1039,8 @@ static const struct counter_info port_cntr_ops[] = { DEBUGFS_XOPS("qsfp2", qsfp2_debugfs_read, qsfp2_debugfs_write, qsfp2_debugfs_open, qsfp2_debugfs_release), DEBUGFS_OPS("asic_flags", asic_flags_read, asic_flags_write), + DEBUGFS_OPS("dc8051_memory", dc8051_memory_read, NULL), + DEBUGFS_OPS("lcb", debugfs_lcb_read, debugfs_lcb_write), }; static void *_sdma_cpu_list_seq_start(struct seq_file *s, loff_t *pos) diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 6563e4d38b80..d4261163bd25 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -793,8 +793,7 @@ static inline void process_rcv_qp_work(struct hfi1_packet *packet) hfi1_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); } - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } } diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c index e70c223801b4..26da124c88e2 100644 --- a/drivers/infiniband/hw/hfi1/eprom.c +++ b/drivers/infiniband/hw/hfi1/eprom.c @@ -207,6 +207,40 @@ done_asic: /* magic character sequence that trails an image */ #define IMAGE_TRAIL_MAGIC "egamiAPO" +/* EPROM file types */ +#define HFI1_EFT_PLATFORM_CONFIG 2 + +/* segment size 
- 128 KiB */ +#define SEG_SIZE (128 * 1024) + +struct hfi1_eprom_footer { + u32 oprom_size; /* size of the oprom, in bytes */ + u16 num_table_entries; + u16 version; /* version of this footer */ + u32 magic; /* must be last */ +}; + +struct hfi1_eprom_table_entry { + u32 type; /* file type */ + u32 offset; /* file offset from start of EPROM */ + u32 size; /* file size, in bytes */ +}; + +/* + * Calculate the max number of table entries that will fit within a directory + * buffer of size 'dir_size'. + */ +#define MAX_TABLE_ENTRIES(dir_size) \ + (((dir_size) - sizeof(struct hfi1_eprom_footer)) / \ + sizeof(struct hfi1_eprom_table_entry)) + +#define DIRECTORY_SIZE(n) (sizeof(struct hfi1_eprom_footer) + \ + (sizeof(struct hfi1_eprom_table_entry) * (n))) + +#define MAGIC4(a, b, c, d) ((d) << 24 | (c) << 16 | (b) << 8 | (a)) +#define FOOTER_MAGIC MAGIC4('e', 'p', 'r', 'm') +#define FOOTER_VERSION 1 + /* * Read all of partition 1. The actual file is at the front. Adjust * the returned size if a trailing image magic is found. @@ -242,6 +276,167 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data, } /* + * The segment magic has been checked. There is a footer and table of + * contents present. + * + * directory is a u32 aligned buffer of size EP_PAGE_SIZE. 
+ */ +static int read_segment_platform_config(struct hfi1_devdata *dd, + void *directory, void **data, u32 *size) +{ + struct hfi1_eprom_footer *footer; + struct hfi1_eprom_table_entry *table; + struct hfi1_eprom_table_entry *entry; + void *buffer = NULL; + void *table_buffer = NULL; + int ret, i; + u32 directory_size; + u32 seg_base, seg_offset; + u32 bytes_available, ncopied, to_copy; + + /* the footer is at the end of the directory */ + footer = (struct hfi1_eprom_footer *) + (directory + EP_PAGE_SIZE - sizeof(*footer)); + + /* make sure the structure version is supported */ + if (footer->version != FOOTER_VERSION) + return -EINVAL; + + /* oprom size cannot be larger than a segment */ + if (footer->oprom_size >= SEG_SIZE) + return -EINVAL; + + /* the file table must fit in a segment with the oprom */ + if (footer->num_table_entries > + MAX_TABLE_ENTRIES(SEG_SIZE - footer->oprom_size)) + return -EINVAL; + + /* find the file table start, which precedes the footer */ + directory_size = DIRECTORY_SIZE(footer->num_table_entries); + if (directory_size <= EP_PAGE_SIZE) { + /* the file table fits into the directory buffer handed in */ + table = (struct hfi1_eprom_table_entry *) + (directory + EP_PAGE_SIZE - directory_size); + } else { + /* need to allocate and read more */ + table_buffer = kmalloc(directory_size, GFP_KERNEL); + if (!table_buffer) + return -ENOMEM; + ret = read_length(dd, SEG_SIZE - directory_size, + directory_size, table_buffer); + if (ret) + goto done; + table = table_buffer; + } + + /* look for the platform configuration file in the table */ + for (entry = NULL, i = 0; i < footer->num_table_entries; i++) { + if (table[i].type == HFI1_EFT_PLATFORM_CONFIG) { + entry = &table[i]; + break; + } + } + if (!entry) { + ret = -ENOENT; + goto done; + } + + /* + * Sanity check on the configuration file size - it should never + * be larger than 4 KiB. 
+ */ + if (entry->size > (4 * 1024)) { + dd_dev_err(dd, "Bad configuration file size 0x%x\n", + entry->size); + ret = -EINVAL; + goto done; + } + + /* check for bogus offset and size that wrap when added together */ + if (entry->offset + entry->size < entry->offset) { + dd_dev_err(dd, + "Bad configuration file start + size 0x%x+0x%x\n", + entry->offset, entry->size); + ret = -EINVAL; + goto done; + } + + /* allocate the buffer to return */ + buffer = kmalloc(entry->size, GFP_KERNEL); + if (!buffer) { + ret = -ENOMEM; + goto done; + } + + /* + * Extract the file by looping over segments until it is fully read. + */ + seg_offset = entry->offset % SEG_SIZE; + seg_base = entry->offset - seg_offset; + ncopied = 0; + while (ncopied < entry->size) { + /* calculate data bytes available in this segment */ + + /* start with the bytes from the current offset to the end */ + bytes_available = SEG_SIZE - seg_offset; + /* subtract off footer and table from segment 0 */ + if (seg_base == 0) { + /* + * Sanity check: should not have a starting point + * at or within the directory. + */ + if (bytes_available <= directory_size) { + dd_dev_err(dd, + "Bad configuration file - offset 0x%x within footer+table\n", + entry->offset); + ret = -EINVAL; + goto done; + } + bytes_available -= directory_size; + } + + /* calculate bytes wanted */ + to_copy = entry->size - ncopied; + + /* max out at the available bytes in this segment */ + if (to_copy > bytes_available) + to_copy = bytes_available; + + /* + * Read from the EPROM. + * + * The sanity check for entry->offset is done in read_length(). + * The EPROM offset is validated against what the hardware + * addressing supports. In addition, if the offset is larger + * than the actual EPROM, it silently wraps. It will work + * fine, though the reader may not get what they expected + * from the EPROM. 
+ */ + ret = read_length(dd, seg_base + seg_offset, to_copy, + buffer + ncopied); + if (ret) + goto done; + + ncopied += to_copy; + + /* set up for next segment */ + seg_offset = footer->oprom_size; + seg_base += SEG_SIZE; + } + + /* success */ + ret = 0; + *data = buffer; + *size = entry->size; + +done: + kfree(table_buffer); + if (ret) + kfree(buffer); + return ret; +} + +/* * Read the platform configuration file from the EPROM. * * On success, an allocated buffer containing the data and its size are @@ -253,6 +448,7 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data, * -EBUSY - not able to acquire access to the EPROM * -ENOENT - no recognizable file written * -ENOMEM - buffer could not be allocated + * -EINVAL - invalid EPROM contents found */ int eprom_read_platform_config(struct hfi1_devdata *dd, void **data, u32 *size) { @@ -266,21 +462,20 @@ int eprom_read_platform_config(struct hfi1_devdata *dd, void **data, u32 *size) if (ret) return -EBUSY; - /* read the last page of P0 for the EPROM format magic */ - ret = read_length(dd, P1_START - EP_PAGE_SIZE, EP_PAGE_SIZE, directory); + /* read the last page of the segment for the EPROM format magic */ + ret = read_length(dd, SEG_SIZE - EP_PAGE_SIZE, EP_PAGE_SIZE, directory); if (ret) goto done; - /* last dword of P0 contains a magic indicator */ - if (directory[EP_PAGE_DWORDS - 1] == 0) { + /* last dword of the segment contains a magic value */ + if (directory[EP_PAGE_DWORDS - 1] == FOOTER_MAGIC) { + /* segment format */ + ret = read_segment_platform_config(dd, directory, data, size); + } else { /* partition format */ ret = read_partition_platform_config(dd, data, size); - goto done; } - /* nothing recognized */ - ret = -ENOENT; - done: release_chip_resource(dd, CR_EPROM); return ret; diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 13db8eb4f4ec..0dd50cdb039a 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ 
b/drivers/infiniband/hw/hfi1/firmware.c @@ -239,6 +239,16 @@ static const u8 all_fabric_serdes_broadcast = 0xe1; const u8 pcie_serdes_broadcast[2] = { 0xe2, 0xe3 }; static const u8 all_pcie_serdes_broadcast = 0xe0; +static const u32 platform_config_table_limits[PLATFORM_CONFIG_TABLE_MAX] = { + 0, + SYSTEM_TABLE_MAX, + PORT_TABLE_MAX, + RX_PRESET_TABLE_MAX, + TX_PRESET_TABLE_MAX, + QSFP_ATTEN_TABLE_MAX, + VARIABLE_SETTINGS_TABLE_MAX +}; + /* forwards */ static void dispose_one_firmware(struct firmware_details *fdet); static int load_fabric_serdes_firmware(struct hfi1_devdata *dd, @@ -263,11 +273,13 @@ static int __read_8051_data(struct hfi1_devdata *dd, u32 addr, u64 *result) u64 reg; int count; - /* start the read at the given address */ - reg = ((addr & DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_MASK) - << DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_SHIFT) - | DC_DC8051_CFG_RAM_ACCESS_CTRL_READ_ENA_SMASK; + /* step 1: set the address, clear enable */ + reg = (addr & DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_MASK) + << DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_SHIFT; write_csr(dd, DC_DC8051_CFG_RAM_ACCESS_CTRL, reg); + /* step 2: enable */ + write_csr(dd, DC_DC8051_CFG_RAM_ACCESS_CTRL, + reg | DC_DC8051_CFG_RAM_ACCESS_CTRL_READ_ENA_SMASK); /* wait until ACCESS_COMPLETED is set */ count = 0; @@ -707,6 +719,9 @@ static int obtain_firmware(struct hfi1_devdata *dd) &dd->pcidev->dev); if (err) { platform_config = NULL; + dd_dev_err(dd, + "%s: No default platform config file found\n", + __func__); goto done; } dd->platform_config.data = platform_config->data; @@ -1761,8 +1776,17 @@ int parse_platform_config(struct hfi1_devdata *dd) u32 record_idx = 0, table_type = 0, table_length_dwords = 0; int ret = -EINVAL; /* assume failure */ + /* + * For integrated devices that did not fall back to the default file, + * the SI tuning information for active channels is acquired from the + * scratch register bitmap, thus there is no platform config to parse. + * Skip parsing in these situations. 
+ */ + if (is_integrated(dd) && !platform_config_load) + return 0; + if (!dd->platform_config.data) { - dd_dev_info(dd, "%s: Missing config file\n", __func__); + dd_dev_err(dd, "%s: Missing config file\n", __func__); goto bail; } ptr = (u32 *)dd->platform_config.data; @@ -1770,7 +1794,7 @@ int parse_platform_config(struct hfi1_devdata *dd) magic_num = *ptr; ptr++; if (magic_num != PLATFORM_CONFIG_MAGIC_NUM) { - dd_dev_info(dd, "%s: Bad config file\n", __func__); + dd_dev_err(dd, "%s: Bad config file\n", __func__); goto bail; } @@ -1797,9 +1821,9 @@ int parse_platform_config(struct hfi1_devdata *dd) header1 = *ptr; header2 = *(ptr + 1); if (header1 != ~header2) { - dd_dev_info(dd, "%s: Failed validation at offset %ld\n", - __func__, (ptr - (u32 *) - dd->platform_config.data)); + dd_dev_err(dd, "%s: Failed validation at offset %ld\n", + __func__, (ptr - (u32 *) + dd->platform_config.data)); goto bail; } @@ -1841,11 +1865,11 @@ int parse_platform_config(struct hfi1_devdata *dd) table_length_dwords; break; default: - dd_dev_info(dd, - "%s: Unknown data table %d, offset %ld\n", - __func__, table_type, - (ptr - (u32 *) - dd->platform_config.data)); + dd_dev_err(dd, + "%s: Unknown data table %d, offset %ld\n", + __func__, table_type, + (ptr - (u32 *) + dd->platform_config.data)); goto bail; /* We don't trust this file now */ } pcfgcache->config_tables[table_type].table = ptr; @@ -1865,11 +1889,11 @@ int parse_platform_config(struct hfi1_devdata *dd) case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE: break; default: - dd_dev_info(dd, - "%s: Unknown meta table %d, offset %ld\n", - __func__, table_type, - (ptr - - (u32 *)dd->platform_config.data)); + dd_dev_err(dd, + "%s: Unknown meta table %d, offset %ld\n", + __func__, table_type, + (ptr - + (u32 *)dd->platform_config.data)); goto bail; /* We don't trust this file now */ } pcfgcache->config_tables[table_type].table_metadata = @@ -1884,10 +1908,9 @@ int parse_platform_config(struct hfi1_devdata *dd) /* Jump the table */ ptr += 
table_length_dwords; if (crc != *ptr) { - dd_dev_info(dd, "%s: Failed CRC check at offset %ld\n", - __func__, (ptr - - (u32 *) - dd->platform_config.data)); + dd_dev_err(dd, "%s: Failed CRC check at offset %ld\n", + __func__, (ptr - + (u32 *)dd->platform_config.data)); goto bail; } /* Jump the CRC DWORD */ @@ -1901,6 +1924,84 @@ bail: return ret; } +static void get_integrated_platform_config_field( + struct hfi1_devdata *dd, + enum platform_config_table_type_encoding table_type, + int field_index, u32 *data) +{ + struct hfi1_pportdata *ppd = dd->pport; + u8 *cache = ppd->qsfp_info.cache; + u32 tx_preset = 0; + + switch (table_type) { + case PLATFORM_CONFIG_SYSTEM_TABLE: + if (field_index == SYSTEM_TABLE_QSFP_POWER_CLASS_MAX) + *data = ppd->max_power_class; + else if (field_index == SYSTEM_TABLE_QSFP_ATTENUATION_DEFAULT_25G) + *data = ppd->default_atten; + break; + case PLATFORM_CONFIG_PORT_TABLE: + if (field_index == PORT_TABLE_PORT_TYPE) + *data = ppd->port_type; + else if (field_index == PORT_TABLE_LOCAL_ATTEN_25G) + *data = ppd->local_atten; + else if (field_index == PORT_TABLE_REMOTE_ATTEN_25G) + *data = ppd->remote_atten; + break; + case PLATFORM_CONFIG_RX_PRESET_TABLE: + if (field_index == RX_PRESET_TABLE_QSFP_RX_CDR_APPLY) + *data = (ppd->rx_preset & QSFP_RX_CDR_APPLY_SMASK) >> + QSFP_RX_CDR_APPLY_SHIFT; + else if (field_index == RX_PRESET_TABLE_QSFP_RX_EMP_APPLY) + *data = (ppd->rx_preset & QSFP_RX_EMP_APPLY_SMASK) >> + QSFP_RX_EMP_APPLY_SHIFT; + else if (field_index == RX_PRESET_TABLE_QSFP_RX_AMP_APPLY) + *data = (ppd->rx_preset & QSFP_RX_AMP_APPLY_SMASK) >> + QSFP_RX_AMP_APPLY_SHIFT; + else if (field_index == RX_PRESET_TABLE_QSFP_RX_CDR) + *data = (ppd->rx_preset & QSFP_RX_CDR_SMASK) >> + QSFP_RX_CDR_SHIFT; + else if (field_index == RX_PRESET_TABLE_QSFP_RX_EMP) + *data = (ppd->rx_preset & QSFP_RX_EMP_SMASK) >> + QSFP_RX_EMP_SHIFT; + else if (field_index == RX_PRESET_TABLE_QSFP_RX_AMP) + *data = (ppd->rx_preset & QSFP_RX_AMP_SMASK) >> + QSFP_RX_AMP_SHIFT; 
+ break; + case PLATFORM_CONFIG_TX_PRESET_TABLE: + if (cache[QSFP_EQ_INFO_OFFS] & 0x4) + tx_preset = ppd->tx_preset_eq; + else + tx_preset = ppd->tx_preset_noeq; + if (field_index == TX_PRESET_TABLE_PRECUR) + *data = (tx_preset & TX_PRECUR_SMASK) >> + TX_PRECUR_SHIFT; + else if (field_index == TX_PRESET_TABLE_ATTN) + *data = (tx_preset & TX_ATTN_SMASK) >> + TX_ATTN_SHIFT; + else if (field_index == TX_PRESET_TABLE_POSTCUR) + *data = (tx_preset & TX_POSTCUR_SMASK) >> + TX_POSTCUR_SHIFT; + else if (field_index == TX_PRESET_TABLE_QSFP_TX_CDR_APPLY) + *data = (tx_preset & QSFP_TX_CDR_APPLY_SMASK) >> + QSFP_TX_CDR_APPLY_SHIFT; + else if (field_index == TX_PRESET_TABLE_QSFP_TX_EQ_APPLY) + *data = (tx_preset & QSFP_TX_EQ_APPLY_SMASK) >> + QSFP_TX_EQ_APPLY_SHIFT; + else if (field_index == TX_PRESET_TABLE_QSFP_TX_CDR) + *data = (tx_preset & QSFP_TX_CDR_SMASK) >> + QSFP_TX_CDR_SHIFT; + else if (field_index == TX_PRESET_TABLE_QSFP_TX_EQ) + *data = (tx_preset & QSFP_TX_EQ_SMASK) >> + QSFP_TX_EQ_SHIFT; + break; + case PLATFORM_CONFIG_QSFP_ATTEN_TABLE: + case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE: + default: + break; + } +} + static int get_platform_fw_field_metadata(struct hfi1_devdata *dd, int table, int field, u32 *field_len_bits, u32 *field_start_bits) @@ -1976,6 +2077,15 @@ int get_platform_config_field(struct hfi1_devdata *dd, else return -EINVAL; + if (is_integrated(dd) && !platform_config_load) { + /* + * Use saved configuration from ppd for integrated platforms + */ + get_integrated_platform_config_field(dd, table_type, + field_index, data); + return 0; + } + ret = get_platform_fw_field_metadata(dd, table_type, field_index, &field_len_bits, &field_start_bits); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 7eef11b316ff..4163596ce4c9 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -512,6 +512,9 @@ struct rvt_sge_state; #define HFI1_MIN_VLS_SUPPORTED 1 #define HFI1_MAX_VLS_SUPPORTED 8 
+#define HFI1_GUIDS_PER_PORT 5 +#define HFI1_PORT_GUID_INDEX 0 + static inline void incr_cntr64(u64 *cntr) { if (*cntr < (u64)-1LL) @@ -579,11 +582,20 @@ struct hfi1_pportdata { struct kobject vl2mtu_kobj; /* PHY support */ - u32 port_type; struct qsfp_data qsfp_info; + /* Values for SI tuning of SerDes */ + u32 port_type; + u32 tx_preset_eq; + u32 tx_preset_noeq; + u32 rx_preset; + u8 local_atten; + u8 remote_atten; + u8 default_atten; + u8 max_power_class; + + /* GUIDs for this interface, in host order, guids[0] is a port guid */ + u64 guids[HFI1_GUIDS_PER_PORT]; - /* GUID for this interface, in host order */ - u64 guid; /* GUID for peer interface, in host order */ u64 neighbor_guid; @@ -848,32 +860,29 @@ struct hfi1_devdata { u8 __iomem *kregend; /* physical address of chip for io_remap, etc. */ resource_size_t physaddr; - /* receive context data */ - struct hfi1_ctxtdata **rcd; + /* Per VL data. Enough for all VLs but not all elements are set/used. */ + struct per_vl_data vld[PER_VL_SEND_CONTEXTS]; /* send context data */ struct send_context_info *send_contexts; /* map hardware send contexts to software index */ u8 *hw_to_sw; /* spinlock for allocating and releasing send context resources */ spinlock_t sc_lock; - /* Per VL data. Enough for all VLs but not all elements are set/used. */ - struct per_vl_data vld[PER_VL_SEND_CONTEXTS]; /* lock for pio_map */ spinlock_t pio_map_lock; + /* Send Context initialization lock. */ + spinlock_t sc_init_lock; + /* lock for sdma_map */ + spinlock_t sde_map_lock; /* array of kernel send contexts */ struct send_context **kernel_send_context; /* array of vl maps */ struct pio_vl_map __rcu *pio_map; - /* seqlock for sc2vl */ - seqlock_t sc2vl_lock; - u64 sc2vl[4]; - /* Send Context initialization lock. 
*/ - spinlock_t sc_init_lock; + /* default flags to last descriptor */ + u64 default_desc1; /* fields common to all SDMA engines */ - /* default flags to last descriptor */ - u64 default_desc1; volatile __le64 *sdma_heads_dma; /* DMA'ed by chip */ dma_addr_t sdma_heads_phys; void *sdma_pad_dma; /* DMA'ed by chip */ @@ -884,8 +893,6 @@ struct hfi1_devdata { u32 chip_sdma_engines; /* num used */ u32 num_sdma; - /* lock for sdma_map */ - spinlock_t sde_map_lock; /* array of engines sized by num_sdma */ struct sdma_engine *per_sdma; /* array of vl maps */ @@ -894,14 +901,11 @@ struct hfi1_devdata { wait_queue_head_t sdma_unfreeze_wq; atomic_t sdma_unfreeze_count; + u32 lcb_access_count; /* count of LCB users */ + /* common data between shared ASIC HFIs in this OS */ struct hfi1_asic_data *asic_data; - /* hfi1_pportdata, points to array of (physical) port-specific - * data structs, indexed by pidx (0..n-1) - */ - struct hfi1_pportdata *pport; - /* mem-mapped pointer to base of PIO buffers */ void __iomem *piobase; /* @@ -918,20 +922,13 @@ struct hfi1_devdata { /* send context numbers and sizes for each type */ struct sc_config_sizes sc_sizes[SC_MAX]; - u32 lcb_access_count; /* count of LCB users */ - char *boardname; /* human readable board info */ - /* device (not port) flags, basically device capabilities */ - u32 flags; - /* reset value */ u64 z_int_counter; u64 z_rcv_limit; u64 z_send_schedule; - /* percpu int_counter */ - u64 __percpu *int_counter; - u64 __percpu *rcv_limit; + u64 __percpu *send_schedule; /* number of receive contexts in use by the driver */ u32 num_rcv_contexts; @@ -946,6 +943,7 @@ struct hfi1_devdata { /* base receive interrupt timeout, in CSR units */ u32 rcv_intr_timeout_csr; + u32 freezelen; /* max length of freezemsg */ u64 __iomem *egrtidbase; spinlock_t sendctrl_lock; /* protect changes to SendCtrl */ spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */ @@ -967,7 +965,6 @@ struct hfi1_devdata { * IB link status cheaply */ struct 
hfi1_status *status; - u32 freezelen; /* max length of freezemsg */ /* revision register shadow */ u64 revision; @@ -995,6 +992,8 @@ struct hfi1_devdata { u16 rcvegrbufsize_shift; /* both sides of the PCIe link are gen3 capable */ u8 link_gen3_capable; + /* default link down value (poll/sleep) */ + u8 link_default; /* localbus width (1, 2,4,8,16,32) from config space */ u32 lbus_width; /* localbus speed in MHz */ @@ -1030,8 +1029,6 @@ struct hfi1_devdata { u8 hfi1_id; /* implementation code */ u8 icode; - /* default link down value (poll/sleep) */ - u8 link_default; /* vAU of this device */ u8 vau; /* vCU of this device */ @@ -1042,27 +1039,17 @@ struct hfi1_devdata { u16 vl15_init; /* Misc small ints */ - /* Number of physical ports available */ - u8 num_pports; - /* Lowest context number which can be used by user processes */ - u8 first_user_ctxt; u8 n_krcv_queues; u8 qos_shift; - u8 qpn_mask; - u16 rhf_offset; /* offset of RHF within receive header entry */ u16 irev; /* implementation revision */ u16 dc8051_ver; /* 8051 firmware version */ + spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */ struct platform_config platform_config; struct platform_config_cache pcfg_cache; struct diag_client *diag_client; - spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */ - - u8 psxmitwait_supported; - /* cycle length of PS* counters in HW (in picoseconds) */ - u16 psxmitwait_check_rate; /* MSI-X information */ struct hfi1_msix_entry *msix_entries; @@ -1077,6 +1064,9 @@ struct hfi1_devdata { struct rcv_array_data rcv_entries; + /* cycle length of PS* counters in HW (in picoseconds) */ + u16 psxmitwait_check_rate; + /* * 64 bit synthetic counters */ @@ -1109,11 +1099,11 @@ struct hfi1_devdata { struct err_info_rcvport err_info_rcvport; struct err_info_constraint err_info_rcv_constraint; struct err_info_constraint err_info_xmit_constraint; - u8 err_info_uncorrectable; - u8 err_info_fmconfig; atomic_t drop_packet; u8 do_drop; + u8 err_info_uncorrectable; 
+ u8 err_info_fmconfig; /* * Software counters for the status bits defined by the @@ -1136,47 +1126,70 @@ struct hfi1_devdata { u64 sw_cce_err_status_aggregate; /* Software counter that aggregates all bypass packet rcv errors */ u64 sw_rcv_bypass_packet_errors; - /* receive interrupt functions */ - rhf_rcv_function_ptr *rhf_rcv_function_map; + /* receive interrupt function */ rhf_rcv_function_ptr normal_rhf_rcv_functions[8]; + /* Save the enabled LCB error bits */ + u64 lcb_err_en; + /* * Handlers for outgoing data so that snoop/capture does not * have to have its hooks in the send path */ - send_routine process_pio_send; + send_routine process_pio_send ____cacheline_aligned_in_smp; send_routine process_dma_send; void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, const void *from, size_t count); + /* hfi1_pportdata, points to array of (physical) port-specific + * data structs, indexed by pidx (0..n-1) + */ + struct hfi1_pportdata *pport; + /* receive context data */ + struct hfi1_ctxtdata **rcd; + u64 __percpu *int_counter; + /* device (not port) flags, basically device capabilities */ + u16 flags; + /* Number of physical ports available */ + u8 num_pports; + /* Lowest context number which can be used by user processes */ + u8 first_user_ctxt; + /* adding a new field here would make it part of this cacheline */ + + /* seqlock for sc2vl */ + seqlock_t sc2vl_lock ____cacheline_aligned_in_smp; + u64 sc2vl[4]; + /* receive interrupt functions */ + rhf_rcv_function_ptr *rhf_rcv_function_map; + u64 __percpu *rcv_limit; + u16 rhf_offset; /* offset of RHF within receive header entry */ + /* adding a new field here would make it part of this cacheline */ /* OUI comes from the HW. Used everywhere as 3 separate bytes. 
*/ u8 oui1; u8 oui2; u8 oui3; + u8 dc_shutdown; + /* Timer and counter used to detect RcvBufOvflCnt changes */ struct timer_list rcverr_timer; - u32 rcv_ovfl_cnt; wait_queue_head_t event_queue; - /* Save the enabled LCB error bits */ - u64 lcb_err_en; - u8 dc_shutdown; - /* receive context tail dummy address */ __le64 *rcvhdrtail_dummy_kvaddr; dma_addr_t rcvhdrtail_dummy_dma; - bool eprom_available; /* true if EPROM is available for this device */ - bool aspm_supported; /* Does HW support ASPM */ - bool aspm_enabled; /* ASPM state: enabled/disabled */ + u32 rcv_ovfl_cnt; /* Serialize ASPM enable/disable between multiple verbs contexts */ spinlock_t aspm_lock; /* Number of verbs contexts which have disabled ASPM */ atomic_t aspm_disabled_cnt; - struct hfi1_affinity *affinity; + bool eprom_available; /* true if EPROM is available for this device */ + bool aspm_supported; /* Does HW support ASPM */ + bool aspm_enabled; /* ASPM state: enabled/disabled */ struct rhashtable sdma_rht; + struct kobject kobj; }; @@ -1633,6 +1646,17 @@ static inline u16 hfi1_get_pkey(struct hfi1_ibport *ibp, unsigned index) } /* + * Return the indexed GUID from the port GUIDs table. + */ +static inline __be64 get_sguid(struct hfi1_ibport *ibp, unsigned int index) +{ + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + + WARN_ON(index >= HFI1_GUIDS_PER_PORT); + return cpu_to_be64(ppd->guids[index]); +} + +/* * Called by readers of cc_state only, must call under rcu_read_lock(). */ static inline struct cc_state *get_cc_state(struct hfi1_pportdata *ppd) @@ -2003,6 +2027,12 @@ static inline u32 qsfp_resource(struct hfi1_devdata *dd) return i2c_target(dd->hfi1_id); } +/* Is this device integrated or discrete? 
*/ +static inline bool is_integrated(struct hfi1_devdata *dd) +{ + return dd->pcidev->device == PCI_DEVICE_ID_INTEL1; +} + int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp); #define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev)) diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h index 2ec6ef38d389..d9740ddea6f1 100644 --- a/drivers/infiniband/hw/hfi1/iowait.h +++ b/drivers/infiniband/hw/hfi1/iowait.h @@ -64,6 +64,7 @@ struct sdma_engine; /** * struct iowait - linkage for delayed progress/waiting * @list: used to add/insert into QP/PQ wait lists + * @lock: used to record the list head lock * @tx_head: overflow list of sdma_txreq's * @sleep: no space callback * @wakeup: space callback wakeup @@ -91,6 +92,11 @@ struct sdma_engine; * so sleeping is not allowed. * * The wait_dma member along with the iow + * + * The lock field is used by waiters to record + * the seqlock_t that guards the list head. + * Waiters explicitly know that, but the destroy + * code that unwaits QPs does not.
*/ struct iowait { @@ -103,6 +109,7 @@ struct iowait { unsigned seq); void (*wakeup)(struct iowait *wait, int reason); void (*sdma_drained)(struct iowait *wait); + seqlock_t *lock; struct work_struct iowork; wait_queue_head_t wait_dma; wait_queue_head_t wait_pio; @@ -141,6 +148,7 @@ static inline void iowait_init( void (*sdma_drained)(struct iowait *wait)) { wait->count = 0; + wait->lock = NULL; INIT_LIST_HEAD(&wait->list); INIT_LIST_HEAD(&wait->tx_head); INIT_WORK(&wait->iowork, func); diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 9487c9bb8920..0ef62e67f283 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -336,20 +336,20 @@ static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data, ni = (struct opa_node_info *)data; /* GUID 0 is illegal */ - if (am || pidx >= dd->num_pports || dd->pport[pidx].guid == 0) { + if (am || pidx >= dd->num_pports || ibdev->node_guid == 0 || + get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } - ni->port_guid = cpu_to_be64(dd->pport[pidx].guid); + ni->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX); ni->base_version = OPA_MGMT_BASE_VERSION; ni->class_version = OPA_SMI_CLASS_VERSION; ni->node_type = 1; /* channel adapter */ ni->num_ports = ibdev->phys_port_cnt; /* This is already in network order */ ni->system_image_guid = ib_hfi1_sys_image_guid; - /* Use first-port GUID as node */ - ni->node_guid = cpu_to_be64(dd->pport->guid); + ni->node_guid = ibdev->node_guid; ni->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd)); ni->device_id = cpu_to_be16(dd->pcidev->device); ni->revision = cpu_to_be32(dd->minrev); @@ -373,19 +373,20 @@ static int subn_get_nodeinfo(struct ib_smp *smp, struct ib_device *ibdev, /* GUID 0 is illegal */ if (smp->attr_mod || pidx >= dd->num_pports || - dd->pport[pidx].guid == 0) + ibdev->node_guid == 0 || + 
get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) { smp->status |= IB_SMP_INVALID_FIELD; - else - nip->port_guid = cpu_to_be64(dd->pport[pidx].guid); + return reply((struct ib_mad_hdr *)smp); + } + nip->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX); nip->base_version = OPA_MGMT_BASE_VERSION; nip->class_version = OPA_SMI_CLASS_VERSION; nip->node_type = 1; /* channel adapter */ nip->num_ports = ibdev->phys_port_cnt; /* This is already in network order */ nip->sys_guid = ib_hfi1_sys_image_guid; - /* Use first-port GUID as node */ - nip->node_guid = cpu_to_be64(dd->pport->guid); + nip->node_guid = ibdev->node_guid; nip->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd)); nip->device_id = cpu_to_be16(dd->pcidev->device); nip->revision = cpu_to_be32(dd->minrev); diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index 7ad30898fc19..ccbf52c8ff6f 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -81,7 +81,7 @@ static void do_remove(struct mmu_rb_handler *handler, struct list_head *del_list); static void handle_remove(struct work_struct *work); -static struct mmu_notifier_ops mn_opts = { +static const struct mmu_notifier_ops mn_opts = { .invalidate_page = mmu_notifier_page, .invalidate_range_start = mmu_notifier_range_start, }; diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index d15ffed48a39..64c9eeb52d86 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -765,6 +765,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, sc->hw_context = hw_context; cr_group_addresses(sc, &dma); sc->credits = sci->credits; + sc->size = sc->credits * PIO_BLOCK_SIZE; /* PIO Send Memory Address details */ #define PIO_ADDR_CONTEXT_MASK 0xfful @@ -1249,6 +1250,7 @@ int sc_enable(struct send_context *sc) sc->free = 0; sc->alloc_free = 0; sc->fill = 0; + sc->fill_wrap = 0; sc->sr_head = 0; sc->sr_tail = 
0; sc->flags = 0; @@ -1392,7 +1394,7 @@ struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len, unsigned long flags; unsigned long avail; unsigned long blocks = dwords_to_blocks(dw_len); - unsigned long start_fill; + u32 fill_wrap; int trycount = 0; u32 head, next; @@ -1417,9 +1419,7 @@ retry: (sc->fill - sc->alloc_free); if (blocks > avail) { /* still no room, actively update */ - spin_unlock_irqrestore(&sc->alloc_lock, flags); sc_release_update(sc); - spin_lock_irqsave(&sc->alloc_lock, flags); sc->alloc_free = ACCESS_ONCE(sc->free); trycount++; goto retry; @@ -1435,8 +1435,11 @@ retry: head = sc->sr_head; /* "allocate" the buffer */ - start_fill = sc->fill; sc->fill += blocks; + fill_wrap = sc->fill_wrap; + sc->fill_wrap += blocks; + if (sc->fill_wrap >= sc->credits) + sc->fill_wrap = sc->fill_wrap - sc->credits; /* * Fill the parts that the releaser looks at before moving the head. @@ -1465,11 +1468,8 @@ retry: spin_unlock_irqrestore(&sc->alloc_lock, flags); /* finish filling in the buffer outside the lock */ - pbuf->start = sc->base_addr + ((start_fill % sc->credits) - * PIO_BLOCK_SIZE); - pbuf->size = sc->credits * PIO_BLOCK_SIZE; - pbuf->end = sc->base_addr + pbuf->size; - pbuf->block_count = blocks; + pbuf->start = sc->base_addr + fill_wrap * PIO_BLOCK_SIZE; + pbuf->end = sc->base_addr + sc->size; pbuf->qw_written = 0; pbuf->carry_bytes = 0; pbuf->carry.val64 = 0; @@ -1580,6 +1580,7 @@ static void sc_piobufavail(struct send_context *sc) qp = iowait_to_qp(wait); priv = qp->priv; list_del_init(&priv->s_iowait.list); + priv->s_iowait.lock = NULL; /* refcount held until actual wake up */ qps[n++] = qp; } @@ -2035,28 +2036,17 @@ freesc15: int init_credit_return(struct hfi1_devdata *dd) { int ret; - int num_numa; int i; - num_numa = num_online_nodes(); - /* enforce the expectation that the numas are compact */ - for (i = 0; i < num_numa; i++) { - if (!node_online(i)) { - dd_dev_err(dd, "NUMA nodes are not compact\n"); - ret = -EINVAL; - goto done; - } 
- } - dd->cr_base = kcalloc( - num_numa, + node_affinity.num_possible_nodes, sizeof(struct credit_return_base), GFP_KERNEL); if (!dd->cr_base) { ret = -ENOMEM; goto done; } - for (i = 0; i < num_numa; i++) { + for_each_node_with_cpus(i) { int bytes = TXE_NUM_CONTEXTS * sizeof(struct credit_return); set_dev_node(&dd->pcidev->dev, i); @@ -2083,14 +2073,11 @@ done: void free_credit_return(struct hfi1_devdata *dd) { - int num_numa; int i; if (!dd->cr_base) return; - - num_numa = num_online_nodes(); - for (i = 0; i < num_numa; i++) { + for (i = 0; i < node_affinity.num_possible_nodes; i++) { if (dd->cr_base[i].va) { dma_free_coherent(&dd->pcidev->dev, TXE_NUM_CONTEXTS * diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h index e709eaf743b5..867e5ffc3595 100644 --- a/drivers/infiniband/hw/hfi1/pio.h +++ b/drivers/infiniband/hw/hfi1/pio.h @@ -83,53 +83,55 @@ struct pio_buf { void *arg; /* argument for cb */ void __iomem *start; /* buffer start address */ void __iomem *end; /* context end address */ - unsigned long size; /* context size, in bytes */ unsigned long sent_at; /* buffer is sent when <= free */ - u32 block_count; /* size of buffer, in blocks */ - u32 qw_written; /* QW written so far */ - u32 carry_bytes; /* number of valid bytes in carry */ union mix carry; /* pending unwritten bytes */ + u16 qw_written; /* QW written so far */ + u8 carry_bytes; /* number of valid bytes in carry */ }; /* cache line aligned pio buffer array */ union pio_shadow_ring { struct pio_buf pbuf; - u64 unused[16]; /* cache line spacer */ } ____cacheline_aligned; /* per-NUMA send context */ struct send_context { /* read-only after init */ struct hfi1_devdata *dd; /* device */ - void __iomem *base_addr; /* start of PIO memory */ union pio_shadow_ring *sr; /* shadow ring */ + void __iomem *base_addr; /* start of PIO memory */ + u32 __percpu *buffers_allocated;/* count of buffers allocated */ + u32 size; /* context size, in bytes */ - volatile __le64 *hw_free; /* 
HW free counter */ - struct work_struct halt_work; /* halted context work queue entry */ - unsigned long flags; /* flags */ int node; /* context home node */ - int type; /* context type */ - u32 sw_index; /* software index number */ - u32 hw_context; /* hardware context number */ - u32 credits; /* number of blocks in context */ u32 sr_size; /* size of the shadow ring */ - u32 group; /* credit return group */ + u16 flags; /* flags */ + u8 type; /* context type */ + u8 sw_index; /* software index number */ + u8 hw_context; /* hardware context number */ + u8 group; /* credit return group */ + /* allocator fields */ spinlock_t alloc_lock ____cacheline_aligned_in_smp; + u32 sr_head; /* shadow ring head */ unsigned long fill; /* official alloc count */ unsigned long alloc_free; /* copy of free (less cache thrash) */ - u32 sr_head; /* shadow ring head */ + u32 fill_wrap; /* tracks fill within ring */ + u32 credits; /* number of blocks in context */ + /* adding a new field here would make it part of this cacheline */ + /* releaser fields */ spinlock_t release_lock ____cacheline_aligned_in_smp; - unsigned long free; /* official free count */ u32 sr_tail; /* shadow ring tail */ + unsigned long free; /* official free count */ + volatile __le64 *hw_free; /* HW free counter */ /* list for PIO waiters */ struct list_head piowait ____cacheline_aligned_in_smp; spinlock_t credit_ctrl_lock ____cacheline_aligned_in_smp; - u64 credit_ctrl; /* cache for credit control */ u32 credit_intr_count; /* count of credit intr users */ - u32 __percpu *buffers_allocated;/* count of buffers allocated */ + u64 credit_ctrl; /* cache for credit control */ wait_queue_head_t halt_wait; /* wait until kernel sees interrupt */ + struct work_struct halt_work; /* halted context work queue entry */ }; /* send context flags */ diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c index aa7773643107..03024cec78dd 100644 --- a/drivers/infiniband/hw/hfi1/pio_copy.c +++ 
b/drivers/infiniband/hw/hfi1/pio_copy.c @@ -129,8 +129,8 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, dest += sizeof(u64); } - dest -= pbuf->size; - dend -= pbuf->size; + dest -= pbuf->sc->size; + dend -= pbuf->sc->size; } /* write 8-byte non-SOP, non-wrap chunk data */ @@ -361,8 +361,8 @@ void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc, dest += sizeof(u64); } - dest -= pbuf->size; - dend -= pbuf->size; + dest -= pbuf->sc->size; + dend -= pbuf->sc->size; } /* write 8-byte non-SOP, non-wrap chunk data */ @@ -458,8 +458,8 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes) dest += sizeof(u64); } - dest -= pbuf->size; - dend -= pbuf->size; + dest -= pbuf->sc->size; + dend -= pbuf->sc->size; } /* write 8-byte non-SOP, non-wrap chunk data */ @@ -492,7 +492,7 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes) */ /* adjust if we have wrapped */ if (dest >= pbuf->end) - dest -= pbuf->size; + dest -= pbuf->sc->size; /* jump to the SOP range if within the first block */ else if (pbuf->qw_written < PIO_BLOCK_QWS) dest += SOP_DISTANCE; @@ -584,8 +584,8 @@ static void mid_copy_straight(struct pio_buf *pbuf, dest += sizeof(u64); } - dest -= pbuf->size; - dend -= pbuf->size; + dest -= pbuf->sc->size; + dend -= pbuf->sc->size; } /* write 8-byte non-SOP, non-wrap chunk data */ @@ -666,7 +666,7 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes) */ /* adjust if we've wrapped */ if (dest >= pbuf->end) - dest -= pbuf->size; + dest -= pbuf->sc->size; /* jump to SOP range if within the first block */ else if (pbuf->qw_written < PIO_BLOCK_QWS) dest += SOP_DISTANCE; @@ -719,7 +719,7 @@ void seg_pio_copy_end(struct pio_buf *pbuf) */ /* adjust if we have wrapped */ if (dest >= pbuf->end) - dest -= pbuf->size; + dest -= pbuf->sc->size; /* jump to the SOP range if within the first block */ else if (pbuf->qw_written < PIO_BLOCK_QWS) dest += SOP_DISTANCE; diff --git 
a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index 202433178864..838fe84e285a 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -49,6 +49,90 @@ #include "efivar.h" #include "eprom.h" +static int validate_scratch_checksum(struct hfi1_devdata *dd) +{ + u64 checksum = 0, temp_scratch = 0; + int i, j, version; + + temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH); + version = (temp_scratch & BITMAP_VERSION_SMASK) >> BITMAP_VERSION_SHIFT; + + /* Prevent power on default of all zeroes from passing checksum */ + if (!version) + return 0; + + /* + * ASIC scratch 0 only contains the checksum and bitmap version as + * fields of interest, both of which are handled separately from the + * loop below, so skip it + */ + checksum += version; + for (i = 1; i < ASIC_NUM_SCRATCH; i++) { + temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH + (8 * i)); + for (j = sizeof(u64); j != 0; j -= 2) { + checksum += (temp_scratch & 0xFFFF); + temp_scratch >>= 16; + } + } + + while (checksum >> 16) + checksum = (checksum & CHECKSUM_MASK) + (checksum >> 16); + + temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH); + temp_scratch &= CHECKSUM_SMASK; + temp_scratch >>= CHECKSUM_SHIFT; + + if (checksum + temp_scratch == 0xFFFF) + return 1; + return 0; +} + +static void save_platform_config_fields(struct hfi1_devdata *dd) +{ + struct hfi1_pportdata *ppd = dd->pport; + u64 temp_scratch = 0, temp_dest = 0; + + temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH_1); + + temp_dest = temp_scratch & + (dd->hfi1_id ? PORT1_PORT_TYPE_SMASK : + PORT0_PORT_TYPE_SMASK); + ppd->port_type = temp_dest >> + (dd->hfi1_id ? PORT1_PORT_TYPE_SHIFT : + PORT0_PORT_TYPE_SHIFT); + + temp_dest = temp_scratch & + (dd->hfi1_id ? PORT1_LOCAL_ATTEN_SMASK : + PORT0_LOCAL_ATTEN_SMASK); + ppd->local_atten = temp_dest >> + (dd->hfi1_id ? PORT1_LOCAL_ATTEN_SHIFT : + PORT0_LOCAL_ATTEN_SHIFT); + + temp_dest = temp_scratch & + (dd->hfi1_id ? 
PORT1_REMOTE_ATTEN_SMASK : + PORT0_REMOTE_ATTEN_SMASK); + ppd->remote_atten = temp_dest >> + (dd->hfi1_id ? PORT1_REMOTE_ATTEN_SHIFT : + PORT0_REMOTE_ATTEN_SHIFT); + + temp_dest = temp_scratch & + (dd->hfi1_id ? PORT1_DEFAULT_ATTEN_SMASK : + PORT0_DEFAULT_ATTEN_SMASK); + ppd->default_atten = temp_dest >> + (dd->hfi1_id ? PORT1_DEFAULT_ATTEN_SHIFT : + PORT0_DEFAULT_ATTEN_SHIFT); + + temp_scratch = read_csr(dd, dd->hfi1_id ? ASIC_CFG_SCRATCH_3 : + ASIC_CFG_SCRATCH_2); + + ppd->tx_preset_eq = (temp_scratch & TX_EQ_SMASK) >> TX_EQ_SHIFT; + ppd->tx_preset_noeq = (temp_scratch & TX_NO_EQ_SMASK) >> TX_NO_EQ_SHIFT; + ppd->rx_preset = (temp_scratch & RX_SMASK) >> RX_SHIFT; + + ppd->max_power_class = (temp_scratch & QSFP_MAX_POWER_SMASK) >> + QSFP_MAX_POWER_SHIFT; +} + void get_platform_config(struct hfi1_devdata *dd) { int ret = 0; @@ -56,38 +140,49 @@ void get_platform_config(struct hfi1_devdata *dd) u8 *temp_platform_config = NULL; u32 esize; - ret = eprom_read_platform_config(dd, (void **)&temp_platform_config, - &esize); - if (!ret) { - /* success */ - size = esize; - goto success; + if (is_integrated(dd)) { + if (validate_scratch_checksum(dd)) { + save_platform_config_fields(dd); + return; + } + dd_dev_err(dd, "%s: Config bitmap corrupted/uninitialized\n", + __func__); + dd_dev_err(dd, + "%s: Please update your BIOS to support active channels\n", + __func__); + } else { + ret = eprom_read_platform_config(dd, + (void **)&temp_platform_config, + &esize); + if (!ret) { + /* success */ + dd->platform_config.data = temp_platform_config; + dd->platform_config.size = esize; + return; + } + /* fail, try EFI variable */ + + ret = read_hfi1_efi_var(dd, "configuration", &size, + (void **)&temp_platform_config); + if (!ret) { + dd->platform_config.data = temp_platform_config; + dd->platform_config.size = size; + return; + } } - /* fail, try EFI variable */ - - ret = read_hfi1_efi_var(dd, "configuration", &size, - (void **)&temp_platform_config); - if (!ret) - goto success; - - 
dd_dev_info(dd, - "%s: Failed to get platform config from UEFI, falling back to request firmware\n", - __func__); + dd_dev_err(dd, + "%s: Failed to get platform config, falling back to sub-optimal default file\n", + __func__); /* fall back to request firmware */ platform_config_load = 1; - return; - -success: - dd->platform_config.data = temp_platform_config; - dd->platform_config.size = size; } void free_platform_config(struct hfi1_devdata *dd) { if (!platform_config_load) { /* - * was loaded from EFI, release memory - * allocated by read_efi_var + * was loaded from EFI or the EPROM, release memory + * allocated by read_efi_var/eprom_read_platform_config */ kfree(dd->platform_config.data); } @@ -100,12 +195,16 @@ void free_platform_config(struct hfi1_devdata *dd) void get_port_type(struct hfi1_pportdata *ppd) { int ret; + u32 temp; ret = get_platform_config_field(ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0, - PORT_TABLE_PORT_TYPE, &ppd->port_type, + PORT_TABLE_PORT_TYPE, &temp, 4); - if (ret) + if (ret) { ppd->port_type = PORT_TYPE_UNKNOWN; + return; + } + ppd->port_type = temp; } int set_qsfp_tx(struct hfi1_pportdata *ppd, int on) @@ -538,6 +637,38 @@ static void apply_tx_lanes(struct hfi1_pportdata *ppd, u8 field_id, } } +/* + * Return a special SerDes setting for low power AOC cables. The power class + * threshold and setting being used were all found by empirical testing. + * + * Summary of the logic: + * + * if (QSFP and QSFP_TYPE == AOC and QSFP_POWER_CLASS < 4) + * return 0xe + * return 0; // leave at default + */ +static u8 aoc_low_power_setting(struct hfi1_pportdata *ppd) +{ + u8 *cache = ppd->qsfp_info.cache; + int power_class; + + /* QSFP only */ + if (ppd->port_type != PORT_TYPE_QSFP) + return 0; /* leave at default */ + + /* active optical cables only */ + switch ((cache[QSFP_MOD_TECH_OFFS] & 0xF0) >> 4) { + case 0x0 ... 
0x9: /* fallthrough */ + case 0xC: /* fallthrough */ + case 0xE: + /* active AOC */ + power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]); + if (power_class < QSFP_POWER_CLASS_4) + return 0xe; + } + return 0; /* leave at default */ +} + static void apply_tunings( struct hfi1_pportdata *ppd, u32 tx_preset_index, u8 tuning_method, u32 total_atten, u8 limiting_active) @@ -606,7 +737,17 @@ static void apply_tunings( tx_preset_index, TX_PRESET_TABLE_POSTCUR, &tx_preset, 4); postcur = tx_preset; - config_data = precur | (attn << 8) | (postcur << 16); + /* + * NOTES: + * o The aoc_low_power_setting is applied to all lanes even + * though only lane 0's value is examined by the firmware. + * o A lingering low power setting after a cable swap does + * not occur. On cable unplug the 8051 is reset and + * restarted on cable insert. This resets all settings to + * their default, erasing any previous low power setting. + */ + config_data = precur | (attn << 8) | (postcur << 16) | + (aoc_low_power_setting(ppd) << 24); apply_tx_lanes(ppd, TX_EQ_SETTINGS, config_data, "Applying TX settings"); diff --git a/drivers/infiniband/hw/hfi1/platform.h b/drivers/infiniband/hw/hfi1/platform.h index e2c21613c326..eed0aa9124fa 100644 --- a/drivers/infiniband/hw/hfi1/platform.h +++ b/drivers/infiniband/hw/hfi1/platform.h @@ -168,16 +168,6 @@ struct platform_config_cache { struct platform_config_data config_tables[PLATFORM_CONFIG_TABLE_MAX]; }; -static const u32 platform_config_table_limits[PLATFORM_CONFIG_TABLE_MAX] = { - 0, - SYSTEM_TABLE_MAX, - PORT_TABLE_MAX, - RX_PRESET_TABLE_MAX, - TX_PRESET_TABLE_MAX, - QSFP_ATTEN_TABLE_MAX, - VARIABLE_SETTINGS_TABLE_MAX -}; - /* This section defines default values and encodings for the * fields defined for each table above */ @@ -295,6 +285,123 @@ enum link_tuning_encoding { OPA_UNKNOWN_TUNING }; +/* + * Shifts and masks for the link SI tuning values stuffed into the ASIC scratch + * registers for integrated platforms + */ +#define 
PORT0_PORT_TYPE_SHIFT 0 +#define PORT0_LOCAL_ATTEN_SHIFT 4 +#define PORT0_REMOTE_ATTEN_SHIFT 10 +#define PORT0_DEFAULT_ATTEN_SHIFT 32 + +#define PORT1_PORT_TYPE_SHIFT 16 +#define PORT1_LOCAL_ATTEN_SHIFT 20 +#define PORT1_REMOTE_ATTEN_SHIFT 26 +#define PORT1_DEFAULT_ATTEN_SHIFT 40 + +#define PORT0_PORT_TYPE_MASK 0xFUL +#define PORT0_LOCAL_ATTEN_MASK 0x3FUL +#define PORT0_REMOTE_ATTEN_MASK 0x3FUL +#define PORT0_DEFAULT_ATTEN_MASK 0xFFUL + +#define PORT1_PORT_TYPE_MASK 0xFUL +#define PORT1_LOCAL_ATTEN_MASK 0x3FUL +#define PORT1_REMOTE_ATTEN_MASK 0x3FUL +#define PORT1_DEFAULT_ATTEN_MASK 0xFFUL + +#define PORT0_PORT_TYPE_SMASK (PORT0_PORT_TYPE_MASK << \ + PORT0_PORT_TYPE_SHIFT) +#define PORT0_LOCAL_ATTEN_SMASK (PORT0_LOCAL_ATTEN_MASK << \ + PORT0_LOCAL_ATTEN_SHIFT) +#define PORT0_REMOTE_ATTEN_SMASK (PORT0_REMOTE_ATTEN_MASK << \ + PORT0_REMOTE_ATTEN_SHIFT) +#define PORT0_DEFAULT_ATTEN_SMASK (PORT0_DEFAULT_ATTEN_MASK << \ + PORT0_DEFAULT_ATTEN_SHIFT) + +#define PORT1_PORT_TYPE_SMASK (PORT1_PORT_TYPE_MASK << \ + PORT1_PORT_TYPE_SHIFT) +#define PORT1_LOCAL_ATTEN_SMASK (PORT1_LOCAL_ATTEN_MASK << \ + PORT1_LOCAL_ATTEN_SHIFT) +#define PORT1_REMOTE_ATTEN_SMASK (PORT1_REMOTE_ATTEN_MASK << \ + PORT1_REMOTE_ATTEN_SHIFT) +#define PORT1_DEFAULT_ATTEN_SMASK (PORT1_DEFAULT_ATTEN_MASK << \ + PORT1_DEFAULT_ATTEN_SHIFT) + +#define QSFP_MAX_POWER_SHIFT 0 +#define TX_NO_EQ_SHIFT 4 +#define TX_EQ_SHIFT 25 +#define RX_SHIFT 46 + +#define QSFP_MAX_POWER_MASK 0xFUL +#define TX_NO_EQ_MASK 0x1FFFFFUL +#define TX_EQ_MASK 0x1FFFFFUL +#define RX_MASK 0xFFFFUL + +#define QSFP_MAX_POWER_SMASK (QSFP_MAX_POWER_MASK << \ + QSFP_MAX_POWER_SHIFT) +#define TX_NO_EQ_SMASK (TX_NO_EQ_MASK << TX_NO_EQ_SHIFT) +#define TX_EQ_SMASK (TX_EQ_MASK << TX_EQ_SHIFT) +#define RX_SMASK (RX_MASK << RX_SHIFT) + +#define TX_PRECUR_SHIFT 0 +#define TX_ATTN_SHIFT 4 +#define QSFP_TX_CDR_APPLY_SHIFT 9 +#define QSFP_TX_EQ_APPLY_SHIFT 10 +#define QSFP_TX_CDR_SHIFT 11 +#define QSFP_TX_EQ_SHIFT 12 +#define TX_POSTCUR_SHIFT 16 + 
+#define TX_PRECUR_MASK 0xFUL +#define TX_ATTN_MASK 0x1FUL +#define QSFP_TX_CDR_APPLY_MASK 0x1UL +#define QSFP_TX_EQ_APPLY_MASK 0x1UL +#define QSFP_TX_CDR_MASK 0x1UL +#define QSFP_TX_EQ_MASK 0xFUL +#define TX_POSTCUR_MASK 0x1FUL + +#define TX_PRECUR_SMASK (TX_PRECUR_MASK << TX_PRECUR_SHIFT) +#define TX_ATTN_SMASK (TX_ATTN_MASK << TX_ATTN_SHIFT) +#define QSFP_TX_CDR_APPLY_SMASK (QSFP_TX_CDR_APPLY_MASK << \ + QSFP_TX_CDR_APPLY_SHIFT) +#define QSFP_TX_EQ_APPLY_SMASK (QSFP_TX_EQ_APPLY_MASK << \ + QSFP_TX_EQ_APPLY_SHIFT) +#define QSFP_TX_CDR_SMASK (QSFP_TX_CDR_MASK << QSFP_TX_CDR_SHIFT) +#define QSFP_TX_EQ_SMASK (QSFP_TX_EQ_MASK << QSFP_TX_EQ_SHIFT) +#define TX_POSTCUR_SMASK (TX_POSTCUR_MASK << TX_POSTCUR_SHIFT) + +#define QSFP_RX_CDR_APPLY_SHIFT 0 +#define QSFP_RX_EMP_APPLY_SHIFT 1 +#define QSFP_RX_AMP_APPLY_SHIFT 2 +#define QSFP_RX_CDR_SHIFT 3 +#define QSFP_RX_EMP_SHIFT 4 +#define QSFP_RX_AMP_SHIFT 8 + +#define QSFP_RX_CDR_APPLY_MASK 0x1UL +#define QSFP_RX_EMP_APPLY_MASK 0x1UL +#define QSFP_RX_AMP_APPLY_MASK 0x1UL +#define QSFP_RX_CDR_MASK 0x1UL +#define QSFP_RX_EMP_MASK 0xFUL +#define QSFP_RX_AMP_MASK 0x3UL + +#define QSFP_RX_CDR_APPLY_SMASK (QSFP_RX_CDR_APPLY_MASK << \ + QSFP_RX_CDR_APPLY_SHIFT) +#define QSFP_RX_EMP_APPLY_SMASK (QSFP_RX_EMP_APPLY_MASK << \ + QSFP_RX_EMP_APPLY_SHIFT) +#define QSFP_RX_AMP_APPLY_SMASK (QSFP_RX_AMP_APPLY_MASK << \ + QSFP_RX_AMP_APPLY_SHIFT) +#define QSFP_RX_CDR_SMASK (QSFP_RX_CDR_MASK << QSFP_RX_CDR_SHIFT) +#define QSFP_RX_EMP_SMASK (QSFP_RX_EMP_MASK << QSFP_RX_EMP_SHIFT) +#define QSFP_RX_AMP_SMASK (QSFP_RX_AMP_MASK << QSFP_RX_AMP_SHIFT) + +#define BITMAP_VERSION 1 +#define BITMAP_VERSION_SHIFT 44 +#define BITMAP_VERSION_MASK 0xFUL +#define BITMAP_VERSION_SMASK (BITMAP_VERSION_MASK << \ + BITMAP_VERSION_SHIFT) +#define CHECKSUM_SHIFT 48 +#define CHECKSUM_MASK 0xFFFFUL +#define CHECKSUM_SMASK (CHECKSUM_MASK << CHECKSUM_SHIFT) + /* platform.c */ void get_platform_config(struct hfi1_devdata *dd); void free_platform_config(struct 
hfi1_devdata *dd); diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 9fc75e7e8781..d752d6768a49 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -196,15 +196,18 @@ static void flush_tx_list(struct rvt_qp *qp) static void flush_iowait(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; - struct hfi1_ibdev *dev = to_idev(qp->ibqp.device); unsigned long flags; + seqlock_t *lock = priv->s_iowait.lock; - write_seqlock_irqsave(&dev->iowait_lock, flags); + if (!lock) + return; + write_seqlock_irqsave(lock, flags); if (!list_empty(&priv->s_iowait.list)) { list_del_init(&priv->s_iowait.list); + priv->s_iowait.lock = NULL; rvt_put_qp(qp); } - write_sequnlock_irqrestore(&dev->iowait_lock, flags); + write_sequnlock_irqrestore(lock, flags); } static inline int opa_mtu_enum_to_int(int mtu) @@ -543,6 +546,7 @@ static int iowait_sleep( ibp->rvp.n_dmawait++; qp->s_flags |= RVT_S_WAIT_DMA_DESC; list_add_tail(&priv->s_iowait.list, &sde->dmawait); + priv->s_iowait.lock = &dev->iowait_lock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC); rvt_get_qp(qp); } @@ -964,6 +968,7 @@ void notify_error_qp(struct rvt_qp *qp) if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) { qp->s_flags &= ~RVT_S_ANY_WAIT_IO; list_del_init(&priv->s_iowait.list); + priv->s_iowait.lock = NULL; rvt_put_qp(qp); } write_sequnlock(&dev->iowait_lock); diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 8bc5013f39a1..9db260fe782a 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -276,7 +276,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, rvt_get_mr(ps->s_txreq->mr); qp->s_ack_rdma_sge.sge = e->rdma_sge; qp->s_ack_rdma_sge.num_sge = 1; - qp->s_cur_sge = &qp->s_ack_rdma_sge; + ps->s_txreq->ss = &qp->s_ack_rdma_sge; if (len > pmtu) { len = pmtu; qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); @@ -290,7 +290,7 @@ static int 
make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, bth2 = mask_psn(qp->s_ack_rdma_psn++); } else { /* COMPARE_SWAP or FETCH_ADD */ - qp->s_cur_sge = NULL; + ps->s_txreq->ss = NULL; len = 0; qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); ohdr->u.at.aeth = hfi1_compute_aeth(qp); @@ -306,7 +306,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); /* FALLTHROUGH */ case OP(RDMA_READ_RESPONSE_MIDDLE): - qp->s_cur_sge = &qp->s_ack_rdma_sge; + ps->s_txreq->ss = &qp->s_ack_rdma_sge; ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr; if (ps->s_txreq->mr) rvt_get_mr(ps->s_txreq->mr); @@ -335,7 +335,7 @@ normal: */ qp->s_ack_state = OP(SEND_ONLY); qp->s_flags &= ~RVT_S_ACK_PENDING; - qp->s_cur_sge = NULL; + ps->s_txreq->ss = NULL; if (qp->s_nak_state) ohdr->u.aeth = cpu_to_be32((qp->r_msn & HFI1_MSN_MASK) | @@ -351,7 +351,7 @@ normal: qp->s_rdma_ack_cnt++; qp->s_hdrwords = hwords; ps->s_txreq->sde = priv->s_sde; - qp->s_cur_size = len; + ps->s_txreq->s_cur_size = len; hfi1_make_ruc_header(qp, ohdr, bth0, bth2, middle, ps); /* pbc */ ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2; @@ -801,8 +801,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_len -= len; qp->s_hdrwords = hwords; ps->s_txreq->sde = priv->s_sde; - qp->s_cur_sge = ss; - qp->s_cur_size = len; + ps->s_txreq->ss = ss; + ps->s_txreq->s_cur_size = len; hfi1_make_ruc_header( qp, ohdr, @@ -1146,8 +1146,6 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) { struct ib_other_headers *ohdr; struct rvt_swqe *wqe; - struct ib_wc wc; - unsigned i; u32 opcode; u32 psn; @@ -1195,22 +1193,8 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) qp->s_last = s_last; /* see post_send() */ barrier(); - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } - /* Post a send completion queue entry if requested. 
*/ - if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED)) { - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr.wr_id; - wc.status = IB_WC_SUCCESS; - wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode]; - wc.byte_len = wqe->length; - wc.qp = &qp->ibqp; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0); - } + rvt_put_swqe(wqe); + rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS); } /* * If we were waiting for sends to complete before re-sending, @@ -1240,9 +1224,6 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, struct rvt_swqe *wqe, struct hfi1_ibport *ibp) { - struct ib_wc wc; - unsigned i; - lockdep_assert_held(&qp->s_lock); /* * Don't decrement refcount and don't generate a @@ -1253,28 +1234,14 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { u32 s_last; - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } + rvt_put_swqe(wqe); s_last = qp->s_last; if (++s_last >= qp->s_size) s_last = 0; qp->s_last = s_last; /* see post_send() */ barrier(); - /* Post a send completion queue entry if requested. */ - if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED)) { - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr.wr_id; - wc.status = IB_WC_SUCCESS; - wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode]; - wc.byte_len = wqe->length; - wc.qp = &qp->ibqp; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0); - } + rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS); } else { struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); @@ -2295,7 +2262,7 @@ send_last: hfi1_copy_sge(&qp->r_sge, data, tlen, 1, copy_last); rvt_put_ss(&qp->r_sge); qp->r_msn++; - if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) + if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) break; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; @@ -2410,8 +2377,7 @@ send_last: * Update the next expected PSN. 
We add 1 later * below, so only add the remainder here. */ - if (len > pmtu) - qp->r_psn += (len - 1) / pmtu; + qp->r_psn += rvt_div_mtu(qp, len - 1); } else { e->rdma_sge.mr = NULL; e->rdma_sge.vaddr = NULL; diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index a1576aea4756..717ed4b159d3 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -239,16 +239,6 @@ bail: return ret; } -static __be64 get_sguid(struct hfi1_ibport *ibp, unsigned index) -{ - if (!index) { - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - - return cpu_to_be64(ppd->guid); - } - return ibp->guids[index - 1]; -} - static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id) { return (gid->global.interface_id == id && @@ -699,9 +689,9 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, /* The SGID is 32-bit aligned. */ hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix; hdr->sgid.global.interface_id = - grh->sgid_index && grh->sgid_index < ARRAY_SIZE(ibp->guids) ? - ibp->guids[grh->sgid_index - 1] : - cpu_to_be64(ppd_from_ibp(ibp)->guid); + grh->sgid_index < HFI1_GUIDS_PER_PORT ? + get_sguid(ibp, grh->sgid_index) : + get_sguid(ibp, HFI1_PORT_GUID_INDEX); hdr->dgid = grh->dgid; /* GRH header size in 32-bit words. */ @@ -777,8 +767,8 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, u32 bth1; /* Construct the header. 
*/ - extra_bytes = -qp->s_cur_size & 3; - nwords = (qp->s_cur_size + extra_bytes) >> 2; + extra_bytes = -ps->s_txreq->s_cur_size & 3; + nwords = (ps->s_txreq->s_cur_size + extra_bytes) >> 2; lrh0 = HFI1_LRH_BTH; if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { qp->s_hdrwords += hfi1_make_grh(ibp, @@ -952,7 +942,6 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status) { u32 old_last, last; - unsigned i; if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND)) return; @@ -964,32 +953,13 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, qp->s_last = last; /* See post_send() */ barrier(); - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } + rvt_put_swqe(wqe); if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); - /* See ch. 11.2.4.1 and 10.7.3.1 */ - if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED) || - status != IB_WC_SUCCESS) { - struct ib_wc wc; - - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr.wr_id; - wc.status = status; - wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode]; - wc.qp = &qp->ibqp; - if (status == IB_WC_SUCCESS) - wc.byte_len = wqe->length; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, - status != IB_WC_SUCCESS); - } + rvt_qp_swqe_complete(qp, wqe, status); if (qp->s_acked == old_last) qp->s_acked = last; diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index fd39bcaa062d..7102a076146d 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -375,7 +375,7 @@ static inline void complete_tx(struct sdma_engine *sde, sde->head_sn, tx->sn); sde->head_sn++; #endif - sdma_txclean(sde->dd, tx); + __sdma_txclean(sde->dd, tx); if (complete) (*complete)(tx, res); if (wait && iowait_sdma_dec(wait)) @@ -1643,7 +1643,7 @@ 
static inline u8 ahg_mode(struct sdma_txreq *tx) } /** - * sdma_txclean() - clean tx of mappings, descp *kmalloc's + * __sdma_txclean() - clean tx of mappings, descp *kmalloc's * @dd: hfi1_devdata for unmapping * @tx: tx request to clean * @@ -1653,7 +1653,7 @@ static inline u8 ahg_mode(struct sdma_txreq *tx) * The code can be called multiple times without issue. * */ -void sdma_txclean( +void __sdma_txclean( struct hfi1_devdata *dd, struct sdma_txreq *tx) { @@ -3080,7 +3080,7 @@ static int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx) tx->descp[i] = tx->descs[i]; return 0; enomem: - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return -ENOMEM; } @@ -3109,14 +3109,14 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx, rval = _extend_sdma_tx_descs(dd, tx); if (rval) { - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return rval; } /* If coalesce buffer is allocated, copy data into it */ if (tx->coalesce_buf) { if (type == SDMA_MAP_NONE) { - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return -EINVAL; } @@ -3124,7 +3124,7 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx, kvaddr = kmap(page); kvaddr += offset; } else if (WARN_ON(!kvaddr)) { - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return -EINVAL; } @@ -3154,7 +3154,7 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) { - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return -ENOSPC; } @@ -3196,7 +3196,7 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx) if ((unlikely(tx->num_desc == tx->desc_limit))) { rval = _extend_sdma_tx_descs(dd, tx); if (rval) { - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return rval; } } diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h index 56257ea3598f..21f1e2834f37 100644 --- a/drivers/infiniband/hw/hfi1/sdma.h +++ 
b/drivers/infiniband/hw/hfi1/sdma.h @@ -667,7 +667,13 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx, int type, void *kvaddr, struct page *page, unsigned long offset, u16 len); int _pad_sdma_tx_descs(struct hfi1_devdata *, struct sdma_txreq *); -void sdma_txclean(struct hfi1_devdata *, struct sdma_txreq *); +void __sdma_txclean(struct hfi1_devdata *, struct sdma_txreq *); + +static inline void sdma_txclean(struct hfi1_devdata *dd, struct sdma_txreq *tx) +{ + if (tx->num_desc) + __sdma_txclean(dd, tx); +} /* helpers used by public routines */ static inline void _sdma_close_tx(struct hfi1_devdata *dd, @@ -753,7 +759,7 @@ static inline int sdma_txadd_page( DMA_TO_DEVICE); if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) { - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return -ENOSPC; } @@ -834,7 +840,7 @@ static inline int sdma_txadd_kvaddr( DMA_TO_DEVICE); if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) { - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return -ENOSPC; } diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 5e6d1bac4914..b141a78ae38b 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -258,8 +258,8 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_len -= len; qp->s_hdrwords = hwords; ps->s_txreq->sde = priv->s_sde; - qp->s_cur_sge = &qp->s_sge; - qp->s_cur_size = len; + ps->s_txreq->ss = &qp->s_sge; + ps->s_txreq->s_cur_size = len; hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), mask_psn(qp->s_psn++), middle, ps); /* pbc */ diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 97ae24b6314c..c071955c0272 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -354,8 +354,8 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) /* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. 
*/ qp->s_hdrwords = 7; - qp->s_cur_size = wqe->length; - qp->s_cur_sge = &qp->s_sge; + ps->s_txreq->s_cur_size = wqe->length; + ps->s_txreq->ss = &qp->s_sge; qp->s_srate = ah_attr->static_rate; qp->srate_mbps = ib_rate_to_mbps(qp->s_srate); qp->s_wqe = wqe; diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index a761f804111e..663980ef01a8 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -115,6 +115,7 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 #define KDETH_HCRC_LOWER_MASK 0xff #define AHG_KDETH_INTR_SHIFT 12 +#define AHG_KDETH_SH_SHIFT 13 #define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4) #define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff) @@ -144,8 +145,9 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 #define KDETH_OM_LARGE 64 #define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1)) -/* Last packet in the request */ -#define TXREQ_FLAGS_REQ_LAST_PKT BIT(0) +/* Tx request flag bits */ +#define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */ +#define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */ /* SDMA request flag bits */ #define SDMA_REQ_FOR_THREAD 1 @@ -943,8 +945,13 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) tx->busycount = 0; INIT_LIST_HEAD(&tx->list); + /* + * For the last packet set the ACK request + * and disable header suppression. + */ if (req->seqnum == req->info.npkts - 1) - tx->flags |= TXREQ_FLAGS_REQ_LAST_PKT; + tx->flags |= (TXREQ_FLAGS_REQ_ACK | + TXREQ_FLAGS_REQ_DISABLE_SH); /* * Calculate the payload size - this is min of the fragment @@ -963,11 +970,22 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) } datalen = compute_data_length(req, tx); + + /* + * Disable header suppression for the payload <= 8DWS. 
+ * If there is an uncorrectable error in the receive + * data FIFO when the received payload size is less than + * or equal to 8DWS then the RxDmaDataFifoRdUncErr is + * not reported.There is set RHF.EccErr if the header + * is not suppressed. + */ if (!datalen) { SDMA_DBG(req, "Request has data but pkt len is 0"); ret = -EFAULT; goto free_tx; + } else if (datalen <= 32) { + tx->flags |= TXREQ_FLAGS_REQ_DISABLE_SH; } } @@ -990,6 +1008,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) LRH2PBC(lrhlen); tx->hdr.pbc[0] = cpu_to_le16(pbclen); } + ret = check_header_template(req, &tx->hdr, + lrhlen, datalen); + if (ret) + goto free_tx; ret = sdma_txinit_ahg(&tx->txreq, SDMA_TXREQ_F_AHG_COPY, sizeof(tx->hdr) + datalen, @@ -1351,7 +1373,7 @@ static int set_txreq_header(struct user_sdma_request *req, req->seqnum)); /* Set ACK request on last packet */ - if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) + if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK)) hdr->bth[2] |= cpu_to_be32(1UL << 31); /* Set the new offset */ @@ -1384,8 +1406,8 @@ static int set_txreq_header(struct user_sdma_request *req, /* Set KDETH.TID based on value for this TID */ KDETH_SET(hdr->kdeth.ver_tid_offset, TID, EXP_TID_GET(tidval, IDX)); - /* Clear KDETH.SH only on the last packet */ - if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) + /* Clear KDETH.SH when DISABLE_SH flag is set */ + if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH)) KDETH_SET(hdr->kdeth.ver_tid_offset, SH, 0); /* * Set the KDETH.OFFSET and KDETH.OM based on size of @@ -1429,7 +1451,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, /* BTH.PSN and BTH.A */ val32 = (be32_to_cpu(hdr->bth[2]) + req->seqnum) & (HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 
0x7fffffff : 0xffffff); - if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) + if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK)) val32 |= 1UL << 31; AHG_HEADER_SET(req->ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16)); AHG_HEADER_SET(req->ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff)); @@ -1468,19 +1490,23 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, AHG_HEADER_SET(req->ahg, diff, 7, 0, 16, ((!!(req->omfactor - KDETH_OM_SMALL)) << 15 | ((req->tidoffset / req->omfactor) & 0x7fff))); - /* KDETH.TIDCtrl, KDETH.TID */ + /* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */ val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) | - (EXP_TID_GET(tidval, IDX) & 0x3ff)); - /* Clear KDETH.SH on last packet */ - if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) { - val |= cpu_to_le16(KDETH_GET(hdr->kdeth.ver_tid_offset, - INTR) << - AHG_KDETH_INTR_SHIFT); - val &= cpu_to_le16(~(1U << 13)); - AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val); + (EXP_TID_GET(tidval, IDX) & 0x3ff)); + + if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH)) { + val |= cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset, + INTR) << + AHG_KDETH_INTR_SHIFT)); } else { - AHG_HEADER_SET(req->ahg, diff, 7, 16, 12, val); + val |= KDETH_GET(hdr->kdeth.ver_tid_offset, SH) ? + cpu_to_le16(0x1 << AHG_KDETH_SH_SHIFT) : + cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset, + INTR) << + AHG_KDETH_INTR_SHIFT)); } + + AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val); } trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt, diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 4b7a16ceb362..95ed4d6da510 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -297,22 +297,6 @@ static inline int wss_exceeds_threshold(void) } /* - * Translate ib_wr_opcode into ib_wc_opcode. 
- */ -const enum ib_wc_opcode ib_hfi1_wc_opcode[] = { - [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, - [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, - [IB_WR_SEND] = IB_WC_SEND, - [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, - [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, - [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, - [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD, - [IB_WR_SEND_WITH_INV] = IB_WC_SEND, - [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV, - [IB_WR_REG_MR] = IB_WC_REG_MR -}; - -/* * Length of header by opcode, 0 --> not supported */ const u8 hdr_len_by_opcode[256] = { @@ -694,6 +678,7 @@ static void mem_timer(unsigned long data) qp = iowait_to_qp(wait); priv = qp->priv; list_del_init(&priv->s_iowait.list); + priv->s_iowait.lock = NULL; /* refcount held until actual wake up */ if (!list_empty(list)) mod_timer(&dev->mem_timer, jiffies + 1); @@ -769,6 +754,7 @@ static int wait_kmem(struct hfi1_ibdev *dev, mod_timer(&dev->mem_timer, jiffies + 1); qp->s_flags |= RVT_S_WAIT_KMEM; list_add_tail(&priv->s_iowait.list, &dev->memwait); + priv->s_iowait.lock = &dev->iowait_lock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM); rvt_get_qp(qp); } @@ -788,10 +774,10 @@ static int wait_kmem(struct hfi1_ibdev *dev, */ static noinline int build_verbs_ulp_payload( struct sdma_engine *sde, - struct rvt_sge_state *ss, u32 length, struct verbs_txreq *tx) { + struct rvt_sge_state *ss = tx->ss; struct rvt_sge *sg_list = ss->sg_list; struct rvt_sge sge = ss->sge; u8 num_sge = ss->num_sge; @@ -835,7 +821,6 @@ bail_txadd: /* New API */ static int build_verbs_tx_desc( struct sdma_engine *sde, - struct rvt_sge_state *ss, u32 length, struct verbs_txreq *tx, struct hfi1_ahg_info *ahg_info, @@ -879,9 +864,9 @@ static int build_verbs_tx_desc( goto bail_txadd; } - /* add the ulp payload - if any. ss can be NULL for acks */ - if (ss) - ret = build_verbs_ulp_payload(sde, ss, length, tx); + /* add the ulp payload - if any. 
tx->ss can be NULL for acks */ + if (tx->ss) + ret = build_verbs_ulp_payload(sde, length, tx); bail_txadd: return ret; } @@ -892,8 +877,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, struct hfi1_qp_priv *priv = qp->priv; struct hfi1_ahg_info *ahg_info = priv->s_ahg; u32 hdrwords = qp->s_hdrwords; - struct rvt_sge_state *ss = qp->s_cur_sge; - u32 len = qp->s_cur_size; + u32 len = ps->s_txreq->s_cur_size; u32 plen = hdrwords + ((len + 3) >> 2) + 2; /* includes pbc */ struct hfi1_ibdev *dev = ps->dev; struct hfi1_pportdata *ppd = ps->ppd; @@ -918,7 +902,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, plen); } tx->wqe = qp->s_wqe; - ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahg_info, pbc); + ret = build_verbs_tx_desc(tx->sde, len, tx, ahg_info, pbc); if (unlikely(ret)) goto bail_build; } @@ -980,6 +964,7 @@ static int pio_wait(struct rvt_qp *qp, qp->s_flags |= flag; was_empty = list_empty(&sc->piowait); list_add_tail(&priv->s_iowait.list, &sc->piowait); + priv->s_iowait.lock = &dev->iowait_lock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO); rvt_get_qp(qp); /* counting: only call wantpiobuf_intr if first user */ @@ -1008,8 +993,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, { struct hfi1_qp_priv *priv = qp->priv; u32 hdrwords = qp->s_hdrwords; - struct rvt_sge_state *ss = qp->s_cur_sge; - u32 len = qp->s_cur_size; + struct rvt_sge_state *ss = ps->s_txreq->ss; + u32 len = ps->s_txreq->s_cur_size; u32 dwords = (len + 3) >> 2; u32 plen = hdrwords + dwords + 2; /* includes pbc */ struct hfi1_pportdata *ppd = ps->ppd; @@ -1237,7 +1222,7 @@ static inline send_routine get_send_routine(struct rvt_qp *qp, u8 op = get_opcode(h); if (piothreshold && - qp->s_cur_size <= min(piothreshold, qp->pmtu) && + tx->s_cur_size <= min(piothreshold, qp->pmtu) && (BIT(op & OPMASK) & pio_opmask[op >> 5]) && iowait_sdma_pending(&priv->s_iowait) == 0 && !sdma_txreq_built(&tx->txreq)) @@ -1483,15 +1468,11 @@ static 
int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp, int guid_index, __be64 *guid) { struct hfi1_ibport *ibp = container_of(rvp, struct hfi1_ibport, rvp); - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - if (guid_index == 0) - *guid = cpu_to_be64(ppd->guid); - else if (guid_index < HFI1_GUIDS_PER_PORT) - *guid = ibp->guids[guid_index - 1]; - else + if (guid_index >= HFI1_GUIDS_PER_PORT) return -EINVAL; + *guid = get_sguid(ibp, guid_index); return 0; } @@ -1610,6 +1591,154 @@ static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str, dc8051_ver_min(ver)); } +static const char * const driver_cntr_names[] = { + /* must be element 0*/ + "DRIVER_KernIntr", + "DRIVER_ErrorIntr", + "DRIVER_Tx_Errs", + "DRIVER_Rcv_Errs", + "DRIVER_HW_Errs", + "DRIVER_NoPIOBufs", + "DRIVER_CtxtsOpen", + "DRIVER_RcvLen_Errs", + "DRIVER_EgrBufFull", + "DRIVER_EgrHdrFull" +}; + +static const char **dev_cntr_names; +static const char **port_cntr_names; +static int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names); +static int num_dev_cntrs; +static int num_port_cntrs; +static int cntr_names_initialized; + +/* + * Convert a list of names separated by '\n' into an array of NULL terminated + * strings. Optionally some entries can be reserved in the array to hold extra + * external strings. 
+ */ +static int init_cntr_names(const char *names_in, + const int names_len, + int num_extra_names, + int *num_cntrs, + const char ***cntr_names) +{ + char *names_out, *p, **q; + int i, n; + + n = 0; + for (i = 0; i < names_len; i++) + if (names_in[i] == '\n') + n++; + + names_out = kmalloc((n + num_extra_names) * sizeof(char *) + names_len, + GFP_KERNEL); + if (!names_out) { + *num_cntrs = 0; + *cntr_names = NULL; + return -ENOMEM; + } + + p = names_out + (n + num_extra_names) * sizeof(char *); + memcpy(p, names_in, names_len); + + q = (char **)names_out; + for (i = 0; i < n; i++) { + q[i] = p; + p = strchr(p, '\n'); + *p++ = '\0'; + } + + *num_cntrs = n; + *cntr_names = (const char **)names_out; + return 0; +} + +static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev, + u8 port_num) +{ + int i, err; + + if (!cntr_names_initialized) { + struct hfi1_devdata *dd = dd_from_ibdev(ibdev); + + err = init_cntr_names(dd->cntrnames, + dd->cntrnameslen, + num_driver_cntrs, + &num_dev_cntrs, + &dev_cntr_names); + if (err) + return NULL; + + for (i = 0; i < num_driver_cntrs; i++) + dev_cntr_names[num_dev_cntrs + i] = + driver_cntr_names[i]; + + err = init_cntr_names(dd->portcntrnames, + dd->portcntrnameslen, + 0, + &num_port_cntrs, + &port_cntr_names); + if (err) { + kfree(dev_cntr_names); + dev_cntr_names = NULL; + return NULL; + } + cntr_names_initialized = 1; + } + + if (!port_num) + return rdma_alloc_hw_stats_struct( + dev_cntr_names, + num_dev_cntrs + num_driver_cntrs, + RDMA_HW_STATS_DEFAULT_LIFESPAN); + else + return rdma_alloc_hw_stats_struct( + port_cntr_names, + num_port_cntrs, + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +static u64 hfi1_sps_ints(void) +{ + unsigned long flags; + struct hfi1_devdata *dd; + u64 sps_ints = 0; + + spin_lock_irqsave(&hfi1_devs_lock, flags); + list_for_each_entry(dd, &hfi1_dev_list, list) { + sps_ints += get_all_cpu_total(dd->int_counter); + } + spin_unlock_irqrestore(&hfi1_devs_lock, flags); + return sps_ints; +} + +static int 
get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, + u8 port, int index) +{ + u64 *values; + int count; + + if (!port) { + u64 *stats = (u64 *)&hfi1_stats; + int i; + + hfi1_read_cntrs(dd_from_ibdev(ibdev), NULL, &values); + values[num_dev_cntrs] = hfi1_sps_ints(); + for (i = 1; i < num_driver_cntrs; i++) + values[num_dev_cntrs + i] = stats[i]; + count = num_dev_cntrs + num_driver_cntrs; + } else { + struct hfi1_ibport *ibp = to_iport(ibdev, port); + + hfi1_read_portcntrs(ppd_from_ibp(ibp), NULL, &values); + count = num_port_cntrs; + } + + memcpy(stats->value, values, count * sizeof(u64)); + return count; +} + /** * hfi1_register_ib_device - register our device with the infiniband core * @dd: the device data structure @@ -1620,6 +1749,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) struct hfi1_ibdev *dev = &dd->verbs_dev; struct ib_device *ibdev = &dev->rdi.ibdev; struct hfi1_pportdata *ppd = dd->pport; + struct hfi1_ibport *ibp = &ppd->ibport_data; unsigned i; int ret; size_t lcpysz = IB_DEVICE_NAME_MAX; @@ -1632,6 +1762,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev); seqlock_init(&dev->iowait_lock); + seqlock_init(&dev->txwait_lock); INIT_LIST_HEAD(&dev->txwait); INIT_LIST_HEAD(&dev->memwait); @@ -1639,20 +1770,24 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) if (ret) goto err_verbs_txreq; + /* Use first-port GUID as node guid */ + ibdev->node_guid = get_sguid(ibp, HFI1_PORT_GUID_INDEX); + /* * The system image GUID is supposed to be the same for all * HFIs in a single system but since there can be other * device types in the system, we can't be sure this is unique. 
*/ if (!ib_hfi1_sys_image_guid) - ib_hfi1_sys_image_guid = cpu_to_be64(ppd->guid); + ib_hfi1_sys_image_guid = ibdev->node_guid; lcpysz = strlcpy(ibdev->name, class_name(), lcpysz); strlcpy(ibdev->name + lcpysz, "_%d", IB_DEVICE_NAME_MAX - lcpysz); ibdev->owner = THIS_MODULE; - ibdev->node_guid = cpu_to_be64(ppd->guid); ibdev->phys_port_cnt = dd->num_pports; ibdev->dma_device = &dd->pcidev->dev; ibdev->modify_device = modify_device; + ibdev->alloc_hw_stats = alloc_hw_stats; + ibdev->get_hw_stats = get_hw_stats; /* keep process mad in the driver */ ibdev->process_mad = hfi1_process_mad; @@ -1767,6 +1902,10 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd) del_timer_sync(&dev->mem_timer); verbs_txreq_exit(dev); + + kfree(dev_cntr_names); + kfree(port_cntr_names); + cntr_names_initialized = 0; } void hfi1_cnp_rcv(struct hfi1_packet *packet) diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 1c3815d89eb7..e6b893010e6d 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -73,7 +73,6 @@ struct hfi1_packet; #include "iowait.h" #define HFI1_MAX_RDMA_ATOMIC 16 -#define HFI1_GUIDS_PER_PORT 5 /* * Increment this value if any changes that break userspace ABI @@ -169,8 +168,6 @@ struct hfi1_ibport { struct rvt_qp __rcu *qp[2]; struct rvt_ibport rvp; - __be64 guids[HFI1_GUIDS_PER_PORT - 1]; /* writable GUIDs */ - /* the first 16 entries are sl_to_vl for !OPA */ u8 sl_to_sc[32]; u8 sc_to_sl[32]; @@ -180,18 +177,19 @@ struct hfi1_ibdev { struct rvt_dev_info rdi; /* Must be first */ /* QP numbers are shared by all IB ports */ - /* protect wait lists */ - seqlock_t iowait_lock; + /* protect txwait list */ + seqlock_t txwait_lock ____cacheline_aligned_in_smp; struct list_head txwait; /* list for wait verbs_txreq */ struct list_head memwait; /* list for wait kernel memory */ - struct list_head txreq_free; struct kmem_cache *verbs_txreq_cache; - struct timer_list mem_timer; + u64 n_txwait; + u64 
n_kmem_wait; + /* protect iowait lists */ + seqlock_t iowait_lock ____cacheline_aligned_in_smp; u64 n_piowait; u64 n_piodrain; - u64 n_txwait; - u64 n_kmem_wait; + struct timer_list mem_timer; #ifdef CONFIG_DEBUG_FS /* per HFI debugfs */ diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c index 094ab829ec42..5d23172c470f 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.c +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c @@ -72,22 +72,22 @@ void hfi1_put_txreq(struct verbs_txreq *tx) kmem_cache_free(dev->verbs_txreq_cache, tx); do { - seq = read_seqbegin(&dev->iowait_lock); + seq = read_seqbegin(&dev->txwait_lock); if (!list_empty(&dev->txwait)) { struct iowait *wait; - write_seqlock_irqsave(&dev->iowait_lock, flags); + write_seqlock_irqsave(&dev->txwait_lock, flags); wait = list_first_entry(&dev->txwait, struct iowait, list); qp = iowait_to_qp(wait); priv = qp->priv; list_del_init(&priv->s_iowait.list); /* refcount held until actual wake up */ - write_sequnlock_irqrestore(&dev->iowait_lock, flags); + write_sequnlock_irqrestore(&dev->txwait_lock, flags); hfi1_qp_wakeup(qp, RVT_S_WAIT_TX); break; } - } while (read_seqretry(&dev->iowait_lock, seq)); + } while (read_seqretry(&dev->txwait_lock, seq)); } struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, @@ -96,7 +96,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, { struct verbs_txreq *tx = ERR_PTR(-EBUSY); - write_seqlock(&dev->iowait_lock); + write_seqlock(&dev->txwait_lock); if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { struct hfi1_qp_priv *priv; @@ -108,13 +108,14 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, dev->n_txwait++; qp->s_flags |= RVT_S_WAIT_TX; list_add_tail(&priv->s_iowait.list, &dev->txwait); + priv->s_iowait.lock = &dev->txwait_lock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_TX); rvt_get_qp(qp); } qp->s_flags &= ~RVT_S_BUSY; } out: - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&dev->txwait_lock); return tx; } diff 
--git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h index 5660897593ba..76216f2ef35a 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.h +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h @@ -65,6 +65,7 @@ struct verbs_txreq { struct sdma_engine *sde; struct send_context *psc; u16 hdr_dwords; + u16 s_cur_size; }; struct hfi1_ibdev; diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 728e0a030d2e..2b5982f743ef 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -420,8 +420,7 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd, if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_NAK; - atomic_inc( - &qp->refcount); + rvt_get_qp(qp); list_add_tail( &qp->rspwait, &rcd->qp_wait_list); diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 2097512e75aa..031433cb7206 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -941,8 +941,6 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) { struct ib_other_headers *ohdr; struct rvt_swqe *wqe; - struct ib_wc wc; - unsigned i; u32 opcode; u32 psn; @@ -988,22 +986,8 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) qp->s_last = s_last; /* see post_send() */ barrier(); - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } - /* Post a send completion queue entry if requested. 
*/ - if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED)) { - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr.wr_id; - wc.status = IB_WC_SUCCESS; - wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode]; - wc.byte_len = wqe->length; - wc.qp = &qp->ibqp; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0); - } + rvt_put_swqe(wqe); + rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS); } /* * If we were waiting for sends to complete before resending, @@ -1032,9 +1016,6 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, struct rvt_swqe *wqe, struct qib_ibport *ibp) { - struct ib_wc wc; - unsigned i; - /* * Don't decrement refcount and don't generate a * completion if the SWQE is being resent until the send @@ -1044,28 +1025,14 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { u32 s_last; - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } + rvt_put_swqe(wqe); s_last = qp->s_last; if (++s_last >= qp->s_size) s_last = 0; qp->s_last = s_last; /* see post_send() */ barrier(); - /* Post a send completion queue entry if requested. */ - if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED)) { - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr.wr_id; - wc.status = IB_WC_SUCCESS; - wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode]; - wc.byte_len = wqe->length; - wc.qp = &qp->ibqp; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0); - } + rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS); } else this_cpu_inc(*ibp->rvp.rc_delayed_comp); @@ -2112,8 +2079,7 @@ send_last: * Update the next expected PSN. We add 1 later * below, so only add the remainder here. 
*/ - if (len > pmtu) - qp->r_psn += (len - 1) / pmtu; + qp->r_psn += rvt_div_mtu(qp, len - 1); } else { e->rdma_sge.mr = NULL; e->rdma_sge.vaddr = NULL; diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index de1bde5950f5..e54a2feeeb10 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -793,7 +793,6 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status) { u32 old_last, last; - unsigned i; if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND)) return; @@ -805,32 +804,13 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, qp->s_last = last; /* See post_send() */ barrier(); - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } + rvt_put_swqe(wqe); if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); - /* See ch. 11.2.4.1 and 10.7.3.1 */ - if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED) || - status != IB_WC_SUCCESS) { - struct ib_wc wc; - - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr.wr_id; - wc.status = status; - wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode]; - wc.qp = &qp->ibqp; - if (status == IB_WC_SUCCESS) - wc.byte_len = wqe->length; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, - status != IB_WC_SUCCESS); - } + rvt_qp_swqe_complete(qp, wqe, status); if (qp->s_acked == old_last) qp->s_acked = last; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 954f15064514..4b54c0ddd08a 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -114,19 +114,6 @@ module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(disable_sma, "Disable the SMA"); /* - * Translate ib_wr_opcode into ib_wc_opcode. 
- */ -const enum ib_wc_opcode ib_qib_wc_opcode[] = { - [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, - [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, - [IB_WR_SEND] = IB_WC_SEND, - [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, - [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, - [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, - [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD -}; - -/* * System image GUID. */ __be64 ib_qib_sys_image_guid; @@ -464,7 +451,7 @@ static void mem_timer(unsigned long data) priv = list_entry(list->next, struct qib_qp_priv, iowait); qp = priv->owner; list_del_init(&priv->iowait); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); if (!list_empty(list)) mod_timer(&dev->mem_timer, jiffies + 1); } @@ -477,8 +464,7 @@ static void mem_timer(unsigned long data) qib_schedule_send(qp); } spin_unlock_irqrestore(&qp->s_lock, flags); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } } @@ -762,7 +748,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx) iowait); qp = priv->owner; list_del_init(&priv->iowait); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); spin_unlock_irqrestore(&dev->rdi.pending_lock, flags); spin_lock_irqsave(&qp->s_lock, flags); @@ -772,8 +758,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx) } spin_unlock_irqrestore(&qp->s_lock, flags); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } else spin_unlock_irqrestore(&dev->rdi.pending_lock, flags); } @@ -808,7 +793,7 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail) break; avail -= qpp->s_tx->txreq.sg_count; list_del_init(&qpp->iowait); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); qps[n++] = qp; } @@ -822,8 +807,7 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail) qib_schedule_send(qp); } spin_unlock(&qp->s_lock); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } } @@ -1288,7 +1272,7 @@ void qib_ib_piobufavail(struct qib_devdata *dd) priv = list_entry(list->next, 
struct qib_qp_priv, iowait); qp = priv->owner; list_del_init(&priv->iowait); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); qps[n++] = qp; } dd->f_wantpiobuf_intr(dd, 0); @@ -1306,8 +1290,7 @@ full: spin_unlock_irqrestore(&qp->s_lock, flags); /* Notify qib_destroy_qp() if it is waiting. */ - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } } diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 6d9904a4a0ab..4d0b6992e847 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -119,18 +119,17 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited) if (cq->notify == IB_CQ_NEXT_COMP || (cq->notify == IB_CQ_SOLICITED && (solicited || entry->status != IB_WC_SUCCESS))) { - struct kthread_worker *worker; /* * This will cause send_complete() to be called in * another thread. */ - smp_read_barrier_depends(); /* see rvt_cq_exit */ - worker = cq->rdi->worker; - if (likely(worker)) { + spin_lock(&cq->rdi->n_cqs_lock); + if (likely(cq->rdi->worker)) { cq->notify = RVT_CQ_NONE; cq->triggered++; - kthread_queue_work(worker, &cq->comptask); + kthread_queue_work(cq->rdi->worker, &cq->comptask); } + spin_unlock(&cq->rdi->n_cqs_lock); } spin_unlock_irqrestore(&cq->lock, flags); @@ -240,15 +239,15 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, } } - spin_lock(&rdi->n_cqs_lock); + spin_lock_irq(&rdi->n_cqs_lock); if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) { - spin_unlock(&rdi->n_cqs_lock); + spin_unlock_irq(&rdi->n_cqs_lock); ret = ERR_PTR(-ENOMEM); goto bail_ip; } rdi->n_cqs_allocated++; - spin_unlock(&rdi->n_cqs_lock); + spin_unlock_irq(&rdi->n_cqs_lock); if (cq->ip) { spin_lock_irq(&rdi->pending_lock); @@ -296,9 +295,9 @@ int rvt_destroy_cq(struct ib_cq *ibcq) struct rvt_dev_info *rdi = cq->rdi; kthread_flush_work(&cq->comptask); - spin_lock(&rdi->n_cqs_lock); + spin_lock_irq(&rdi->n_cqs_lock); rdi->n_cqs_allocated--; - 
spin_unlock(&rdi->n_cqs_lock); + spin_unlock_irq(&rdi->n_cqs_lock); if (cq->ip) kref_put(&cq->ip->ref, rvt_release_mmap_info); else @@ -504,33 +503,23 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) */ int rvt_driver_cq_init(struct rvt_dev_info *rdi) { - int ret = 0; int cpu; - struct task_struct *task; + struct kthread_worker *worker; if (rdi->worker) return 0; + spin_lock_init(&rdi->n_cqs_lock); - rdi->worker = kzalloc(sizeof(*rdi->worker), GFP_KERNEL); - if (!rdi->worker) - return -ENOMEM; - kthread_init_worker(rdi->worker); - task = kthread_create_on_node( - kthread_worker_fn, - rdi->worker, - rdi->dparms.node, - "%s", rdi->dparms.cq_name); - if (IS_ERR(task)) { - kfree(rdi->worker); - rdi->worker = NULL; - return PTR_ERR(task); - } - set_user_nice(task, MIN_NICE); cpu = cpumask_first(cpumask_of_node(rdi->dparms.node)); - kthread_bind(task, cpu); - wake_up_process(task); - return ret; + worker = kthread_create_worker_on_cpu(cpu, 0, + "%s", rdi->dparms.cq_name); + if (IS_ERR(worker)) + return PTR_ERR(worker); + + set_user_nice(worker->task, MIN_NICE); + rdi->worker = worker; + return 0; } /** @@ -541,13 +530,14 @@ void rvt_cq_exit(struct rvt_dev_info *rdi) { struct kthread_worker *worker; - worker = rdi->worker; - if (!worker) + /* block future queuing from send_complete() */ + spin_lock_irq(&rdi->n_cqs_lock); + if (!rdi->worker) { + spin_unlock_irq(&rdi->n_cqs_lock); return; - /* blocks future queuing from send_complete() */ + } rdi->worker = NULL; - smp_wmb(); /* See rdi_cq_enter */ - kthread_flush_worker(worker); - kthread_stop(worker->task); - kfree(worker); + spin_unlock_irq(&rdi->n_cqs_lock); + + kthread_destroy_worker(worker); } diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c index 983d319ac976..05c8c2afb0e3 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.c +++ b/drivers/infiniband/sw/rdmavt/mcast.c @@ -81,7 +81,7 @@ static struct rvt_mcast_qp *rvt_mcast_qp_alloc(struct rvt_qp *qp) goto 
bail; mqp->qp = qp; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); bail: return mqp; @@ -92,8 +92,7 @@ static void rvt_mcast_qp_free(struct rvt_mcast_qp *mqp) struct rvt_qp *qp = mqp->qp; /* Notify hfi1_destroy_qp() if it is waiting. */ - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); kfree(mqp); } diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 46b64970058e..52fd15276ee6 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -51,6 +51,7 @@ #include <rdma/rdma_vt.h> #include "vt.h" #include "mr.h" +#include "trace.h" /** * rvt_driver_mr_init - Init MR resources per driver @@ -84,6 +85,7 @@ int rvt_driver_mr_init(struct rvt_dev_info *rdi) lkey_table_size = rdi->dparms.lkey_table_size; } rdi->lkey_table.max = 1 << lkey_table_size; + rdi->lkey_table.shift = 32 - lkey_table_size; lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table); rdi->lkey_table.table = (struct rvt_mregion __rcu **) vmalloc_node(lk_tab_size, rdi->dparms.node); @@ -402,6 +404,7 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } mr->mr.map[m]->segs[n].vaddr = vaddr; mr->mr.map[m]->segs[n].length = umem->page_size; + trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr, umem->page_size); n++; if (n == RVT_SEGSZ) { m++; @@ -506,6 +509,7 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr) n = mapped_segs % RVT_SEGSZ; mr->mr.map[m]->segs[n].vaddr = (void *)addr; mr->mr.map[m]->segs[n].length = ps; + trace_rvt_mr_page_seg(&mr->mr, m, n, (void *)addr, ps); mr->mr.length += ps; return 0; @@ -692,6 +696,7 @@ int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, for (i = 0; i < list_len; i++) { fmr->mr.map[m]->segs[n].vaddr = (void *)page_list[i]; fmr->mr.map[m]->segs[n].length = ps; + trace_rvt_mr_fmr_seg(&fmr->mr, m, n, (void *)page_list[i], ps); if (++n == RVT_SEGSZ) { m++; n = 0; @@ -774,7 +779,6 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, 
struct rvt_mregion *mr; unsigned n, m; size_t off; - struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device); /* * We use LKEY == zero for kernel virtual addresses @@ -782,12 +786,14 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, */ rcu_read_lock(); if (sge->lkey == 0) { + struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device); + if (pd->user) goto bail; mr = rcu_dereference(dev->dma_mr); if (!mr) goto bail; - atomic_inc(&mr->refcount); + rvt_get_mr(mr); rcu_read_unlock(); isge->mr = mr; @@ -798,8 +804,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, isge->n = 0; goto ok; } - mr = rcu_dereference( - rkt->table[(sge->lkey >> (32 - dev->dparms.lkey_table_size))]); + mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]); if (unlikely(!mr || atomic_read(&mr->lkey_invalid) || mr->lkey != sge->lkey || mr->pd != &pd->ibpd)) goto bail; @@ -809,7 +814,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, off + sge->length > mr->length || (mr->access_flags & acc) != acc)) goto bail; - atomic_inc(&mr->refcount); + rvt_get_mr(mr); rcu_read_unlock(); off += mr->offset; @@ -887,7 +892,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, mr = rcu_dereference(rdi->dma_mr); if (!mr) goto bail; - atomic_inc(&mr->refcount); + rvt_get_mr(mr); rcu_read_unlock(); sge->mr = mr; @@ -899,8 +904,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, goto ok; } - mr = rcu_dereference( - rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]); + mr = rcu_dereference(rkt->table[rkey >> rkt->shift]); if (unlikely(!mr || atomic_read(&mr->lkey_invalid) || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) goto bail; @@ -909,7 +913,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, if (unlikely(vaddr < mr->iova || off + len > mr->length || (mr->access_flags & acc) == 0)) goto bail; - atomic_inc(&mr->refcount); + rvt_get_mr(mr); rcu_read_unlock(); off += mr->offset; diff --git a/drivers/infiniband/sw/rdmavt/qp.c 
b/drivers/infiniband/sw/rdmavt/qp.c index 6500c3b5a89c..2a13ac660f2b 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -76,6 +76,23 @@ const int ib_rvt_state_ops[IB_QPS_ERR + 1] = { }; EXPORT_SYMBOL(ib_rvt_state_ops); +/* + * Translate ib_wr_opcode into ib_wc_opcode. + */ +const enum ib_wc_opcode ib_rvt_wc_opcode[] = { + [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, + [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, + [IB_WR_SEND] = IB_WC_SEND, + [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, + [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, + [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, + [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD, + [IB_WR_SEND_WITH_INV] = IB_WC_SEND, + [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV, + [IB_WR_REG_MR] = IB_WC_REG_MR +}; +EXPORT_SYMBOL(ib_rvt_wc_opcode); + static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, gfp_t gfp) @@ -884,7 +901,8 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, return ret; bail_ip: - kref_put(&qp->ip->ref, rvt_release_mmap_info); + if (qp->ip) + kref_put(&qp->ip->ref, rvt_release_mmap_info); bail_qpn: free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h index 6c0457db5499..e2d23acb6a7d 100644 --- a/drivers/infiniband/sw/rdmavt/trace.h +++ b/drivers/infiniband/sw/rdmavt/trace.h @@ -45,143 +45,10 @@ * */ -#undef TRACE_SYSTEM_VAR -#define TRACE_SYSTEM_VAR rdmavt - -#if !defined(__RDMAVT_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) -#define __RDMAVT_TRACE_H - -#include <linux/tracepoint.h> -#include <linux/trace_seq.h> - -#include <rdma/ib_verbs.h> -#include <rdma/rdma_vt.h> - #define RDI_DEV_ENTRY(rdi) __string(dev, rdi->driver_f.get_card_name(rdi)) #define RDI_DEV_ASSIGN(rdi) __assign_str(dev, rdi->driver_f.get_card_name(rdi)) -#undef TRACE_SYSTEM -#define TRACE_SYSTEM rdmavt - -TRACE_EVENT(rvt_dbg, - TP_PROTO(struct rvt_dev_info *rdi, - const char *msg), - TP_ARGS(rdi, msg), - TP_STRUCT__entry( 
- RDI_DEV_ENTRY(rdi) - __string(msg, msg) - ), - TP_fast_assign( - RDI_DEV_ASSIGN(rdi); - __assign_str(msg, msg); - ), - TP_printk("[%s]: %s", __get_str(dev), __get_str(msg)) -); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM rvt_qphash -DECLARE_EVENT_CLASS(rvt_qphash_template, - TP_PROTO(struct rvt_qp *qp, u32 bucket), - TP_ARGS(qp, bucket), - TP_STRUCT__entry( - RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) - __field(u32, qpn) - __field(u32, bucket) - ), - TP_fast_assign( - RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) - __entry->qpn = qp->ibqp.qp_num; - __entry->bucket = bucket; - ), - TP_printk( - "[%s] qpn 0x%x bucket %u", - __get_str(dev), - __entry->qpn, - __entry->bucket - ) -); - -DEFINE_EVENT(rvt_qphash_template, rvt_qpinsert, - TP_PROTO(struct rvt_qp *qp, u32 bucket), - TP_ARGS(qp, bucket)); - -DEFINE_EVENT(rvt_qphash_template, rvt_qpremove, - TP_PROTO(struct rvt_qp *qp, u32 bucket), - TP_ARGS(qp, bucket)); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM rvt_tx - -#define wr_opcode_name(opcode) { IB_WR_##opcode, #opcode } -#define show_wr_opcode(opcode) \ -__print_symbolic(opcode, \ - wr_opcode_name(RDMA_WRITE), \ - wr_opcode_name(RDMA_WRITE_WITH_IMM), \ - wr_opcode_name(SEND), \ - wr_opcode_name(SEND_WITH_IMM), \ - wr_opcode_name(RDMA_READ), \ - wr_opcode_name(ATOMIC_CMP_AND_SWP), \ - wr_opcode_name(ATOMIC_FETCH_AND_ADD), \ - wr_opcode_name(LSO), \ - wr_opcode_name(SEND_WITH_INV), \ - wr_opcode_name(RDMA_READ_WITH_INV), \ - wr_opcode_name(LOCAL_INV), \ - wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP), \ - wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD)) - -#define POS_PRN \ -"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u" - -TRACE_EVENT( - rvt_post_one_wr, - TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe), - TP_ARGS(qp, wqe), - TP_STRUCT__entry( - RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) - __field(u64, wr_id) - __field(u32, qpn) - __field(u32, psn) - __field(u32, lpsn) - __field(u32, length) - __field(u32, opcode) - 
__field(u32, size) - __field(u32, avail) - __field(u32, head) - __field(u32, last) - ), - TP_fast_assign( - RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) - __entry->wr_id = wqe->wr.wr_id; - __entry->qpn = qp->ibqp.qp_num; - __entry->psn = wqe->psn; - __entry->lpsn = wqe->lpsn; - __entry->length = wqe->length; - __entry->opcode = wqe->wr.opcode; - __entry->size = qp->s_size; - __entry->avail = qp->s_avail; - __entry->head = qp->s_head; - __entry->last = qp->s_last; - ), - TP_printk( - POS_PRN, - __get_str(dev), - __entry->wr_id, - __entry->qpn, - __entry->psn, - __entry->lpsn, - __entry->length, - __entry->opcode, show_wr_opcode(__entry->opcode), - __entry->size, - __entry->avail, - __entry->head, - __entry->last - ) -); - -#endif /* __RDMAVT_TRACE_H */ - -#undef TRACE_INCLUDE_PATH -#undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace -#include <trace/define_trace.h> +#include "trace_rvt.h" +#include "trace_qp.h" +#include "trace_tx.h" +#include "trace_mr.h" diff --git a/drivers/infiniband/sw/rdmavt/trace_mr.h b/drivers/infiniband/sw/rdmavt/trace_mr.h new file mode 100644 index 000000000000..3318a6c36373 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/trace_mr.h @@ -0,0 +1,112 @@ +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ +#if !defined(__RVT_TRACE_MR_H) || defined(TRACE_HEADER_MULTI_READ) +#define __RVT_TRACE_MR_H + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +#include <rdma/ib_verbs.h> +#include <rdma/rdma_vt.h> +#include <rdma/rdmavt_mr.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rvt_mr +DECLARE_EVENT_CLASS( + rvt_mr_template, + TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len), + TP_ARGS(mr, m, n, v, len), + TP_STRUCT__entry( + RDI_DEV_ENTRY(ib_to_rvt(mr->pd->device)) + __field(void *, vaddr) + __field(struct page *, page) + __field(size_t, len) + __field(u32, lkey) + __field(u16, m) + __field(u16, n) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(ib_to_rvt(mr->pd->device)); + __entry->vaddr = v; + __entry->page = virt_to_page(v); + __entry->m = m; + __entry->n = n; + __entry->len = len; + ), + TP_printk( + "[%s] vaddr %p page %p m %u n %u len %ld", + __get_str(dev), + __entry->vaddr, + __entry->page, + __entry->m, + __entry->n, + __entry->len + ) +); + +DEFINE_EVENT( + rvt_mr_template, rvt_mr_page_seg, + TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len), + TP_ARGS(mr, m, n, v, len)); + +DEFINE_EVENT( + rvt_mr_template, rvt_mr_fmr_seg, + TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len), + TP_ARGS(mr, m, n, v, len)); + +DEFINE_EVENT( + rvt_mr_template, rvt_mr_user_seg, + TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len), + TP_ARGS(mr, m, n, v, len)); + +#endif /* __RVT_TRACE_MR_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_mr +#include <trace/define_trace.h> diff --git a/drivers/infiniband/sw/rdmavt/trace_qp.h b/drivers/infiniband/sw/rdmavt/trace_qp.h new file mode 100644 index 000000000000..4c77a3119bda --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/trace_qp.h @@ -0,0 +1,96 @@ +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. 
When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#if !defined(__RVT_TRACE_QP_H) || defined(TRACE_HEADER_MULTI_READ) +#define __RVT_TRACE_QP_H + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +#include <rdma/ib_verbs.h> +#include <rdma/rdma_vt.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rvt_qp + +DECLARE_EVENT_CLASS(rvt_qphash_template, + TP_PROTO(struct rvt_qp *qp, u32 bucket), + TP_ARGS(qp, bucket), + TP_STRUCT__entry( + RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) + __field(u32, qpn) + __field(u32, bucket) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) + __entry->qpn = qp->ibqp.qp_num; + __entry->bucket = bucket; + ), + TP_printk( + "[%s] qpn 0x%x bucket %u", + __get_str(dev), + __entry->qpn, + __entry->bucket + ) +); + +DEFINE_EVENT(rvt_qphash_template, rvt_qpinsert, + TP_PROTO(struct rvt_qp *qp, u32 bucket), + TP_ARGS(qp, bucket)); + +DEFINE_EVENT(rvt_qphash_template, rvt_qpremove, + TP_PROTO(struct rvt_qp *qp, u32 bucket), + TP_ARGS(qp, bucket)); + + +#endif /* __RVT_TRACE_QP_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_qp +#include <trace/define_trace.h> + diff --git a/drivers/infiniband/sw/rdmavt/trace_rvt.h b/drivers/infiniband/sw/rdmavt/trace_rvt.h new file mode 100644 index 000000000000..746f33461d9a --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/trace_rvt.h @@ -0,0 +1,81 @@ +/* + * Copyright(c) 2016 Intel Corporation. 
+ * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#if !defined(__RVT_TRACE_RVT_H) || defined(TRACE_HEADER_MULTI_READ) +#define __RVT_TRACE_RVT_H + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +#include <rdma/ib_verbs.h> +#include <rdma/rdma_vt.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rvt + +TRACE_EVENT(rvt_dbg, + TP_PROTO(struct rvt_dev_info *rdi, + const char *msg), + TP_ARGS(rdi, msg), + TP_STRUCT__entry( + RDI_DEV_ENTRY(rdi) + __string(msg, msg) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(rdi); + __assign_str(msg, msg); + ), + TP_printk("[%s]: %s", __get_str(dev), __get_str(msg)) +); + +#endif /* __RVT_TRACE_RVT_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_rvt +#include <trace/define_trace.h> + diff --git a/drivers/infiniband/sw/rdmavt/trace_tx.h b/drivers/infiniband/sw/rdmavt/trace_tx.h new file mode 100644 index 000000000000..0e03173662d8 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/trace_tx.h @@ -0,0 +1,132 @@ +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation.
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ +#if !defined(__RVT_TRACE_TX_H) || defined(TRACE_HEADER_MULTI_READ) +#define __RVT_TRACE_TX_H + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +#include <rdma/ib_verbs.h> +#include <rdma/rdma_vt.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rvt_tx + +#define wr_opcode_name(opcode) { IB_WR_##opcode, #opcode } +#define show_wr_opcode(opcode) \ +__print_symbolic(opcode, \ + wr_opcode_name(RDMA_WRITE), \ + wr_opcode_name(RDMA_WRITE_WITH_IMM), \ + wr_opcode_name(SEND), \ + wr_opcode_name(SEND_WITH_IMM), \ + wr_opcode_name(RDMA_READ), \ + wr_opcode_name(ATOMIC_CMP_AND_SWP), \ + wr_opcode_name(ATOMIC_FETCH_AND_ADD), \ + wr_opcode_name(LSO), \ + wr_opcode_name(SEND_WITH_INV), \ + wr_opcode_name(RDMA_READ_WITH_INV), \ + wr_opcode_name(LOCAL_INV), \ + wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP), \ + wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD)) + +#define POS_PRN \ +"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u" + +TRACE_EVENT( + rvt_post_one_wr, + TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe), + TP_ARGS(qp, wqe), + TP_STRUCT__entry( + RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) + __field(u64, wr_id) + __field(u32, qpn) + __field(u32, psn) + __field(u32, lpsn) + __field(u32, length) + __field(u32, opcode) + __field(u32, size) + __field(u32, avail) + __field(u32, head) + __field(u32, last) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) + __entry->wr_id = wqe->wr.wr_id; + __entry->qpn = qp->ibqp.qp_num; + __entry->psn = wqe->psn; + __entry->lpsn = wqe->lpsn; + __entry->length = wqe->length; + __entry->opcode = wqe->wr.opcode; + __entry->size = qp->s_size; + __entry->avail = qp->s_avail; + __entry->head = qp->s_head; + __entry->last = qp->s_last; + ), + TP_printk( + POS_PRN, + __get_str(dev), + __entry->wr_id, + __entry->qpn, + __entry->psn, + __entry->lpsn, + __entry->length, + __entry->opcode, show_wr_opcode(__entry->opcode), + __entry->size, + __entry->avail, + 
__entry->head, + __entry->last + ) +); + +#endif /* __RVT_TRACE_TX_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_tx +#include <trace/define_trace.h> + |