diff options
-rw-r--r-- | MAINTAINERS | 13 | ||||
-rw-r--r-- | drivers/infiniband/Kconfig | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/Makefile | 1 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/Kconfig (renamed from drivers/staging/rdma/hfi1/Kconfig) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/Makefile (renamed from drivers/staging/rdma/hfi1/Makefile) | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/affinity.c (renamed from drivers/staging/rdma/hfi1/affinity.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/affinity.h (renamed from drivers/staging/rdma/hfi1/affinity.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/aspm.h (renamed from drivers/staging/rdma/hfi1/aspm.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/chip.c (renamed from drivers/staging/rdma/hfi1/chip.c) | 41 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/chip.h (renamed from drivers/staging/rdma/hfi1/chip.h) | 6 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/chip_registers.h (renamed from drivers/staging/rdma/hfi1/chip_registers.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/common.h (renamed from drivers/staging/rdma/hfi1/common.h) | 5 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/debugfs.c (renamed from drivers/staging/rdma/hfi1/debugfs.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/debugfs.h (renamed from drivers/staging/rdma/hfi1/debugfs.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/device.c (renamed from drivers/staging/rdma/hfi1/device.c) | 18 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/device.h (renamed from drivers/staging/rdma/hfi1/device.h) | 3 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/dma.c (renamed from drivers/staging/rdma/hfi1/dma.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/driver.c (renamed from drivers/staging/rdma/hfi1/driver.c) | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/efivar.c (renamed from drivers/staging/rdma/hfi1/efivar.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/efivar.h (renamed from drivers/staging/rdma/hfi1/efivar.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/eprom.c | 102 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/eprom.h (renamed from drivers/staging/rdma/hfi1/eprom.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/file_ops.c (renamed from drivers/staging/rdma/hfi1/file_ops.c) | 549 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/firmware.c (renamed from drivers/staging/rdma/hfi1/firmware.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/hfi.h (renamed from drivers/staging/rdma/hfi1/hfi.h) | 7 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/init.c (renamed from drivers/staging/rdma/hfi1/init.c) | 22 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/intr.c (renamed from drivers/staging/rdma/hfi1/intr.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/iowait.h (renamed from drivers/staging/rdma/hfi1/iowait.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/mad.c (renamed from drivers/staging/rdma/hfi1/mad.c) | 99 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/mad.h (renamed from drivers/staging/rdma/hfi1/mad.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/mmu_rb.c (renamed from drivers/staging/rdma/hfi1/mmu_rb.c) | 22 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/mmu_rb.h (renamed from drivers/staging/rdma/hfi1/mmu_rb.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/opa_compat.h (renamed from drivers/staging/rdma/hfi1/opa_compat.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/pcie.c (renamed from drivers/staging/rdma/hfi1/pcie.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/pio.c (renamed from drivers/staging/rdma/hfi1/pio.c) | 3 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/pio.h (renamed from drivers/staging/rdma/hfi1/pio.h) | 8 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/pio_copy.c (renamed from drivers/staging/rdma/hfi1/pio_copy.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/platform.c (renamed from drivers/staging/rdma/hfi1/platform.c) | 27 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/platform.h (renamed from drivers/staging/rdma/hfi1/platform.h) | 1 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/qp.c (renamed from drivers/staging/rdma/hfi1/qp.c) | 9 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/qp.h (renamed from drivers/staging/rdma/hfi1/qp.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/qsfp.c (renamed from drivers/staging/rdma/hfi1/qsfp.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/qsfp.h (renamed from drivers/staging/rdma/hfi1/qsfp.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/rc.c (renamed from drivers/staging/rdma/hfi1/rc.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/ruc.c (renamed from drivers/staging/rdma/hfi1/ruc.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/sdma.c (renamed from drivers/staging/rdma/hfi1/sdma.c) | 4 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/sdma.h (renamed from drivers/staging/rdma/hfi1/sdma.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/sdma_txreq.h (renamed from drivers/staging/rdma/hfi1/sdma_txreq.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/sysfs.c (renamed from drivers/staging/rdma/hfi1/sysfs.c) | 4 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/trace.c (renamed from drivers/staging/rdma/hfi1/trace.c) | 8 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/trace.h (renamed from drivers/staging/rdma/hfi1/trace.h) | 5 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/twsi.c (renamed from drivers/staging/rdma/hfi1/twsi.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/twsi.h (renamed from drivers/staging/rdma/hfi1/twsi.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/uc.c (renamed from drivers/staging/rdma/hfi1/uc.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/ud.c (renamed from drivers/staging/rdma/hfi1/ud.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/user_exp_rcv.c (renamed from drivers/staging/rdma/hfi1/user_exp_rcv.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/user_exp_rcv.h (renamed from drivers/staging/rdma/hfi1/user_exp_rcv.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/user_pages.c (renamed from drivers/staging/rdma/hfi1/user_pages.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/user_sdma.c (renamed from drivers/staging/rdma/hfi1/user_sdma.c) | 18 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/user_sdma.h (renamed from drivers/staging/rdma/hfi1/user_sdma.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/verbs.c (renamed from drivers/staging/rdma/hfi1/verbs.c) | 4 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/verbs.h (renamed from drivers/staging/rdma/hfi1/verbs.h) | 1 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/verbs_txreq.c (renamed from drivers/staging/rdma/hfi1/verbs_txreq.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/verbs_txreq.h (renamed from drivers/staging/rdma/hfi1/verbs_txreq.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/qib/qib_iba7322.c | 15 | ||||
-rw-r--r-- | drivers/infiniband/hw/qib/qib_verbs.h | 1 | ||||
-rw-r--r-- | drivers/infiniband/sw/rdmavt/cq.c | 1 | ||||
-rw-r--r-- | drivers/infiniband/sw/rdmavt/mr.c | 4 | ||||
-rw-r--r-- | drivers/infiniband/sw/rdmavt/qp.c | 30 | ||||
-rw-r--r-- | drivers/staging/rdma/Kconfig | 2 | ||||
-rw-r--r-- | drivers/staging/rdma/Makefile | 1 | ||||
-rw-r--r-- | drivers/staging/rdma/hfi1/TODO | 6 | ||||
-rw-r--r-- | drivers/staging/rdma/hfi1/diag.c | 1925 | ||||
-rw-r--r-- | drivers/staging/rdma/hfi1/eprom.c | 471 | ||||
-rw-r--r-- | include/rdma/ib_pack.h | 5 | ||||
-rw-r--r-- | include/rdma/rdma_vt.h | 13 | ||||
-rw-r--r-- | include/rdma/rdmavt_qp.h | 5 | ||||
-rw-r--r-- | include/uapi/rdma/hfi/hfi1_user.h | 80 |
78 files changed, 551 insertions, 2994 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index c8025946eaae..98234560cfdc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5086,6 +5086,13 @@ F: drivers/block/cciss* F: include/linux/cciss_ioctl.h F: include/uapi/linux/cciss_ioctl.h +HFI1 DRIVER +M: Mike Marciniszyn <mike.marciniszyn@intel.com> +M: Dennis Dalessandro <dennis.dalessandro@intel.com> +L: linux-rdma@vger.kernel.org +S: Supported +F: drivers/infiniband/hw/hfi1 + HFS FILESYSTEM L: linux-fsdevel@vger.kernel.org S: Orphan @@ -10661,12 +10668,6 @@ M: Arnaud Patard <arnaud.patard@rtp-net.org> S: Odd Fixes F: drivers/staging/xgifb/ -HFI1 DRIVER -M: Mike Marciniszyn <infinipath@intel.com> -L: linux-rdma@vger.kernel.org -S: Supported -F: drivers/staging/rdma/hfi1 - STARFIRE/DURALAN NETWORK DRIVER M: Ion Badulescu <ionut@badula.org> S: Odd Fixes diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 6425c0e5d18a..2137adfbd8c3 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -85,4 +85,6 @@ source "drivers/infiniband/ulp/isert/Kconfig" source "drivers/infiniband/sw/rdmavt/Kconfig" +source "drivers/infiniband/hw/hfi1/Kconfig" + endif # INFINIBAND diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile index c7ad0a4c8b15..c0c7cf8af3f4 100644 --- a/drivers/infiniband/hw/Makefile +++ b/drivers/infiniband/hw/Makefile @@ -8,3 +8,4 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/ obj-$(CONFIG_INFINIBAND_NES) += nes/ obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/ obj-$(CONFIG_INFINIBAND_USNIC) += usnic/ +obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/ diff --git a/drivers/staging/rdma/hfi1/Kconfig b/drivers/infiniband/hw/hfi1/Kconfig index a925fb0db706..a925fb0db706 100644 --- a/drivers/staging/rdma/hfi1/Kconfig +++ b/drivers/infiniband/hw/hfi1/Kconfig diff --git a/drivers/staging/rdma/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index 8dc59382ee96..9b5382c94b0c 100644 --- a/drivers/staging/rdma/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -7,7 +7,7 @@ # obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o -hfi1-y := affinity.o chip.o device.o diag.o driver.o efivar.o \ +hfi1-y := affinity.o chip.o device.o driver.o efivar.o \ eprom.o file_ops.o firmware.o \ init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \ qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o twsi.o \ diff --git a/drivers/staging/rdma/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 6e7050ab9e16..6e7050ab9e16 100644 --- a/drivers/staging/rdma/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c diff --git a/drivers/staging/rdma/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h index 20f52fe74091..20f52fe74091 100644 --- a/drivers/staging/rdma/hfi1/affinity.h +++ b/drivers/infiniband/hw/hfi1/affinity.h diff --git a/drivers/staging/rdma/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h index 0d58fe3b49b5..0d58fe3b49b5 100644 --- a/drivers/staging/rdma/hfi1/aspm.h +++ b/drivers/infiniband/hw/hfi1/aspm.h diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index dcae8e723f98..3b876da745a1 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1037,6 +1037,7 @@ static void dc_shutdown(struct hfi1_devdata *); static void dc_start(struct hfi1_devdata *); static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp, unsigned int *np); +static void remove_full_mgmt_pkey(struct hfi1_pportdata *ppd); /* * Error interrupt table entry. This is used as input to the interrupt @@ -6105,7 +6106,7 @@ int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok) } /* this access is valid only when the link is up */ - if ((ppd->host_link_state & HLS_UP) == 0) { + if (ppd->host_link_state & HLS_DOWN) { dd_dev_info(dd, "%s: link state %s not up\n", __func__, link_state_name(ppd->host_link_state)); ret = -EBUSY; @@ -6961,6 +6962,8 @@ void handle_link_down(struct work_struct *work) } reset_neighbor_info(ppd); + if (ppd->mgmt_allowed) + remove_full_mgmt_pkey(ppd); /* disable the port */ clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); @@ -7069,6 +7072,12 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd) (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); } +static void remove_full_mgmt_pkey(struct hfi1_pportdata *ppd) +{ + ppd->pkeys[2] = 0; + (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); +} + /* * Convert the given link width to the OPA link width bitmask. */ @@ -7429,7 +7438,7 @@ void apply_link_downgrade_policy(struct hfi1_pportdata *ppd, int refresh_widths) retry: mutex_lock(&ppd->hls_lock); /* only apply if the link is up */ - if (!(ppd->host_link_state & HLS_UP)) { + if (ppd->host_link_state & HLS_DOWN) { /* still going up..wait and retry */ if (ppd->host_link_state & HLS_GOING_UP) { if (++tries < 1000) { @@ -9212,9 +9221,6 @@ void reset_qsfp(struct hfi1_pportdata *ppd) /* Reset the QSFP */ mask = (u64)QSFP_HFI0_RESET_N; - qsfp_mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE); - qsfp_mask |= mask; - write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE, qsfp_mask); qsfp_mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT); @@ -9252,6 +9258,12 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd, dd_dev_info(dd, "%s: QSFP cable temperature too low\n", __func__); + /* + * The remaining alarms/warnings don't matter if the link is down. + */ + if (ppd->host_link_state & HLS_DOWN) + return 0; + if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) || (qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING)) dd_dev_info(dd, "%s: QSFP supply voltage too high\n", @@ -9346,9 +9358,8 @@ void qsfp_event(struct work_struct *work) return; /* - * Turn DC back on after cables has been - * re-inserted. Up until now, the DC has been in - * reset to save power. + * Turn DC back on after cable has been re-inserted. Up until + * now, the DC has been in reset to save power. */ dc_start(dd); @@ -9480,7 +9491,15 @@ int bringup_serdes(struct hfi1_pportdata *ppd) return ret; } - /* tune the SERDES to a ballpark setting for + get_port_type(ppd); + if (ppd->port_type == PORT_TYPE_QSFP) { + set_qsfp_int_n(ppd, 0); + wait_for_qsfp_init(ppd); + set_qsfp_int_n(ppd, 1); + } + + /* + * Tune the SerDes to a ballpark setting for * optimal signal and bit error rate * Needs to be done before starting the link */ @@ -10074,7 +10093,7 @@ u32 driver_physical_state(struct hfi1_pportdata *ppd) */ u32 driver_logical_state(struct hfi1_pportdata *ppd) { - if (ppd->host_link_state && !(ppd->host_link_state & HLS_UP)) + if (ppd->host_link_state && (ppd->host_link_state & HLS_DOWN)) return IB_PORT_DOWN; switch (ppd->host_link_state & HLS_UP) { @@ -14578,7 +14597,7 @@ u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl, (reason), (ret)) /* - * Initialize the Avago Thermal sensor. + * Initialize the thermal sensor. * * After initialization, enable polling of thermal sensor through * SBus interface. In order for this to work, the SBus Master diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 1948706fff1a..66a327978739 100644 --- a/drivers/staging/rdma/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -398,6 +398,12 @@ /* Lane ID for general configuration registers */ #define GENERAL_CONFIG 4 +/* LINK_TUNING_PARAMETERS fields */ +#define TUNING_METHOD_SHIFT 24 + +/* LINK_OPTIMIZATION_SETTINGS fields */ +#define ENABLE_EXT_DEV_CONFIG_SHIFT 24 + /* LOAD_DATA 8051 command shifts and fields */ #define LOAD_DATA_FIELD_ID_SHIFT 40 #define LOAD_DATA_FIELD_ID_MASK 0xfull diff --git a/drivers/staging/rdma/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h index 8744de6667c2..8744de6667c2 100644 --- a/drivers/staging/rdma/hfi1/chip_registers.h +++ b/drivers/infiniband/hw/hfi1/chip_registers.h diff --git a/drivers/staging/rdma/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index e9b6bb322025..fcc9c217a97a 100644 --- a/drivers/staging/rdma/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -178,7 +178,8 @@ HFI1_CAP_PKEY_CHECK | \ HFI1_CAP_NO_INTEGRITY) -#define HFI1_USER_SWVERSION ((HFI1_USER_SWMAJOR << 16) | HFI1_USER_SWMINOR) +#define HFI1_USER_SWVERSION ((HFI1_USER_SWMAJOR << HFI1_SWMAJOR_SHIFT) | \ + HFI1_USER_SWMINOR) #ifndef HFI1_KERN_TYPE #define HFI1_KERN_TYPE 0 @@ -349,6 +350,8 @@ struct hfi1_message_header { #define HFI1_BECN_MASK 1 #define HFI1_BECN_SMASK BIT(HFI1_BECN_SHIFT) +#define HFI1_PSM_IOC_BASE_SEQ 0x0 + static inline __u64 rhf_to_cpu(const __le32 *rbuf) { return __le64_to_cpu(*((__le64 *)rbuf)); diff --git a/drivers/staging/rdma/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index dbab9d9cc288..dbab9d9cc288 100644 --- a/drivers/staging/rdma/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c diff --git a/drivers/staging/rdma/hfi1/debugfs.h b/drivers/infiniband/hw/hfi1/debugfs.h index b6fb6814f1b8..b6fb6814f1b8 100644 --- a/drivers/staging/rdma/hfi1/debugfs.h +++ b/drivers/infiniband/hw/hfi1/debugfs.h diff --git a/drivers/staging/rdma/hfi1/device.c b/drivers/infiniband/hw/hfi1/device.c index c05c39da83b1..bf64b5a7bfd7 100644 --- a/drivers/staging/rdma/hfi1/device.c +++ b/drivers/infiniband/hw/hfi1/device.c @@ -60,7 +60,8 @@ static dev_t hfi1_dev; int hfi1_cdev_init(int minor, const char *name, const struct file_operations *fops, struct cdev *cdev, struct device **devp, - bool user_accessible) + bool user_accessible, + struct kobject *parent) { const dev_t dev = MKDEV(MAJOR(hfi1_dev), minor); struct device *device = NULL; @@ -68,6 +69,7 @@ int hfi1_cdev_init(int minor, const char *name, cdev_init(cdev, fops); cdev->owner = THIS_MODULE; + cdev->kobj.parent = parent; kobject_set_name(&cdev->kobj, name); ret = cdev_add(cdev, dev, 1); @@ -82,13 +84,13 @@ int hfi1_cdev_init(int minor, const char *name, else device = device_create(class, NULL, dev, NULL, "%s", name); - if (!IS_ERR(device)) - goto done; - ret = PTR_ERR(device); - device = NULL; - pr_err("Could not create device for minor %d, %s (err %d)\n", - minor, name, -ret); - cdev_del(cdev); + if (IS_ERR(device)) { + ret = PTR_ERR(device); + device = NULL; + pr_err("Could not create device for minor %d, %s (err %d)\n", + minor, name, -ret); + cdev_del(cdev); + } done: *devp = device; return ret; diff --git a/drivers/staging/rdma/hfi1/device.h b/drivers/infiniband/hw/hfi1/device.h index 5bb3e83cf2da..c3ec19cb0ac9 100644 --- a/drivers/staging/rdma/hfi1/device.h +++ b/drivers/infiniband/hw/hfi1/device.h @@ -50,7 +50,8 @@ int hfi1_cdev_init(int minor, const char *name, const struct file_operations *fops, struct cdev *cdev, struct device **devp, - bool user_accessible); + bool user_accessible, + struct kobject *parent); void hfi1_cdev_cleanup(struct cdev *cdev, struct device **devp); const char *class_name(void); int __init dev_init(void); diff --git a/drivers/staging/rdma/hfi1/dma.c b/drivers/infiniband/hw/hfi1/dma.c index 7e8dab892848..7e8dab892848 100644 --- a/drivers/staging/rdma/hfi1/dma.c +++ b/drivers/infiniband/hw/hfi1/dma.c diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 700c6fa3a633..c75b0ae688f8 100644 --- a/drivers/staging/rdma/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1161,7 +1161,7 @@ int hfi1_set_lid(struct hfi1_pportdata *ppd, u32 lid, u8 lmc) ppd->lmc = lmc; hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LIDLMC, 0); - dd_dev_info(dd, "IB%u:%u got a lid: 0x%x\n", dd->unit, ppd->port, lid); + dd_dev_info(dd, "port %u: got a lid: 0x%x\n", ppd->port, lid); return 0; } diff --git a/drivers/staging/rdma/hfi1/efivar.c b/drivers/infiniband/hw/hfi1/efivar.c index 106349fc1fb9..106349fc1fb9 100644 --- a/drivers/staging/rdma/hfi1/efivar.c +++ b/drivers/infiniband/hw/hfi1/efivar.c diff --git a/drivers/staging/rdma/hfi1/efivar.h b/drivers/infiniband/hw/hfi1/efivar.h index 94e9e70de568..94e9e70de568 100644 --- a/drivers/staging/rdma/hfi1/efivar.h +++ b/drivers/infiniband/hw/hfi1/efivar.h diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c new file mode 100644 index 000000000000..36b77943cbfd --- /dev/null +++ b/drivers/infiniband/hw/hfi1/eprom.c @@ -0,0 +1,102 @@ +/* + * Copyright(c) 2015, 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#include <linux/delay.h> +#include "hfi.h" +#include "common.h" +#include "eprom.h" + +#define CMD_SHIFT 24 +#define CMD_RELEASE_POWERDOWN_NOID ((0xab << CMD_SHIFT)) + +/* controller interface speeds */ +#define EP_SPEED_FULL 0x2 /* full speed */ + +/* + * How long to wait for the EPROM to become available, in ms. + * The spec 32 Mb EPROM takes around 40s to erase then write. + * Double it for safety. + */ +#define EPROM_TIMEOUT 80000 /* ms */ +/* + * Initialize the EPROM handler. + */ +int eprom_init(struct hfi1_devdata *dd) +{ + int ret = 0; + + /* only the discrete chip has an EPROM */ + if (dd->pcidev->device != PCI_DEVICE_ID_INTEL0) + return 0; + + /* + * It is OK if both HFIs reset the EPROM as long as they don't + * do it at the same time. + */ + ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT); + if (ret) { + dd_dev_err(dd, + "%s: unable to acquire EPROM resource, no EPROM support\n", + __func__); + goto done_asic; + } + + /* reset EPROM to be sure it is in a good state */ + + /* set reset */ + write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_EP_RESET_SMASK); + /* clear reset, set speed */ + write_csr(dd, ASIC_EEP_CTL_STAT, + EP_SPEED_FULL << ASIC_EEP_CTL_STAT_RATE_SPI_SHIFT); + + /* wake the device with command "release powerdown NoID" */ + write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_RELEASE_POWERDOWN_NOID); + + dd->eprom_available = true; + release_chip_resource(dd, CR_EPROM); +done_asic: + return ret; +} diff --git a/drivers/staging/rdma/hfi1/eprom.h b/drivers/infiniband/hw/hfi1/eprom.h index d41f0b1afb15..d41f0b1afb15 100644 --- a/drivers/staging/rdma/hfi1/eprom.h +++ b/drivers/infiniband/hw/hfi1/eprom.h diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index c1c5bf82addb..7a5b0e676cc7 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -72,8 +72,6 @@ */ static int hfi1_file_open(struct inode *, struct file *); static int hfi1_file_close(struct inode *, struct file *); -static ssize_t hfi1_file_write(struct file *, const char __user *, - size_t, loff_t *); static ssize_t hfi1_write_iter(struct kiocb *, struct iov_iter *); static unsigned int hfi1_poll(struct file *, struct poll_table_struct *); static int hfi1_file_mmap(struct file *, struct vm_area_struct *); @@ -86,8 +84,7 @@ static int get_ctxt_info(struct file *, void __user *, __u32); static int get_base_info(struct file *, void __user *, __u32); static int setup_ctxt(struct file *); static int setup_subctxt(struct hfi1_ctxtdata *); -static int get_user_context(struct file *, struct hfi1_user_info *, - int, unsigned); +static int get_user_context(struct file *, struct hfi1_user_info *, int); static int find_shared_ctxt(struct file *, const struct hfi1_user_info *); static int allocate_ctxt(struct file *, struct hfi1_devdata *, struct hfi1_user_info *); @@ -97,13 +94,15 @@ static int user_event_ack(struct hfi1_ctxtdata *, int, unsigned long); static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16); static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int); static int vma_fault(struct vm_area_struct *, struct vm_fault *); +static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, + unsigned long arg); static const struct file_operations hfi1_file_ops = { .owner = THIS_MODULE, - .write = hfi1_file_write, .write_iter = hfi1_write_iter, .open = hfi1_file_open, .release = hfi1_file_close, + .unlocked_ioctl = hfi1_file_ioctl, .poll = hfi1_poll, .mmap = hfi1_file_mmap, .llseek = noop_llseek, @@ -169,6 +168,13 @@ static inline int is_valid_mmap(u64 token) static int hfi1_file_open(struct inode *inode, struct file *fp) { + struct hfi1_devdata *dd = container_of(inode->i_cdev, + struct hfi1_devdata, + user_cdev); + + /* Just take a ref now. Not all opens result in a context assign */ + kobject_get(&dd->kobj); + /* The real work is performed later in assign_ctxt() */ fp->private_data = kzalloc(sizeof(struct hfi1_filedata), GFP_KERNEL); if (fp->private_data) /* no cpu affinity by default */ @@ -176,127 +182,59 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) return fp->private_data ? 0 : -ENOMEM; } -static ssize_t hfi1_file_write(struct file *fp, const char __user *data, - size_t count, loff_t *offset) +static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, + unsigned long arg) { - const struct hfi1_cmd __user *ucmd; struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; - struct hfi1_cmd cmd; struct hfi1_user_info uinfo; struct hfi1_tid_info tinfo; + int ret = 0; unsigned long addr; - ssize_t consumed = 0, copy = 0, ret = 0; - void *dest = NULL; - __u64 user_val = 0; - int uctxt_required = 1; - int must_be_root = 0; - - /* FIXME: This interface cannot continue out of staging */ - if (WARN_ON_ONCE(!ib_safe_file_access(fp))) - return -EACCES; - - if (count < sizeof(cmd)) { - ret = -EINVAL; - goto bail; - } - - ucmd = (const struct hfi1_cmd __user *)data; - if (copy_from_user(&cmd, ucmd, sizeof(cmd))) { - ret = -EFAULT; - goto bail; - } - - consumed = sizeof(cmd); - - switch (cmd.type) { - case HFI1_CMD_ASSIGN_CTXT: - uctxt_required = 0; /* assigned user context not required */ - copy = sizeof(uinfo); - dest = &uinfo; - break; - case HFI1_CMD_SDMA_STATUS_UPD: - case HFI1_CMD_CREDIT_UPD: - copy = 0; - break; - case HFI1_CMD_TID_UPDATE: - case HFI1_CMD_TID_FREE: - case HFI1_CMD_TID_INVAL_READ: - copy = sizeof(tinfo); - dest = &tinfo; - break; - case HFI1_CMD_USER_INFO: - case HFI1_CMD_RECV_CTRL: - case HFI1_CMD_POLL_TYPE: - case HFI1_CMD_ACK_EVENT: - case HFI1_CMD_CTXT_INFO: - case HFI1_CMD_SET_PKEY: - case HFI1_CMD_CTXT_RESET: - copy = 0; - user_val = cmd.addr; - break; - case HFI1_CMD_EP_INFO: - case HFI1_CMD_EP_ERASE_CHIP: - case HFI1_CMD_EP_ERASE_RANGE: - case HFI1_CMD_EP_READ_RANGE: - case HFI1_CMD_EP_WRITE_RANGE: - uctxt_required = 0; /* assigned user context not required */ - must_be_root = 1; /* validate user */ - copy = 0; - break; - default: - ret = -EINVAL; - goto bail; - } - - /* If the command comes with user data, copy it. */ - if (copy) { - if (copy_from_user(dest, (void __user *)cmd.addr, copy)) { - ret = -EFAULT; - goto bail; - } - consumed += copy; - } - - /* - * Make sure there is a uctxt when needed. - */ - if (uctxt_required && !uctxt) { - ret = -EINVAL; - goto bail; - } + int uval = 0; + unsigned long ul_uval = 0; + u16 uval16 = 0; + + hfi1_cdbg(IOCTL, "IOCTL recv: 0x%x", cmd); + if (cmd != HFI1_IOCTL_ASSIGN_CTXT && + cmd != HFI1_IOCTL_GET_VERS && + !uctxt) + return -EINVAL; - /* only root can do these operations */ - if (must_be_root && !capable(CAP_SYS_ADMIN)) { - ret = -EPERM; - goto bail; - } + switch (cmd) { + case HFI1_IOCTL_ASSIGN_CTXT: + if (copy_from_user(&uinfo, + (struct hfi1_user_info __user *)arg, + sizeof(uinfo))) + return -EFAULT; - switch (cmd.type) { - case HFI1_CMD_ASSIGN_CTXT: ret = assign_ctxt(fp, &uinfo); if (ret < 0) - goto bail; - ret = setup_ctxt(fp); + return ret; + setup_ctxt(fp); if (ret) - goto bail; + return ret; ret = user_init(fp); break; - case HFI1_CMD_CTXT_INFO: - ret = get_ctxt_info(fp, (void __user *)(unsigned long) - user_val, cmd.len); - break; - case HFI1_CMD_USER_INFO: - ret = get_base_info(fp, (void __user *)(unsigned long) - user_val, cmd.len); + case HFI1_IOCTL_CTXT_INFO: + ret = get_ctxt_info(fp, (void __user *)(unsigned long)arg, + sizeof(struct hfi1_ctxt_info)); break; - case HFI1_CMD_SDMA_STATUS_UPD: + case HFI1_IOCTL_USER_INFO: + ret = get_base_info(fp, (void __user *)(unsigned long)arg, + sizeof(struct hfi1_base_info)); break; - case HFI1_CMD_CREDIT_UPD: + case HFI1_IOCTL_CREDIT_UPD: if (uctxt && uctxt->sc) sc_return_credits(uctxt->sc); break; - case HFI1_CMD_TID_UPDATE: + + case HFI1_IOCTL_TID_UPDATE: + if (copy_from_user(&tinfo, + (struct hfi11_tid_info __user *)arg, + sizeof(tinfo))) + return -EFAULT; + ret = hfi1_user_exp_rcv_setup(fp, &tinfo); if (!ret) { /* @@ -305,57 +243,82 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, * These fields are adjacent in the structure so * we can copy them at the same time. */ - addr = (unsigned long)cmd.addr + - offsetof(struct hfi1_tid_info, tidcnt); + addr = arg + offsetof(struct hfi1_tid_info, tidcnt); if (copy_to_user((void __user *)addr, &tinfo.tidcnt, sizeof(tinfo.tidcnt) + sizeof(tinfo.length))) ret = -EFAULT; } break; - case HFI1_CMD_TID_INVAL_READ: - ret = hfi1_user_exp_rcv_invalid(fp, &tinfo); + + case HFI1_IOCTL_TID_FREE: + if (copy_from_user(&tinfo, + (struct hfi11_tid_info __user *)arg, + sizeof(tinfo))) + return -EFAULT; + + ret = hfi1_user_exp_rcv_clear(fp, &tinfo); if (ret) break; - addr = (unsigned long)cmd.addr + - offsetof(struct hfi1_tid_info, tidcnt); + addr = arg + offsetof(struct hfi1_tid_info, tidcnt); if (copy_to_user((void __user *)addr, &tinfo.tidcnt, sizeof(tinfo.tidcnt))) ret = -EFAULT; break; - case HFI1_CMD_TID_FREE: - ret = hfi1_user_exp_rcv_clear(fp, &tinfo); + + case HFI1_IOCTL_TID_INVAL_READ: + if (copy_from_user(&tinfo, + (struct hfi11_tid_info __user *)arg, + sizeof(tinfo))) + return -EFAULT; + + ret = hfi1_user_exp_rcv_invalid(fp, &tinfo); if (ret) break; - addr = (unsigned long)cmd.addr + - offsetof(struct hfi1_tid_info, tidcnt); + addr = arg + offsetof(struct hfi1_tid_info, tidcnt); if (copy_to_user((void __user *)addr, &tinfo.tidcnt, sizeof(tinfo.tidcnt))) ret = -EFAULT; break; - case HFI1_CMD_RECV_CTRL: - ret = manage_rcvq(uctxt, fd->subctxt, (int)user_val); + + case HFI1_IOCTL_RECV_CTRL: + ret = get_user(uval, (int __user *)arg); + if (ret != 0) + return -EFAULT; + ret = manage_rcvq(uctxt, fd->subctxt, uval); break; - case HFI1_CMD_POLL_TYPE: - uctxt->poll_type = (typeof(uctxt->poll_type))user_val; + + case HFI1_IOCTL_POLL_TYPE: + ret = get_user(uval, (int __user *)arg); + if (ret != 0) + return -EFAULT; + uctxt->poll_type = (typeof(uctxt->poll_type))uval; break; - case HFI1_CMD_ACK_EVENT: - ret = user_event_ack(uctxt, fd->subctxt, user_val); + + case HFI1_IOCTL_ACK_EVENT: + ret = get_user(ul_uval, (unsigned long __user *)arg); + if (ret != 0) + return -EFAULT; + ret = user_event_ack(uctxt, fd->subctxt, ul_uval); break; - case HFI1_CMD_SET_PKEY: + + case HFI1_IOCTL_SET_PKEY: + ret = get_user(uval16, (u16 __user *)arg); + if (ret != 0) + return -EFAULT; if (HFI1_CAP_IS_USET(PKEY_CHECK)) - ret = set_ctxt_pkey(uctxt, fd->subctxt, user_val); + ret = set_ctxt_pkey(uctxt, fd->subctxt, uval16); else - ret = -EPERM; + return -EPERM; break; - case HFI1_CMD_CTXT_RESET: { + + case HFI1_IOCTL_CTXT_RESET: { struct send_context *sc; struct hfi1_devdata *dd; - if (!uctxt || !uctxt->dd || !uctxt->sc) { - ret = -EINVAL; - break; - } + if (!uctxt || !uctxt->dd || !uctxt->sc) + return -EINVAL; + /* * There is no protection here. User level has to * guarantee that no one will be writing to the send @@ -373,10 +336,9 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, wait_event_interruptible_timeout( sc->halt_wait, (sc->flags & SCF_HALTED), msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT)); - if (!(sc->flags & SCF_HALTED)) { - ret = -ENOLCK; - break; - } + if (!(sc->flags & SCF_HALTED)) + return -ENOLCK; + /* * If the send context was halted due to a Freeze, * wait until the device has been "unfrozen" before @@ -387,18 +349,16 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, dd->event_queue, !(ACCESS_ONCE(dd->flags) & HFI1_FROZEN), msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT)); - if (dd->flags & HFI1_FROZEN) { - ret = -ENOLCK; - break; - } - if (dd->flags & HFI1_FORCED_FREEZE) { + if (dd->flags & HFI1_FROZEN) + return -ENOLCK; + + if (dd->flags & HFI1_FORCED_FREEZE) /* * Don't allow context reset if we are into * forced freeze */ - ret = -ENODEV; - break; - } + return -ENODEV; + sc_disable(sc); ret = sc_enable(sc); hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, @@ -410,18 +370,17 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, sc_return_credits(sc); break; } - case HFI1_CMD_EP_INFO: - case HFI1_CMD_EP_ERASE_CHIP: - case HFI1_CMD_EP_ERASE_RANGE: - case HFI1_CMD_EP_READ_RANGE: - case HFI1_CMD_EP_WRITE_RANGE: - ret = handle_eprom_command(fp, &cmd); + + case HFI1_IOCTL_GET_VERS: + uval = HFI1_USER_SWVERSION; + if (put_user(uval, (int __user *)arg)) + return -EFAULT; break; + + default: + return -EINVAL; } - if (ret >= 0) - ret = consumed; -bail: return ret; } @@ -738,7 +697,9 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) { struct hfi1_filedata *fdata = fp->private_data; struct hfi1_ctxtdata *uctxt = fdata->uctxt; - struct hfi1_devdata *dd; + struct hfi1_devdata *dd = container_of(inode->i_cdev, + struct hfi1_devdata, + user_cdev); unsigned long flags, *ev; fp->private_data = NULL; @@ -747,7 +708,6 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) goto done; hfi1_cdbg(PROC, "freeing ctxt %u:%u", uctxt->ctxt, fdata->subctxt); - dd = uctxt->dd; mutex_lock(&hfi1_mutex); flush_wc(); @@ -813,6 +773,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) mutex_unlock(&hfi1_mutex); hfi1_free_ctxtdata(dd, uctxt); done: + kobject_put(&dd->kobj); kfree(fdata); return 0; } @@ -836,7 +797,7 @@ static u64 kvirt_to_phys(void *addr) static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo) { int i_minor, ret = 0; - unsigned swmajor, swminor, alg = HFI1_ALG_ACROSS; + unsigned int swmajor, swminor; swmajor = uinfo->userversion >> 16; if (swmajor != HFI1_USER_SWMAJOR) { @@ -846,9 +807,6 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo) swminor = uinfo->userversion & 0xffff; - if (uinfo->hfi1_alg < HFI1_ALG_COUNT) - alg = uinfo->hfi1_alg; - mutex_lock(&hfi1_mutex); /* First, lets check if we need to setup a shared context? */ if (uinfo->subctxt_cnt) { @@ -868,7 +826,7 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo) */ if (!ret) { i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE; - ret = get_user_context(fp, uinfo, i_minor - 1, alg); + ret = get_user_context(fp, uinfo, i_minor); } done_unlock: mutex_unlock(&hfi1_mutex); @@ -876,71 +834,26 @@ done: return ret; } -/* return true if the device available for general use */ -static int usable_device(struct hfi1_devdata *dd) -{ - struct hfi1_pportdata *ppd = dd->pport; - - return driver_lstate(ppd) == IB_PORT_ACTIVE; -} - static int get_user_context(struct file *fp, struct hfi1_user_info *uinfo, - int devno, unsigned alg) + int devno) { struct hfi1_devdata *dd = NULL; - int ret = 0, devmax, npresent, nup, dev; + int devmax, npresent, nup; devmax = hfi1_count_units(&npresent, &nup); - if (!npresent) { - ret = -ENXIO; - goto done; - } - if (!nup) { - ret = -ENETDOWN; - goto done; - } - if (devno >= 0) { - dd = hfi1_lookup(devno); - if (!dd) - ret = -ENODEV; - else if (!dd->freectxts) - ret = -EBUSY; - } else { - struct hfi1_devdata *pdd; - - if (alg == HFI1_ALG_ACROSS) { - unsigned free = 0U; - - for (dev = 0; dev < devmax; dev++) { - pdd = hfi1_lookup(dev); - if (!pdd) - continue; - if (!usable_device(pdd)) - continue; - if (pdd->freectxts && - pdd->freectxts > free) { - dd = pdd; - free = pdd->freectxts; - } - } - } else { - for (dev = 0; dev < devmax; dev++) { - pdd = hfi1_lookup(dev); - if (!pdd) - continue; - if (!usable_device(pdd)) - continue; - if (pdd->freectxts) { - dd = pdd; - break; - } - } - } - if (!dd) - ret = -EBUSY; - } -done: - return ret ? ret : allocate_ctxt(fp, dd, uinfo); + if (!npresent) + return -ENXIO; + + if (!nup) + return -ENETDOWN; + + dd = hfi1_lookup(devno); + if (!dd) + return -ENODEV; + else if (!dd->freectxts) + return -EBUSY; + + return allocate_ctxt(fp, dd, uinfo); } static int find_shared_ctxt(struct file *fp, @@ -1546,170 +1459,10 @@ done: return ret; } -static int ui_open(struct inode *inode, struct file *filp) -{ - struct hfi1_devdata *dd; - - dd = container_of(inode->i_cdev, struct hfi1_devdata, ui_cdev); - filp->private_data = dd; /* for other methods */ - return 0; -} - -static int ui_release(struct inode *inode, struct file *filp) -{ - /* nothing to do */ - return 0; -} - -static loff_t ui_lseek(struct file *filp, loff_t offset, int whence) -{ - struct hfi1_devdata *dd = filp->private_data; - - return fixed_size_llseek(filp, offset, whence, - (dd->kregend - dd->kregbase) + DC8051_DATA_MEM_SIZE); -} - -/* NOTE: assumes unsigned long is 8 bytes */ -static ssize_t ui_read(struct file *filp, char __user *buf, size_t count, - loff_t *f_pos) -{ - struct hfi1_devdata *dd = filp->private_data; - void __iomem *base = dd->kregbase; - unsigned long total, csr_off, - barlen = (dd->kregend - dd->kregbase); - u64 data; - - /* only read 8 byte quantities */ - if ((count % 8) != 0) - return -EINVAL; - /* offset must be 8-byte aligned */ - if ((*f_pos % 8) != 0) - return -EINVAL; - /* destination buffer must be 8-byte aligned */ - if ((unsigned long)buf % 8 != 0) - return -EINVAL; - /* must be in range */ - if (*f_pos + count > (barlen + DC8051_DATA_MEM_SIZE)) - return -EINVAL; - /* only set the base if we are not starting past the BAR */ - if (*f_pos < barlen) - base += *f_pos; - csr_off = *f_pos; - for (total = 0; total < count; total += 8, csr_off += 8) { - /* accessing LCB CSRs requires more checks */ - if (is_lcb_offset(csr_off)) { - if (read_lcb_csr(dd, csr_off, (u64 *)&data)) - break; /* failed */ - } - /* - * Cannot read ASIC GPIO/QSFP* clear and force CSRs without a - * false parity error. Avoid the whole issue by not reading - * them. These registers are defined as having a read value - * of 0. - */ - else if (csr_off == ASIC_GPIO_CLEAR || - csr_off == ASIC_GPIO_FORCE || - csr_off == ASIC_QSFP1_CLEAR || - csr_off == ASIC_QSFP1_FORCE || - csr_off == ASIC_QSFP2_CLEAR || - csr_off == ASIC_QSFP2_FORCE) - data = 0; - else if (csr_off >= barlen) { - /* - * read_8051_data can read more than just 8 bytes at - * a time. However, folding this into the loop and - * handling the reads in 8 byte increments allows us - * to smoothly transition from chip memory to 8051 - * memory. - */ - if (read_8051_data(dd, - (u32)(csr_off - barlen), - sizeof(data), &data)) - break; /* failed */ - } else - data = readq(base + total); - if (put_user(data, (unsigned long __user *)(buf + total))) - break; - } - *f_pos += total; - return total; -} - -/* NOTE: assumes unsigned long is 8 bytes */ -static ssize_t ui_write(struct file *filp, const char __user *buf, - size_t count, loff_t *f_pos) -{ - struct hfi1_devdata *dd = filp->private_data; - void __iomem *base; - unsigned long total, data, csr_off; - int in_lcb; - - /* only write 8 byte quantities */ - if ((count % 8) != 0) - return -EINVAL; - /* offset must be 8-byte aligned */ - if ((*f_pos % 8) != 0) - return -EINVAL; - /* source buffer must be 8-byte aligned */ - if ((unsigned long)buf % 8 != 0) - return -EINVAL; - /* must be in range */ - if (*f_pos + count > dd->kregend - dd->kregbase) - return -EINVAL; - - base = (void __iomem *)dd->kregbase + *f_pos; - csr_off = *f_pos; - in_lcb = 0; - for (total = 0; total < count; total += 8, csr_off += 8) { - if (get_user(data, (unsigned long __user *)(buf + total))) - break; - /* accessing LCB CSRs requires a special procedure */ - if (is_lcb_offset(csr_off)) { - if (!in_lcb) { - int ret = acquire_lcb_access(dd, 1); - - if (ret) - break; - in_lcb = 1; - } - } else { - if (in_lcb) { - release_lcb_access(dd, 1); - in_lcb = 0; - } - } - writeq(data, base + total); - } - if (in_lcb) - release_lcb_access(dd, 1); - *f_pos += total; - return total; -} - -static const struct file_operations ui_file_ops = { - .owner = THIS_MODULE, - .llseek = ui_lseek, - .read = ui_read, - .write = ui_write, - .open = ui_open, - .release = ui_release, -}; - -#define UI_OFFSET 192 /* device minor offset for UI devices */ -static int create_ui = 1; - -static struct cdev wildcard_cdev; -static struct device *wildcard_device; - -static atomic_t user_count = ATOMIC_INIT(0); - static void user_remove(struct hfi1_devdata *dd) { - if (atomic_dec_return(&user_count) == 0) - hfi1_cdev_cleanup(&wildcard_cdev, &wildcard_device); hfi1_cdev_cleanup(&dd->user_cdev, &dd->user_device); - hfi1_cdev_cleanup(&dd->ui_cdev, &dd->ui_device); } static int user_add(struct hfi1_devdata *dd) @@ -1717,34 +1470,13 @@ static int user_add(struct hfi1_devdata *dd) char name[10]; int ret; - if (atomic_inc_return(&user_count) == 1) { - ret = hfi1_cdev_init(0, class_name(), &hfi1_file_ops, - &wildcard_cdev, &wildcard_device, - true); - if (ret) - goto done; - } - snprintf(name, sizeof(name), "%s_%d", class_name(), dd->unit); - ret = hfi1_cdev_init(dd->unit + 1, name, &hfi1_file_ops, + ret = hfi1_cdev_init(dd->unit, name, &hfi1_file_ops, &dd->user_cdev, &dd->user_device, - true); + true, &dd->kobj); if (ret) - goto done; + user_remove(dd); - if (create_ui) { - snprintf(name, sizeof(name), - "%s_ui%d", class_name(), dd->unit); - ret = hfi1_cdev_init(dd->unit + UI_OFFSET, name, &ui_file_ops, - &dd->ui_cdev, &dd->ui_device, - false); - if (ret) - goto done; - } - - return 0; -done: - user_remove(dd); return ret; } @@ -1753,13 +1485,7 @@ done: */ int hfi1_device_create(struct hfi1_devdata *dd) { - int r, ret; - - r = user_add(dd); - ret = hfi1_diag_add(dd); - if (r && !ret) - ret = r; - return ret; + return user_add(dd); } /* @@ -1769,5 +1495,4 @@ int hfi1_device_create(struct hfi1_devdata *dd) void hfi1_device_remove(struct hfi1_devdata *dd) { user_remove(dd); - hfi1_diag_remove(dd); } diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index ed680fda611d..ed680fda611d 100644 --- a/drivers/staging/rdma/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 7b78d56de7f5..4417a0fd3ef9 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -453,6 +453,7 @@ struct rvt_sge_state; #define HLS_LINK_COOLDOWN BIT(__HLS_LINK_COOLDOWN_BP) #define HLS_UP (HLS_UP_INIT | HLS_UP_ARMED | HLS_UP_ACTIVE) +#define HLS_DOWN ~(HLS_UP) /* use this MTU size if none other is given */ #define HFI1_DEFAULT_ACTIVE_MTU 10240 @@ -1168,6 +1169,7 @@ struct hfi1_devdata { atomic_t aspm_disabled_cnt; struct hfi1_affinity *affinity; + struct kobject kobj; }; /* 8051 firmware version helper */ @@ -1882,9 +1884,8 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd) get_unit_name((dd)->unit), ##__VA_ARGS__) #define hfi1_dev_porterr(dd, port, fmt, ...) \ - dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \ - get_unit_name((dd)->unit), (dd)->unit, (port), \ - ##__VA_ARGS__) + dev_err(&(dd)->pcidev->dev, "%s: port %u: " fmt, \ + get_unit_name((dd)->unit), (port), ##__VA_ARGS__) /* * this is used for formatting hw error messages... diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 502b7cf4647d..5cc492e5776d 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -732,12 +732,12 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) lastfail = hfi1_create_rcvhdrq(dd, rcd); if (!lastfail) lastfail = hfi1_setup_eagerbufs(rcd); - if (lastfail) + if (lastfail) { dd_dev_err(dd, "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n"); + ret = lastfail; + } } - if (lastfail) - ret = lastfail; /* Allocate enough memory for user event notification. */ len = PAGE_ALIGN(dd->chip_rcv_contexts * HFI1_MAX_SHARED_CTXTS * @@ -989,8 +989,10 @@ static void release_asic_data(struct hfi1_devdata *dd) dd->asic_data = NULL; } -void hfi1_free_devdata(struct hfi1_devdata *dd) +static void __hfi1_free_devdata(struct kobject *kobj) { + struct hfi1_devdata *dd = + container_of(kobj, struct hfi1_devdata, kobj); unsigned long flags; spin_lock_irqsave(&hfi1_devs_lock, flags); @@ -1007,6 +1009,15 @@ void hfi1_free_devdata(struct hfi1_devdata *dd) rvt_dealloc_device(&dd->verbs_dev.rdi); } +static struct kobj_type hfi1_devdata_type = { + .release = __hfi1_free_devdata, +}; + +void hfi1_free_devdata(struct hfi1_devdata *dd) +{ + kobject_put(&dd->kobj); +} + /* * Allocate our primary per-unit data structure. Must be done via verbs * allocator, because the verbs cleanup process both does cleanup and @@ -1102,6 +1113,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) &pdev->dev, "Could not alloc cpulist info, cpu affinity might be wrong\n"); } + kobject_init(&dd->kobj, &hfi1_devdata_type); return dd; bail: @@ -1300,7 +1312,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd) spin_lock(&ppd->cc_state_lock); cc_state = get_cc_state(ppd); - rcu_assign_pointer(ppd->cc_state, NULL); + RCU_INIT_POINTER(ppd->cc_state, NULL); spin_unlock(&ppd->cc_state_lock); if (cc_state) diff --git a/drivers/staging/rdma/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c index 65348d16ab2f..65348d16ab2f 100644 --- a/drivers/staging/rdma/hfi1/intr.c +++ b/drivers/infiniband/hw/hfi1/intr.c diff --git a/drivers/staging/rdma/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h index 2ec6ef38d389..2ec6ef38d389 100644 --- a/drivers/staging/rdma/hfi1/iowait.h +++ b/drivers/infiniband/hw/hfi1/iowait.h diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index ed58cf21e790..219029576ba0 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -1403,6 +1403,12 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys) if (key == okey) continue; /* + * Don't update pkeys[2], if an HFI port without MgmtAllowed + * by neighbor is a switch. + */ + if (i == 2 && !ppd->mgmt_allowed && ppd->neighbor_type == 1) + continue; + /* * The SM gives us the complete PKey table. We have * to ensure that we put the PKeys in the matching * slots. @@ -3363,6 +3369,50 @@ static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am, return reply((struct ib_mad_hdr *)smp); } +/* + * Apply congestion control information stored in the ppd to the + * active structure. + */ +static void apply_cc_state(struct hfi1_pportdata *ppd) +{ + struct cc_state *old_cc_state, *new_cc_state; + + new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL); + if (!new_cc_state) + return; + + /* + * Hold the lock for updating *and* to prevent ppd information + * from changing during the update. + */ + spin_lock(&ppd->cc_state_lock); + + old_cc_state = get_cc_state(ppd); + if (!old_cc_state) { + /* never active, or shutting down */ + spin_unlock(&ppd->cc_state_lock); + kfree(new_cc_state); + return; + } + + *new_cc_state = *old_cc_state; + + new_cc_state->cct.ccti_limit = ppd->total_cct_entry - 1; + memcpy(new_cc_state->cct.entries, ppd->ccti_entries, + ppd->total_cct_entry * sizeof(struct ib_cc_table_entry)); + + new_cc_state->cong_setting.port_control = IB_CC_CCS_PC_SL_BASED; + new_cc_state->cong_setting.control_map = ppd->cc_sl_control_map; + memcpy(new_cc_state->cong_setting.entries, ppd->congestion_entries, + OPA_MAX_SLS * sizeof(struct opa_congestion_setting_entry)); + + rcu_assign_pointer(ppd->cc_state, new_cc_state); + + spin_unlock(&ppd->cc_state_lock); + + call_rcu(&old_cc_state->rcu, cc_state_reclaim); +} + static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, u32 *resp_len) @@ -3374,6 +3424,11 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, struct opa_congestion_setting_entry_shadow *entries; int i; + /* + * Save details from packet into the ppd. Hold the cc_state_lock so + * our information is consistent with anyone trying to apply the state. + */ + spin_lock(&ppd->cc_state_lock); ppd->cc_sl_control_map = be32_to_cpu(p->control_map); entries = ppd->congestion_entries; @@ -3384,6 +3439,10 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, p->entries[i].trigger_threshold; entries[i].ccti_min = p->entries[i].ccti_min; } + spin_unlock(&ppd->cc_state_lock); + + /* now apply the information */ + apply_cc_state(ppd); return __subn_get_opa_cong_setting(smp, am, data, ibdev, port, resp_len); @@ -3526,7 +3585,6 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, int i, j; u32 sentry, eentry; u16 ccti_limit; - struct cc_state *old_cc_state, *new_cc_state; /* sanity check n_blocks, start_block */ if (n_blocks == 0 || @@ -3546,45 +3604,20 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, return reply((struct ib_mad_hdr *)smp); } - new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL); - if (!new_cc_state) - goto getit; - + /* + * Save details from packet into the ppd. Hold the cc_state_lock so + * our information is consistent with anyone trying to apply the state. + */ spin_lock(&ppd->cc_state_lock); - - old_cc_state = get_cc_state(ppd); - - if (!old_cc_state) { - spin_unlock(&ppd->cc_state_lock); - kfree(new_cc_state); - return reply((struct ib_mad_hdr *)smp); - } - - *new_cc_state = *old_cc_state; - - new_cc_state->cct.ccti_limit = ccti_limit; - - entries = ppd->ccti_entries; ppd->total_cct_entry = ccti_limit + 1; - + entries = ppd->ccti_entries; for (j = 0, i = sentry; i < eentry; j++, i++) entries[i].entry = be16_to_cpu(p->ccti_entries[j].entry); - - memcpy(new_cc_state->cct.entries, entries, - eentry * sizeof(struct ib_cc_table_entry)); - - new_cc_state->cong_setting.port_control = IB_CC_CCS_PC_SL_BASED; - new_cc_state->cong_setting.control_map = ppd->cc_sl_control_map; - memcpy(new_cc_state->cong_setting.entries, ppd->congestion_entries, - OPA_MAX_SLS * sizeof(struct opa_congestion_setting_entry)); - - rcu_assign_pointer(ppd->cc_state, new_cc_state); - spin_unlock(&ppd->cc_state_lock); - call_rcu(&old_cc_state->rcu, cc_state_reclaim); + /* now apply the information */ + apply_cc_state(ppd); -getit: return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len); } diff --git a/drivers/staging/rdma/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h index 55ee08675333..55ee08675333 100644 --- a/drivers/staging/rdma/hfi1/mad.h +++ b/drivers/infiniband/hw/hfi1/mad.h diff --git a/drivers/staging/rdma/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index 2b0e91d3093d..b7a80aa1ae30 100644 --- a/drivers/staging/rdma/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -45,6 +45,7 @@ * */ #include <linux/list.h> +#include <linux/rculist.h> #include <linux/mmu_notifier.h> #include <linux/interval_tree_generic.h> @@ -97,7 +98,6 @@ static unsigned long mmu_node_last(struct mmu_rb_node *node) int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops) { struct mmu_rb_handler *handlr; - unsigned long flags; if (!ops->invalidate) return -EINVAL; @@ -111,9 +111,9 @@ int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops) INIT_HLIST_NODE(&handlr->mn.hlist); spin_lock_init(&handlr->lock); handlr->mn.ops = &mn_opts; - spin_lock_irqsave(&mmu_rb_lock, flags); - list_add_tail(&handlr->list, &mmu_rb_handlers); - spin_unlock_irqrestore(&mmu_rb_lock, flags); + spin_lock(&mmu_rb_lock); + list_add_tail_rcu(&handlr->list, &mmu_rb_handlers); + spin_unlock(&mmu_rb_lock); return mmu_notifier_register(&handlr->mn, current->mm); } @@ -130,9 +130,10 @@ void hfi1_mmu_rb_unregister(struct rb_root *root) if (current->mm) mmu_notifier_unregister(&handler->mn, current->mm); - spin_lock_irqsave(&mmu_rb_lock, flags); - list_del(&handler->list); - spin_unlock_irqrestore(&mmu_rb_lock, flags); + spin_lock(&mmu_rb_lock); + list_del_rcu(&handler->list); + spin_unlock(&mmu_rb_lock); + synchronize_rcu(); spin_lock_irqsave(&handler->lock, flags); if (!RB_EMPTY_ROOT(root)) { @@ -271,16 +272,15 @@ void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node) static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root) { struct mmu_rb_handler *handler; - unsigned long flags; - spin_lock_irqsave(&mmu_rb_lock, flags); - list_for_each_entry(handler, &mmu_rb_handlers, list) { + rcu_read_lock(); + list_for_each_entry_rcu(handler, &mmu_rb_handlers, list) { if (handler->root == root) goto unlock; } handler = NULL; unlock: - spin_unlock_irqrestore(&mmu_rb_lock, flags); + rcu_read_unlock(); return handler; } diff --git a/drivers/staging/rdma/hfi1/mmu_rb.h b/drivers/infiniband/hw/hfi1/mmu_rb.h index 7a57b9c49d27..7a57b9c49d27 100644 --- a/drivers/staging/rdma/hfi1/mmu_rb.h +++ b/drivers/infiniband/hw/hfi1/mmu_rb.h diff --git a/drivers/staging/rdma/hfi1/opa_compat.h b/drivers/infiniband/hw/hfi1/opa_compat.h index 6ef3c1cbdcd7..6ef3c1cbdcd7 100644 --- a/drivers/staging/rdma/hfi1/opa_compat.h +++ b/drivers/infiniband/hw/hfi1/opa_compat.h diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 0bac21e6a658..0bac21e6a658 100644 --- a/drivers/staging/rdma/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index c67b9ad3fcf4..d5edb1afbb8f 100644 --- a/drivers/staging/rdma/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1835,8 +1835,7 @@ int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts) struct pio_vl_map *oldmap, *newmap; if (!vl_scontexts) { - /* send context 0 reserved for VL15 */ - for (i = 1; i < dd->num_send_contexts; i++) + for (i = 0; i < dd->num_send_contexts; i++) if (dd->send_contexts[i].type == SC_KERNEL) num_kernel_send_contexts++; /* truncate divide */ diff --git a/drivers/staging/rdma/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h index 53a08edb7f64..464cbd27b975 100644 --- a/drivers/staging/rdma/hfi1/pio.h +++ b/drivers/infiniband/hw/hfi1/pio.h @@ -49,10 +49,10 @@ /* send context types */ #define SC_KERNEL 0 -#define SC_ACK 1 -#define SC_USER 2 -#define SC_VL15 3 -#define SC_MAX 4 +#define SC_VL15 1 +#define SC_ACK 2 +#define SC_USER 3 /* must be the last one: it may take all left */ +#define SC_MAX 4 /* count of send context types */ /* invalid send context index */ #define INVALID_SCI 0xff diff --git a/drivers/staging/rdma/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c index 8c25e1b58849..8c25e1b58849 100644 --- a/drivers/staging/rdma/hfi1/pio_copy.c +++ b/drivers/infiniband/hw/hfi1/pio_copy.c diff --git a/drivers/staging/rdma/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index 8fe8a205b5bb..03df9322f862 100644 --- a/drivers/staging/rdma/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -87,6 +87,17 @@ void free_platform_config(struct hfi1_devdata *dd) */ } +void get_port_type(struct hfi1_pportdata *ppd) +{ + int ret; + + ret = get_platform_config_field(ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0, + PORT_TABLE_PORT_TYPE, &ppd->port_type, + 4); + if (ret) + ppd->port_type = PORT_TYPE_UNKNOWN; +} + int set_qsfp_tx(struct hfi1_pportdata *ppd, int on) { u8 tx_ctrl_byte = on ? 0x0 : 0xF; @@ -529,7 +540,8 @@ static void apply_tunings( /* Enable external device config if channel is limiting active */ read_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS, GENERAL_CONFIG, &config_data); - config_data |= limiting_active; + config_data &= ~(0xff << ENABLE_EXT_DEV_CONFIG_SHIFT); + config_data |= ((u32)limiting_active << ENABLE_EXT_DEV_CONFIG_SHIFT); ret = load_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS, GENERAL_CONFIG, config_data); if (ret != HCMD_SUCCESS) @@ -542,7 +554,8 @@ static void apply_tunings( /* Pass tuning method to 8051 */ read_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG, &config_data); - config_data |= tuning_method; + config_data &= ~(0xff << TUNING_METHOD_SHIFT); + config_data |= ((u32)tuning_method << TUNING_METHOD_SHIFT); ret = load_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG, config_data); if (ret != HCMD_SUCCESS) @@ -564,8 +577,8 @@ static void apply_tunings( ret = read_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS, GENERAL_CONFIG, &config_data); /* Clear, then set the external device config field */ - config_data &= ~(0xFF << 24); - config_data |= (external_device_config << 24); + config_data &= ~(u32)0xFF; + config_data |= external_device_config; ret = load_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS, GENERAL_CONFIG, config_data); if (ret != HCMD_SUCCESS) @@ -784,12 +797,6 @@ void tune_serdes(struct hfi1_pportdata *ppd) return; } - ret = get_platform_config_field(ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0, - PORT_TABLE_PORT_TYPE, &ppd->port_type, - 4); - if (ret) - ppd->port_type = PORT_TYPE_UNKNOWN; - switch (ppd->port_type) { case PORT_TYPE_DISCONNECTED: ppd->offline_disabled_reason = diff --git a/drivers/staging/rdma/hfi1/platform.h b/drivers/infiniband/hw/hfi1/platform.h index 19620cf546d5..e2c21613c326 100644 --- a/drivers/staging/rdma/hfi1/platform.h +++ b/drivers/infiniband/hw/hfi1/platform.h @@ -298,6 +298,7 @@ enum link_tuning_encoding { /* platform.c */ void get_platform_config(struct hfi1_devdata *dd); void free_platform_config(struct hfi1_devdata *dd); +void get_port_type(struct hfi1_pportdata *ppd); int set_qsfp_tx(struct hfi1_pportdata *ppd, int on); void tune_serdes(struct hfi1_pportdata *ppd); diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 91eb42316df9..1a942ffba4cb 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -49,7 +49,6 @@ #include <linux/vmalloc.h> #include <linux/hash.h> #include <linux/module.h> -#include <linux/random.h> #include <linux/seq_file.h> #include <rdma/rdma_vt.h> #include <rdma/rdmavt_qp.h> @@ -161,9 +160,6 @@ static inline int opa_mtu_enum_to_int(int mtu) * This function is what we would push to the core layer if we wanted to be a * "first class citizen". Instead we hide this here and rely on Verbs ULPs * to blindly pass the MTU enum value from the PathRecord to us. - * - * The actual flag used to determine "8k MTU" will change and is currently - * unknown. */ static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu) { @@ -516,6 +512,7 @@ static void iowait_wakeup(struct iowait *wait, int reason) static void iowait_sdma_drained(struct iowait *wait) { struct rvt_qp *qp = iowait_to_qp(wait); + unsigned long flags; /* * This happens when the send engine notes @@ -523,12 +520,12 @@ static void iowait_sdma_drained(struct iowait *wait) * do the flush work until that QP's * sdma work has finished. */ - spin_lock(&qp->s_lock); + spin_lock_irqsave(&qp->s_lock, flags); if (qp->s_flags & RVT_S_WAIT_DMA) { qp->s_flags &= ~RVT_S_WAIT_DMA; hfi1_schedule_send(qp); } - spin_unlock(&qp->s_lock); + spin_unlock_irqrestore(&qp->s_lock, flags); } /** diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h index e7bc8d6cf681..e7bc8d6cf681 100644 --- a/drivers/staging/rdma/hfi1/qp.h +++ b/drivers/infiniband/hw/hfi1/qp.h diff --git a/drivers/staging/rdma/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c index 2441669f0817..2441669f0817 100644 --- a/drivers/staging/rdma/hfi1/qsfp.c +++ b/drivers/infiniband/hw/hfi1/qsfp.c diff --git a/drivers/staging/rdma/hfi1/qsfp.h b/drivers/infiniband/hw/hfi1/qsfp.h index dadc66c442b9..dadc66c442b9 100644 --- a/drivers/staging/rdma/hfi1/qsfp.h +++ b/drivers/infiniband/hw/hfi1/qsfp.h diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 792f15eb8efe..792f15eb8efe 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index a659aec3c3c6..a659aec3c3c6 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index abb8ebc1fcac..f9befc05b349 100644 --- a/drivers/staging/rdma/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -134,6 +134,7 @@ static const char * const sdma_state_names[] = { [sdma_state_s99_running] = "s99_Running", }; +#ifdef CONFIG_SDMA_VERBOSITY static const char * const sdma_event_names[] = { [sdma_event_e00_go_hw_down] = "e00_GoHwDown", [sdma_event_e10_go_hw_start] = "e10_GoHwStart", @@ -150,6 +151,7 @@ static const char * const sdma_event_names[] = { [sdma_event_e85_link_down] = "e85_LinkDown", [sdma_event_e90_sw_halted] = "e90_SwHalted", }; +#endif static const struct sdma_set_state_action sdma_action_table[] = { [sdma_state_s00_hw_down] = { @@ -376,7 +378,7 @@ static inline void complete_tx(struct sdma_engine *sde, sdma_txclean(sde->dd, tx); if (complete) (*complete)(tx, res); - if (iowait_sdma_dec(wait) && wait) + if (wait && iowait_sdma_dec(wait)) iowait_drain_wakeup(wait); } diff --git a/drivers/staging/rdma/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h index 8f50c99fe711..8f50c99fe711 100644 --- a/drivers/staging/rdma/hfi1/sdma.h +++ b/drivers/infiniband/hw/hfi1/sdma.h diff --git a/drivers/staging/rdma/hfi1/sdma_txreq.h b/drivers/infiniband/hw/hfi1/sdma_txreq.h index bf7d777d756e..bf7d777d756e 100644 --- a/drivers/staging/rdma/hfi1/sdma_txreq.h +++ b/drivers/infiniband/hw/hfi1/sdma_txreq.h diff --git a/drivers/staging/rdma/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 8cd6df8634ad..91fc2aed6aed 100644 --- a/drivers/staging/rdma/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -721,8 +721,8 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, } dd_dev_info(dd, - "IB%u: Congestion Control Agent enabled for port %d\n", - dd->unit, port_num); + "Congestion Control Agent enabled for port %d\n", + port_num); return 0; diff --git a/drivers/staging/rdma/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index 8b62fefcf903..79b2952c0dfb 100644 --- a/drivers/staging/rdma/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -66,6 +66,7 @@ u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr) #define RETH_PRN "reth vaddr 0x%.16llx rkey 0x%.8x dlen 0x%.8x" #define AETH_PRN "aeth syn 0x%.2x %s msn 0x%.8x" #define DETH_PRN "deth qkey 0x%.8x sqpn 0x%.6x" +#define IETH_PRN "ieth rkey 0x%.8x" #define ATOMICACKETH_PRN "origdata %lld" #define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %lld cdata %lld" @@ -166,6 +167,12 @@ const char *parse_everbs_hdrs( be32_to_cpu(eh->ud.deth[0]), be32_to_cpu(eh->ud.deth[1]) & RVT_QPN_MASK); break; + /* ieth */ + case OP(RC, SEND_LAST_WITH_INVALIDATE): + case OP(RC, SEND_ONLY_WITH_INVALIDATE): + trace_seq_printf(p, IETH_PRN, + be32_to_cpu(eh->ieth)); + break; } trace_seq_putc(p, 0); return ret; @@ -233,3 +240,4 @@ __hfi1_trace_fn(FIRMWARE); __hfi1_trace_fn(RCVCTRL); __hfi1_trace_fn(TID); __hfi1_trace_fn(MMU); +__hfi1_trace_fn(IOCTL); diff --git a/drivers/staging/rdma/hfi1/trace.h b/drivers/infiniband/hw/hfi1/trace.h index 963dc948c38a..28c1d0832886 100644 --- a/drivers/staging/rdma/hfi1/trace.h +++ b/drivers/infiniband/hw/hfi1/trace.h @@ -74,8 +74,8 @@ __print_symbolic(etype, \ TRACE_EVENT(hfi1_rcvhdr, TP_PROTO(struct hfi1_devdata *dd, - u64 eflags, u32 ctxt, + u64 eflags, u32 etype, u32 hlen, u32 tlen, @@ -392,6 +392,8 @@ __print_symbolic(opcode, \ ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \ ib_opcode_name(RC_COMPARE_SWAP), \ ib_opcode_name(RC_FETCH_ADD), \ + ib_opcode_name(RC_SEND_LAST_WITH_INVALIDATE), \ + ib_opcode_name(RC_SEND_ONLY_WITH_INVALIDATE), \ ib_opcode_name(UC_SEND_FIRST), \ ib_opcode_name(UC_SEND_MIDDLE), \ ib_opcode_name(UC_SEND_LAST), \ @@ -1341,6 +1343,7 @@ __hfi1_trace_def(FIRMWARE); __hfi1_trace_def(RCVCTRL); __hfi1_trace_def(TID); __hfi1_trace_def(MMU); +__hfi1_trace_def(IOCTL); #define hfi1_cdbg(which, fmt, ...) \ __hfi1_trace_##which(__func__, fmt, ##__VA_ARGS__) diff --git a/drivers/staging/rdma/hfi1/twsi.c b/drivers/infiniband/hw/hfi1/twsi.c index e82e52a63d35..e82e52a63d35 100644 --- a/drivers/staging/rdma/hfi1/twsi.c +++ b/drivers/infiniband/hw/hfi1/twsi.c diff --git a/drivers/staging/rdma/hfi1/twsi.h b/drivers/infiniband/hw/hfi1/twsi.h index 5b8a5b5e7eae..5b8a5b5e7eae 100644 --- a/drivers/staging/rdma/hfi1/twsi.h +++ b/drivers/infiniband/hw/hfi1/twsi.h diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index df773d433297..df773d433297 100644 --- a/drivers/staging/rdma/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 1e503ad0bebb..1e503ad0bebb 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 1b640a35b3fe..1b640a35b3fe 100644 --- a/drivers/staging/rdma/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index 9bc8d9fba87e..9bc8d9fba87e 100644 --- a/drivers/staging/rdma/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h diff --git a/drivers/staging/rdma/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c index 88e10b5f55f1..88e10b5f55f1 100644 --- a/drivers/staging/rdma/hfi1/user_pages.c +++ b/drivers/infiniband/hw/hfi1/user_pages.c diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 0014c9c0e967..29f4795f866c 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -166,6 +166,8 @@ static unsigned initial_pkt_count = 8; #define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */ +struct sdma_mmu_node; + struct user_sdma_iovec { struct list_head list; struct iovec iov; @@ -178,6 +180,7 @@ struct user_sdma_iovec { * which we last left off. */ u64 offset; + struct sdma_mmu_node *node; }; #define SDMA_CACHE_NODE_EVICT BIT(0) @@ -507,6 +510,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, struct sdma_req_info info; struct user_sdma_request *req; u8 opcode, sc, vl; + int req_queued = 0; if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) { hfi1_cdbg( @@ -703,6 +707,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, set_comp_state(pq, cq, info.comp_idx, QUEUED, 0); atomic_inc(&pq->n_reqs); + req_queued = 1; /* Send the first N packets in the request to buy us some time */ ret = user_sdma_send_pkts(req, pcount); if (unlikely(ret < 0 && ret != -EBUSY)) { @@ -747,7 +752,8 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, return 0; free_req: user_sdma_free_request(req, true); - pq_update(pq); + if (req_queued) + pq_update(pq); set_comp_state(pq, cq, info.comp_idx, ERROR, req->status); return ret; } @@ -1153,6 +1159,7 @@ retry: } iovec->pages = node->pages; iovec->npages = npages; + iovec->node = node; ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb); if (ret) { @@ -1519,18 +1526,13 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin) } if (req->data_iovs) { struct sdma_mmu_node *node; - struct mmu_rb_node *mnode; int i; for (i = 0; i < req->data_iovs; i++) { - mnode = hfi1_mmu_rb_search( - &req->pq->sdma_rb_root, - (unsigned long)req->iovs[i].iov.iov_base, - req->iovs[i].iov.iov_len); - if (!mnode || IS_ERR(mnode)) + node = req->iovs[i].node; + if (!node) continue; - node = container_of(mnode, struct sdma_mmu_node, rb); if (unpin) hfi1_mmu_rb_remove(&req->pq->sdma_rb_root, &node->rb); diff --git a/drivers/staging/rdma/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index b9240e351161..b9240e351161 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 9cdc85fa366f..849c4b9399d4 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -52,7 +52,6 @@ #include <linux/utsname.h> #include <linux/rculist.h> #include <linux/mm.h> -#include <linux/random.h> #include <linux/vmalloc.h> #include "hfi.h" @@ -336,6 +335,8 @@ const u8 hdr_len_by_opcode[256] = { [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4, [IB_OPCODE_RC_COMPARE_SWAP] = 12 + 8 + 28, [IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28, + [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = 12 + 8 + 4, + [IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = 12 + 8 + 4, /* UC */ [IB_OPCODE_UC_SEND_FIRST] = 12 + 8, [IB_OPCODE_UC_SEND_MIDDLE] = 12 + 8, @@ -946,7 +947,6 @@ static int pio_wait(struct rvt_qp *qp, dev->n_piowait += !!(flag & RVT_S_WAIT_PIO); dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN); - dev->n_piowait++; qp->s_flags |= flag; was_empty = list_empty(&sc->piowait); list_add_tail(&priv->s_iowait.list, &sc->piowait); diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 3ee223983b20..488356775627 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -152,6 +152,7 @@ union ib_ehdrs { } at; __be32 imm_data; __be32 aeth; + __be32 ieth; struct ib_atomic_eth atomic_eth; } __packed; diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c index bc95c4112c61..bc95c4112c61 100644 --- a/drivers/staging/rdma/hfi1/verbs_txreq.c +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h index 1cf69b2fe4a5..1cf69b2fe4a5 100644 --- a/drivers/staging/rdma/hfi1/verbs_txreq.h +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 82d7c4bf5970..ce4034071f9c 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -1308,21 +1308,6 @@ static const struct qib_hwerror_msgs qib_7322p_error_msgs[] = { SYM_LSB(IntMask, fldname##17IntMask)), \ .msg = #fldname "_C", .sz = sizeof(#fldname "_C") } -static const struct qib_hwerror_msgs qib_7322_intr_msgs[] = { - INTR_AUTO_P(SDmaInt), - INTR_AUTO_P(SDmaProgressInt), - INTR_AUTO_P(SDmaIdleInt), - INTR_AUTO_P(SDmaCleanupDone), - INTR_AUTO_C(RcvUrg), - INTR_AUTO_P(ErrInt), - INTR_AUTO(ErrInt), /* non-port-specific errs */ - INTR_AUTO(AssertGPIOInt), - INTR_AUTO_P(SendDoneInt), - INTR_AUTO(SendBufAvailInt), - INTR_AUTO_C(RcvAvail), - { .mask = 0, .sz = 0 } -}; - #define TXSYMPTOM_AUTO_P(fldname) \ { .mask = SYM_MASK(SendHdrErrSymptom_0, fldname), \ .msg = #fldname, .sz = sizeof(#fldname) } diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 6888f03c6d61..4f878151f81f 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -159,6 +159,7 @@ struct qib_other_headers { } at; __be32 imm_data; __be32 aeth; + __be32 ieth; struct ib_atomic_eth atomic_eth; } u; } __packed; diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index b1ffc8b4a6c0..6ca6fa80dd6e 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -525,6 +525,7 @@ int rvt_driver_cq_init(struct rvt_dev_info *rdi) return PTR_ERR(task); } + set_user_nice(task, MIN_NICE); cpu = cpumask_first(cpumask_of_node(rdi->dparms.node)); kthread_bind(task, cpu); wake_up_process(task); diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 0ff765bfd619..0f4d4500f45e 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -124,11 +124,13 @@ static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd, int count) { int m, i = 0; + struct rvt_dev_info *dev = ib_to_rvt(pd->device); mr->mapsz = 0; m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ; for (; i < m; i++) { - mr->map[i] = kzalloc(sizeof(*mr->map[0]), GFP_KERNEL); + mr->map[i] = kzalloc_node(sizeof(*mr->map[0]), GFP_KERNEL, + dev->dparms.node); if (!mr->map[i]) { rvt_deinit_mregion(mr); return -ENOMEM; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 0f12c211c385..5fa4d4d81ee0 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -397,6 +397,7 @@ static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn) static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) { unsigned n; + struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) rvt_put_ss(&qp->s_rdma_read_sge); @@ -431,7 +432,7 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) if (qp->ibqp.qp_type != IB_QPT_RC) return; - for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) { + for (n = 0; n < rvt_max_atomic(rdi); n++) { struct rvt_ack_entry *e = &qp->s_ack_queue[n]; if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && @@ -569,7 +570,12 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, qp->s_ssn = 1; qp->s_lsn = 0; qp->s_mig_state = IB_MIG_MIGRATED; - memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); + if (qp->s_ack_queue) + memset( + qp->s_ack_queue, + 0, + rvt_max_atomic(rdi) * + sizeof(*qp->s_ack_queue)); qp->r_head_ack_queue = 0; qp->s_tail_ack_queue = 0; qp->s_num_rd_atomic = 0; @@ -653,9 +659,9 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, if (gfp == GFP_NOIO) swq = __vmalloc( (init_attr->cap.max_send_wr + 1) * sz, - gfp, PAGE_KERNEL); + gfp | __GFP_ZERO, PAGE_KERNEL); else - swq = vmalloc_node( + swq = vzalloc_node( (init_attr->cap.max_send_wr + 1) * sz, rdi->dparms.node); if (!swq) @@ -677,6 +683,16 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, goto bail_swq; RCU_INIT_POINTER(qp->next, NULL); + if (init_attr->qp_type == IB_QPT_RC) { + qp->s_ack_queue = + kzalloc_node( + sizeof(*qp->s_ack_queue) * + rvt_max_atomic(rdi), + gfp, + rdi->dparms.node); + if (!qp->s_ack_queue) + goto bail_qp; + } /* * Driver needs to set up it's private QP structure and do any @@ -704,9 +720,9 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, qp->r_rq.wq = __vmalloc( sizeof(struct rvt_rwq) + qp->r_rq.size * sz, - gfp, PAGE_KERNEL); + gfp | __GFP_ZERO, PAGE_KERNEL); else - qp->r_rq.wq = vmalloc_node( + qp->r_rq.wq = vzalloc_node( sizeof(struct rvt_rwq) + qp->r_rq.size * sz, rdi->dparms.node); @@ -857,6 +873,7 @@ bail_driver_priv: rdi->driver_f.qp_priv_free(rdi, qp); bail_qp: + kfree(qp->s_ack_queue); kfree(qp); bail_swq: @@ -1284,6 +1301,7 @@ int rvt_destroy_qp(struct ib_qp *ibqp) vfree(qp->r_rq.wq); vfree(qp->s_wq); rdi->driver_f.qp_priv_free(rdi, qp); + kfree(qp->s_ack_queue); kfree(qp); return 0; } diff --git a/drivers/staging/rdma/Kconfig b/drivers/staging/rdma/Kconfig index f1f3ecadf0fb..2c5b0188ebbf 100644 --- a/drivers/staging/rdma/Kconfig +++ b/drivers/staging/rdma/Kconfig @@ -22,6 +22,4 @@ menuconfig STAGING_RDMA # Please keep entries in alphabetic order if STAGING_RDMA -source "drivers/staging/rdma/hfi1/Kconfig" - endif diff --git a/drivers/staging/rdma/Makefile b/drivers/staging/rdma/Makefile index 8c7fc1de48a7..b5e94f169101 100644 --- a/drivers/staging/rdma/Makefile +++ b/drivers/staging/rdma/Makefile @@ -1,2 +1 @@ # Entries for RDMA_STAGING tree -obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/ diff --git a/drivers/staging/rdma/hfi1/TODO b/drivers/staging/rdma/hfi1/TODO deleted file mode 100644 index 4c6f1d7d2eaf..000000000000 --- a/drivers/staging/rdma/hfi1/TODO +++ /dev/null @@ -1,6 +0,0 @@ -July, 2015 - -- Remove unneeded file entries in sysfs -- Remove software processing of IB protocol and place in library for use - by qib, ipath (if still present), hfi1, and eventually soft-roce -- Replace incorrect uAPI diff --git a/drivers/staging/rdma/hfi1/diag.c b/drivers/staging/rdma/hfi1/diag.c deleted file mode 100644 index bb2409ad891a..000000000000 --- a/drivers/staging/rdma/hfi1/diag.c +++ /dev/null @@ -1,1925 +0,0 @@ -/* - * Copyright(c) 2015, 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -/* - * This file contains support for diagnostic functions. It is accessed by - * opening the hfi1_diag device, normally minor number 129. Diagnostic use - * of the chip may render the chip or board unusable until the driver - * is unloaded, or in some cases, until the system is rebooted. - * - * Accesses to the chip through this interface are not similar to going - * through the /sys/bus/pci resource mmap interface. - */ - -#include <linux/io.h> -#include <linux/pci.h> -#include <linux/poll.h> -#include <linux/vmalloc.h> -#include <linux/export.h> -#include <linux/fs.h> -#include <linux/uaccess.h> -#include <linux/module.h> -#include <rdma/ib_smi.h> -#include "hfi.h" -#include "device.h" -#include "common.h" -#include "verbs_txreq.h" -#include "trace.h" - -#undef pr_fmt -#define pr_fmt(fmt) DRIVER_NAME ": " fmt -#define snoop_dbg(fmt, ...) \ - hfi1_cdbg(SNOOP, fmt, ##__VA_ARGS__) - -/* Snoop option mask */ -#define SNOOP_DROP_SEND BIT(0) -#define SNOOP_USE_METADATA BIT(1) -#define SNOOP_SET_VL0TOVL15 BIT(2) - -static u8 snoop_flags; - -/* - * Extract packet length from LRH header. - * This is in Dwords so multiply by 4 to get size in bytes - */ -#define HFI1_GET_PKT_LEN(x) (((be16_to_cpu((x)->lrh[2]) & 0xFFF)) << 2) - -enum hfi1_filter_status { - HFI1_FILTER_HIT, - HFI1_FILTER_ERR, - HFI1_FILTER_MISS -}; - -/* snoop processing functions */ -rhf_rcv_function_ptr snoop_rhf_rcv_functions[8] = { - [RHF_RCV_TYPE_EXPECTED] = snoop_recv_handler, - [RHF_RCV_TYPE_EAGER] = snoop_recv_handler, - [RHF_RCV_TYPE_IB] = snoop_recv_handler, - [RHF_RCV_TYPE_ERROR] = snoop_recv_handler, - [RHF_RCV_TYPE_BYPASS] = snoop_recv_handler, - [RHF_RCV_TYPE_INVALID5] = process_receive_invalid, - [RHF_RCV_TYPE_INVALID6] = process_receive_invalid, - [RHF_RCV_TYPE_INVALID7] = process_receive_invalid -}; - -/* Snoop packet structure */ -struct snoop_packet { - struct list_head list; - u32 total_len; - u8 data[]; -}; - -/* Do not make these an enum or it will blow up the capture_md */ -#define PKT_DIR_EGRESS 0x0 -#define PKT_DIR_INGRESS 0x1 - -/* Packet capture metadata returned to the user with the packet. */ -struct capture_md { - u8 port; - u8 dir; - u8 reserved[6]; - union { - u64 pbc; - u64 rhf; - } u; -}; - -static atomic_t diagpkt_count = ATOMIC_INIT(0); -static struct cdev diagpkt_cdev; -static struct device *diagpkt_device; - -static ssize_t diagpkt_write(struct file *fp, const char __user *data, - size_t count, loff_t *off); - -static const struct file_operations diagpkt_file_ops = { - .owner = THIS_MODULE, - .write = diagpkt_write, - .llseek = noop_llseek, -}; - -/* - * This is used for communication with user space for snoop extended IOCTLs - */ -struct hfi1_link_info { - __be64 node_guid; - u8 port_mode; - u8 port_state; - u16 link_speed_active; - u16 link_width_active; - u16 vl15_init; - u8 port_number; - /* - * Add padding to make this a full IB SMP payload. Note: changing the - * size of this structure will make the IOCTLs created with _IOWR - * change. - * Be sure to run tests on all IOCTLs when making changes to this - * structure. - */ - u8 res[47]; -}; - -/* - * This starts our ioctl sequence numbers *way* off from the ones - * defined in ib_core. - */ -#define SNOOP_CAPTURE_VERSION 0x1 - -#define IB_IOCTL_MAGIC 0x1b /* See Documentation/ioctl-number.txt */ -#define HFI1_SNOOP_IOC_MAGIC IB_IOCTL_MAGIC -#define HFI1_SNOOP_IOC_BASE_SEQ 0x80 - -#define HFI1_SNOOP_IOCGETLINKSTATE \ - _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ) -#define HFI1_SNOOP_IOCSETLINKSTATE \ - _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 1) -#define HFI1_SNOOP_IOCCLEARQUEUE \ - _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 2) -#define HFI1_SNOOP_IOCCLEARFILTER \ - _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 3) -#define HFI1_SNOOP_IOCSETFILTER \ - _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 4) -#define HFI1_SNOOP_IOCGETVERSION \ - _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 5) -#define HFI1_SNOOP_IOCSET_OPTS \ - _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 6) - -/* - * These offsets +6/+7 could change, but these are already known and used - * IOCTL numbers so don't change them without a good reason. - */ -#define HFI1_SNOOP_IOCGETLINKSTATE_EXTRA \ - _IOWR(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 6, \ - struct hfi1_link_info) -#define HFI1_SNOOP_IOCSETLINKSTATE_EXTRA \ - _IOWR(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 7, \ - struct hfi1_link_info) - -static int hfi1_snoop_open(struct inode *in, struct file *fp); -static ssize_t hfi1_snoop_read(struct file *fp, char __user *data, - size_t pkt_len, loff_t *off); -static ssize_t hfi1_snoop_write(struct file *fp, const char __user *data, - size_t count, loff_t *off); -static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg); -static unsigned int hfi1_snoop_poll(struct file *fp, - struct poll_table_struct *wait); -static int hfi1_snoop_release(struct inode *in, struct file *fp); - -struct hfi1_packet_filter_command { - int opcode; - int length; - void *value_ptr; -}; - -/* Can't re-use PKT_DIR_*GRESS here because 0 means no packets for this */ -#define HFI1_SNOOP_INGRESS 0x1 -#define HFI1_SNOOP_EGRESS 0x2 - -enum hfi1_packet_filter_opcodes { - FILTER_BY_LID, - FILTER_BY_DLID, - FILTER_BY_MAD_MGMT_CLASS, - FILTER_BY_QP_NUMBER, - FILTER_BY_PKT_TYPE, - FILTER_BY_SERVICE_LEVEL, - FILTER_BY_PKEY, - FILTER_BY_DIRECTION, -}; - -static const struct file_operations snoop_file_ops = { - .owner = THIS_MODULE, - .open = hfi1_snoop_open, - .read = hfi1_snoop_read, - .unlocked_ioctl = hfi1_ioctl, - .poll = hfi1_snoop_poll, - .write = hfi1_snoop_write, - .release = hfi1_snoop_release -}; - -struct hfi1_filter_array { - int (*filter)(void *, void *, void *); -}; - -static int hfi1_filter_lid(void *ibhdr, void *packet_data, void *value); -static int hfi1_filter_dlid(void *ibhdr, void *packet_data, void *value); -static int hfi1_filter_mad_mgmt_class(void *ibhdr, void *packet_data, - void *value); -static int hfi1_filter_qp_number(void *ibhdr, void *packet_data, void *value); -static int hfi1_filter_ibpacket_type(void *ibhdr, void *packet_data, - void *value); -static int hfi1_filter_ib_service_level(void *ibhdr, void *packet_data, - void *value); -static int hfi1_filter_ib_pkey(void *ibhdr, void *packet_data, void *value); -static int hfi1_filter_direction(void *ibhdr, void *packet_data, void *value); - -static const struct hfi1_filter_array hfi1_filters[] = { - { hfi1_filter_lid }, - { hfi1_filter_dlid }, - { hfi1_filter_mad_mgmt_class }, - { hfi1_filter_qp_number }, - { hfi1_filter_ibpacket_type }, - { hfi1_filter_ib_service_level }, - { hfi1_filter_ib_pkey }, - { hfi1_filter_direction }, -}; - -#define HFI1_MAX_FILTERS ARRAY_SIZE(hfi1_filters) -#define HFI1_DIAG_MINOR_BASE 129 - -static int hfi1_snoop_add(struct hfi1_devdata *dd, const char *name); - -int hfi1_diag_add(struct hfi1_devdata *dd) -{ - char name[16]; - int ret = 0; - - snprintf(name, sizeof(name), "%s_diagpkt%d", class_name(), - dd->unit); - /* - * Do this for each device as opposed to the normal diagpkt - * interface which is one per host - */ - ret = hfi1_snoop_add(dd, name); - if (ret) - dd_dev_err(dd, "Unable to init snoop/capture device"); - - snprintf(name, sizeof(name), "%s_diagpkt", class_name()); - if (atomic_inc_return(&diagpkt_count) == 1) { - ret = hfi1_cdev_init(HFI1_DIAGPKT_MINOR, name, - &diagpkt_file_ops, &diagpkt_cdev, - &diagpkt_device, false); - } - - return ret; -} - -/* this must be called w/ dd->snoop_in_lock held */ -static void drain_snoop_list(struct list_head *queue) -{ - struct list_head *pos, *q; - struct snoop_packet *packet; - - list_for_each_safe(pos, q, queue) { - packet = list_entry(pos, struct snoop_packet, list); - list_del(pos); - kfree(packet); - } -} - -static void hfi1_snoop_remove(struct hfi1_devdata *dd) -{ - unsigned long flags = 0; - - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - drain_snoop_list(&dd->hfi1_snoop.queue); - hfi1_cdev_cleanup(&dd->hfi1_snoop.cdev, &dd->hfi1_snoop.class_dev); - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); -} - -void hfi1_diag_remove(struct hfi1_devdata *dd) -{ - hfi1_snoop_remove(dd); - if (atomic_dec_and_test(&diagpkt_count)) - hfi1_cdev_cleanup(&diagpkt_cdev, &diagpkt_device); - hfi1_cdev_cleanup(&dd->diag_cdev, &dd->diag_device); -} - -/* - * Allocated structure shared between the credit return mechanism and - * diagpkt_send(). - */ -struct diagpkt_wait { - struct completion credits_returned; - int code; - atomic_t count; -}; - -/* - * When each side is finished with the structure, they call this. - * The last user frees the structure. - */ -static void put_diagpkt_wait(struct diagpkt_wait *wait) -{ - if (atomic_dec_and_test(&wait->count)) - kfree(wait); -} - -/* - * Callback from the credit return code. Set the complete, which - * will let diapkt_send() continue. - */ -static void diagpkt_complete(void *arg, int code) -{ - struct diagpkt_wait *wait = (struct diagpkt_wait *)arg; - - wait->code = code; - complete(&wait->credits_returned); - put_diagpkt_wait(wait); /* finished with the structure */ -} - -/** - * diagpkt_send - send a packet - * @dp: diag packet descriptor - */ -static ssize_t diagpkt_send(struct diag_pkt *dp) -{ - struct hfi1_devdata *dd; - struct send_context *sc; - struct pio_buf *pbuf; - u32 *tmpbuf = NULL; - ssize_t ret = 0; - u32 pkt_len, total_len; - pio_release_cb credit_cb = NULL; - void *credit_arg = NULL; - struct diagpkt_wait *wait = NULL; - int trycount = 0; - - dd = hfi1_lookup(dp->unit); - if (!dd || !(dd->flags & HFI1_PRESENT) || !dd->kregbase) { - ret = -ENODEV; - goto bail; - } - if (!(dd->flags & HFI1_INITTED)) { - /* no hardware, freeze, etc. */ - ret = -ENODEV; - goto bail; - } - - if (dp->version != _DIAG_PKT_VERS) { - dd_dev_err(dd, "Invalid version %u for diagpkt_write\n", - dp->version); - ret = -EINVAL; - goto bail; - } - - /* send count must be an exact number of dwords */ - if (dp->len & 3) { - ret = -EINVAL; - goto bail; - } - - /* there is only port 1 */ - if (dp->port != 1) { - ret = -EINVAL; - goto bail; - } - - /* need a valid context */ - if (dp->sw_index >= dd->num_send_contexts) { - ret = -EINVAL; - goto bail; - } - /* can only use kernel contexts */ - if (dd->send_contexts[dp->sw_index].type != SC_KERNEL && - dd->send_contexts[dp->sw_index].type != SC_VL15) { - ret = -EINVAL; - goto bail; - } - /* must be allocated */ - sc = dd->send_contexts[dp->sw_index].sc; - if (!sc) { - ret = -EINVAL; - goto bail; - } - /* must be enabled */ - if (!(sc->flags & SCF_ENABLED)) { - ret = -EINVAL; - goto bail; - } - - /* allocate a buffer and copy the data in */ - tmpbuf = vmalloc(dp->len); - if (!tmpbuf) { - ret = -ENOMEM; - goto bail; - } - - if (copy_from_user(tmpbuf, - (const void __user *)(unsigned long)dp->data, - dp->len)) { - ret = -EFAULT; - goto bail; - } - - /* - * pkt_len is how much data we have to write, includes header and data. - * total_len is length of the packet in Dwords plus the PBC should not - * include the CRC. - */ - pkt_len = dp->len >> 2; - total_len = pkt_len + 2; /* PBC + packet */ - - /* if 0, fill in a default */ - if (dp->pbc == 0) { - struct hfi1_pportdata *ppd = dd->pport; - - hfi1_cdbg(PKT, "Generating PBC"); - dp->pbc = create_pbc(ppd, 0, 0, 0, total_len); - } else { - hfi1_cdbg(PKT, "Using passed in PBC"); - } - - hfi1_cdbg(PKT, "Egress PBC content is 0x%llx", dp->pbc); - - /* - * The caller wants to wait until the packet is sent and to - * check for errors. The best we can do is wait until - * the buffer credits are returned and check if any packet - * error has occurred. If there are any late errors, this - * could miss it. If there are other senders who generate - * an error, this may find it. However, in general, it - * should catch most. - */ - if (dp->flags & F_DIAGPKT_WAIT) { - /* always force a credit return */ - dp->pbc |= PBC_CREDIT_RETURN; - /* turn on credit return interrupts */ - sc_add_credit_return_intr(sc); - wait = kmalloc(sizeof(*wait), GFP_KERNEL); - if (!wait) { - ret = -ENOMEM; - goto bail; - } - init_completion(&wait->credits_returned); - atomic_set(&wait->count, 2); - wait->code = PRC_OK; - - credit_cb = diagpkt_complete; - credit_arg = wait; - } - -retry: - pbuf = sc_buffer_alloc(sc, total_len, credit_cb, credit_arg); - if (!pbuf) { - if (trycount == 0) { - /* force a credit return and try again */ - sc_return_credits(sc); - trycount = 1; - goto retry; - } - /* - * No send buffer means no credit callback. Undo - * the wait set-up that was done above. We free wait - * because the callback will never be called. - */ - if (dp->flags & F_DIAGPKT_WAIT) { - sc_del_credit_return_intr(sc); - kfree(wait); - wait = NULL; - } - ret = -ENOSPC; - goto bail; - } - - pio_copy(dd, pbuf, dp->pbc, tmpbuf, pkt_len); - /* no flush needed as the HW knows the packet size */ - - ret = sizeof(*dp); - - if (dp->flags & F_DIAGPKT_WAIT) { - /* wait for credit return */ - ret = wait_for_completion_interruptible( - &wait->credits_returned); - /* - * If the wait returns an error, the wait was interrupted, - * e.g. with a ^C in the user program. The callback is - * still pending. This is OK as the wait structure is - * kmalloc'ed and the structure will free itself when - * all users are done with it. - * - * A context disable occurs on a send context restart, so - * include that in the list of errors below to check for. - * NOTE: PRC_FILL_ERR is at best informational and cannot - * be depended on. - */ - if (!ret && (((wait->code & PRC_STATUS_ERR) || - (wait->code & PRC_FILL_ERR) || - (wait->code & PRC_SC_DISABLE)))) - ret = -EIO; - - put_diagpkt_wait(wait); /* finished with the structure */ - sc_del_credit_return_intr(sc); - } - -bail: - vfree(tmpbuf); - return ret; -} - -static ssize_t diagpkt_write(struct file *fp, const char __user *data, - size_t count, loff_t *off) -{ - struct hfi1_devdata *dd; - struct send_context *sc; - u8 vl; - - struct diag_pkt dp; - - if (count != sizeof(dp)) - return -EINVAL; - - if (copy_from_user(&dp, data, sizeof(dp))) - return -EFAULT; - - /* - * The Send Context is derived from the PbcVL value - * if PBC is populated - */ - if (dp.pbc) { - dd = hfi1_lookup(dp.unit); - if (!dd) - return -ENODEV; - vl = (dp.pbc >> PBC_VL_SHIFT) & PBC_VL_MASK; - sc = dd->vld[vl].sc; - if (sc) { - dp.sw_index = sc->sw_index; - hfi1_cdbg( - PKT, - "Packet sent over VL %d via Send Context %u(%u)", - vl, sc->sw_index, sc->hw_context); - } - } - - return diagpkt_send(&dp); -} - -static int hfi1_snoop_add(struct hfi1_devdata *dd, const char *name) -{ - int ret = 0; - - dd->hfi1_snoop.mode_flag = 0; - spin_lock_init(&dd->hfi1_snoop.snoop_lock); - INIT_LIST_HEAD(&dd->hfi1_snoop.queue); - init_waitqueue_head(&dd->hfi1_snoop.waitq); - - ret = hfi1_cdev_init(HFI1_SNOOP_CAPTURE_BASE + dd->unit, name, - &snoop_file_ops, - &dd->hfi1_snoop.cdev, &dd->hfi1_snoop.class_dev, - false); - - if (ret) { - dd_dev_err(dd, "Couldn't create %s device: %d", name, ret); - hfi1_cdev_cleanup(&dd->hfi1_snoop.cdev, - &dd->hfi1_snoop.class_dev); - } - - return ret; -} - -static struct hfi1_devdata *hfi1_dd_from_sc_inode(struct inode *in) -{ - int unit = iminor(in) - HFI1_SNOOP_CAPTURE_BASE; - struct hfi1_devdata *dd; - - dd = hfi1_lookup(unit); - return dd; -} - -/* clear or restore send context integrity checks */ -static void adjust_integrity_checks(struct hfi1_devdata *dd) -{ - struct send_context *sc; - unsigned long sc_flags; - int i; - - spin_lock_irqsave(&dd->sc_lock, sc_flags); - for (i = 0; i < dd->num_send_contexts; i++) { - int enable; - - sc = dd->send_contexts[i].sc; - - if (!sc) - continue; /* not allocated */ - - enable = likely(!HFI1_CAP_IS_KSET(NO_INTEGRITY)) && - dd->hfi1_snoop.mode_flag != HFI1_PORT_SNOOP_MODE; - - set_pio_integrity(sc); - - if (enable) /* take HFI_CAP_* flags into account */ - hfi1_init_ctxt(sc); - } - spin_unlock_irqrestore(&dd->sc_lock, sc_flags); -} - -static int hfi1_snoop_open(struct inode *in, struct file *fp) -{ - int ret; - int mode_flag = 0; - unsigned long flags = 0; - struct hfi1_devdata *dd; - struct list_head *queue; - - mutex_lock(&hfi1_mutex); - - dd = hfi1_dd_from_sc_inode(in); - if (!dd) { - ret = -ENODEV; - goto bail; - } - - /* - * File mode determines snoop or capture. Some existing user - * applications expect the capture device to be able to be opened RDWR - * because they expect a dedicated capture device. For this reason we - * support a module param to force capture mode even if the file open - * mode matches snoop. - */ - if ((fp->f_flags & O_ACCMODE) == O_RDONLY) { - snoop_dbg("Capture Enabled"); - mode_flag = HFI1_PORT_CAPTURE_MODE; - } else if ((fp->f_flags & O_ACCMODE) == O_RDWR) { - snoop_dbg("Snoop Enabled"); - mode_flag = HFI1_PORT_SNOOP_MODE; - } else { - snoop_dbg("Invalid"); - ret = -EINVAL; - goto bail; - } - queue = &dd->hfi1_snoop.queue; - - /* - * We are not supporting snoop and capture at the same time. - */ - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - if (dd->hfi1_snoop.mode_flag) { - ret = -EBUSY; - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - goto bail; - } - - dd->hfi1_snoop.mode_flag = mode_flag; - drain_snoop_list(queue); - - dd->hfi1_snoop.filter_callback = NULL; - dd->hfi1_snoop.filter_value = NULL; - - /* - * Send side packet integrity checks are not helpful when snooping so - * disable and re-enable when we stop snooping. - */ - if (mode_flag == HFI1_PORT_SNOOP_MODE) { - /* clear after snoop mode is on */ - adjust_integrity_checks(dd); /* clear */ - - /* - * We also do not want to be doing the DLID LMC check for - * ingressed packets. - */ - dd->hfi1_snoop.dcc_cfg = read_csr(dd, DCC_CFG_PORT_CONFIG1); - write_csr(dd, DCC_CFG_PORT_CONFIG1, - (dd->hfi1_snoop.dcc_cfg >> 32) << 32); - } - - /* - * As soon as we set these function pointers the recv and send handlers - * are active. This is a race condition so we must make sure to drain - * the queue and init filter values above. Technically we should add - * locking here but all that will happen is on recv a packet will get - * allocated and get stuck on the snoop_lock before getting added to the - * queue. Same goes for send. - */ - dd->rhf_rcv_function_map = snoop_rhf_rcv_functions; - dd->process_pio_send = snoop_send_pio_handler; - dd->process_dma_send = snoop_send_pio_handler; - dd->pio_inline_send = snoop_inline_pio_send; - - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - ret = 0; - -bail: - mutex_unlock(&hfi1_mutex); - - return ret; -} - -static int hfi1_snoop_release(struct inode *in, struct file *fp) -{ - unsigned long flags = 0; - struct hfi1_devdata *dd; - int mode_flag; - - dd = hfi1_dd_from_sc_inode(in); - if (!dd) - return -ENODEV; - - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - - /* clear the snoop mode before re-adjusting send context CSRs */ - mode_flag = dd->hfi1_snoop.mode_flag; - dd->hfi1_snoop.mode_flag = 0; - - /* - * Drain the queue and clear the filters we are done with it. Don't - * forget to restore the packet integrity checks - */ - drain_snoop_list(&dd->hfi1_snoop.queue); - if (mode_flag == HFI1_PORT_SNOOP_MODE) { - /* restore after snoop mode is clear */ - adjust_integrity_checks(dd); /* restore */ - - /* - * Also should probably reset the DCC_CONFIG1 register for DLID - * checking on incoming packets again. Use the value saved when - * opening the snoop device. - */ - write_csr(dd, DCC_CFG_PORT_CONFIG1, dd->hfi1_snoop.dcc_cfg); - } - - dd->hfi1_snoop.filter_callback = NULL; - kfree(dd->hfi1_snoop.filter_value); - dd->hfi1_snoop.filter_value = NULL; - - /* - * User is done snooping and capturing, return control to the normal - * handler. Re-enable SDMA handling. - */ - dd->rhf_rcv_function_map = dd->normal_rhf_rcv_functions; - dd->process_pio_send = hfi1_verbs_send_pio; - dd->process_dma_send = hfi1_verbs_send_dma; - dd->pio_inline_send = pio_copy; - - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - - snoop_dbg("snoop/capture device released"); - - return 0; -} - -static unsigned int hfi1_snoop_poll(struct file *fp, - struct poll_table_struct *wait) -{ - int ret = 0; - unsigned long flags = 0; - - struct hfi1_devdata *dd; - - dd = hfi1_dd_from_sc_inode(fp->f_inode); - if (!dd) - return -ENODEV; - - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - - poll_wait(fp, &dd->hfi1_snoop.waitq, wait); - if (!list_empty(&dd->hfi1_snoop.queue)) - ret |= POLLIN | POLLRDNORM; - - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - return ret; -} - -static ssize_t hfi1_snoop_write(struct file *fp, const char __user *data, - size_t count, loff_t *off) -{ - struct diag_pkt dpkt; - struct hfi1_devdata *dd; - size_t ret; - u8 byte_two, sl, sc5, sc4, vl, byte_one; - struct send_context *sc; - u32 len; - u64 pbc; - struct hfi1_ibport *ibp; - struct hfi1_pportdata *ppd; - - dd = hfi1_dd_from_sc_inode(fp->f_inode); - if (!dd) - return -ENODEV; - - ppd = dd->pport; - snoop_dbg("received %lu bytes from user", count); - - memset(&dpkt, 0, sizeof(struct diag_pkt)); - dpkt.version = _DIAG_PKT_VERS; - dpkt.unit = dd->unit; - dpkt.port = 1; - - if (likely(!(snoop_flags & SNOOP_USE_METADATA))) { - /* - * We need to generate the PBC and not let diagpkt_send do it, - * to do this we need the VL and the length in dwords. - * The VL can be determined by using the SL and looking up the - * SC. Then the SC can be converted into VL. The exception to - * this is those packets which are from an SMI queue pair. - * Since we can't detect anything about the QP here we have to - * rely on the SC. If its 0xF then we assume its SMI and - * do not look at the SL. - */ - if (copy_from_user(&byte_one, data, 1)) - return -EINVAL; - - if (copy_from_user(&byte_two, data + 1, 1)) - return -EINVAL; - - sc4 = (byte_one >> 4) & 0xf; - if (sc4 == 0xF) { - snoop_dbg("Detected VL15 packet ignoring SL in packet"); - vl = sc4; - } else { - sl = (byte_two >> 4) & 0xf; - ibp = to_iport(&dd->verbs_dev.rdi.ibdev, 1); - sc5 = ibp->sl_to_sc[sl]; - vl = sc_to_vlt(dd, sc5); - if (vl != sc4) { - snoop_dbg("VL %d does not match SC %d of packet", - vl, sc4); - return -EINVAL; - } - } - - sc = dd->vld[vl].sc; /* Look up the context based on VL */ - if (sc) { - dpkt.sw_index = sc->sw_index; - snoop_dbg("Sending on context %u(%u)", sc->sw_index, - sc->hw_context); - } else { - snoop_dbg("Could not find context for vl %d", vl); - return -EINVAL; - } - - len = (count >> 2) + 2; /* Add in PBC */ - pbc = create_pbc(ppd, 0, 0, vl, len); - } else { - if (copy_from_user(&pbc, data, sizeof(pbc))) - return -EINVAL; - vl = (pbc >> PBC_VL_SHIFT) & PBC_VL_MASK; - sc = dd->vld[vl].sc; /* Look up the context based on VL */ - if (sc) { - dpkt.sw_index = sc->sw_index; - } else { - snoop_dbg("Could not find context for vl %d", vl); - return -EINVAL; - } - data += sizeof(pbc); - count -= sizeof(pbc); - } - dpkt.len = count; - dpkt.data = (unsigned long)data; - - snoop_dbg("PBC: vl=0x%llx Length=0x%llx", - (pbc >> 12) & 0xf, - (pbc & 0xfff)); - - dpkt.pbc = pbc; - ret = diagpkt_send(&dpkt); - /* - * diagpkt_send only returns number of bytes in the diagpkt so patch - * that up here before returning. - */ - if (ret == sizeof(dpkt)) - return count; - - return ret; -} - -static ssize_t hfi1_snoop_read(struct file *fp, char __user *data, - size_t pkt_len, loff_t *off) -{ - ssize_t ret = 0; - unsigned long flags = 0; - struct snoop_packet *packet = NULL; - struct hfi1_devdata *dd; - - dd = hfi1_dd_from_sc_inode(fp->f_inode); - if (!dd) - return -ENODEV; - - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - - while (list_empty(&dd->hfi1_snoop.queue)) { - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - - if (fp->f_flags & O_NONBLOCK) - return -EAGAIN; - - if (wait_event_interruptible( - dd->hfi1_snoop.waitq, - !list_empty(&dd->hfi1_snoop.queue))) - return -EINTR; - - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - } - - if (!list_empty(&dd->hfi1_snoop.queue)) { - packet = list_entry(dd->hfi1_snoop.queue.next, - struct snoop_packet, list); - list_del(&packet->list); - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - if (pkt_len >= packet->total_len) { - if (copy_to_user(data, packet->data, - packet->total_len)) - ret = -EFAULT; - else - ret = packet->total_len; - } else { - ret = -EINVAL; - } - - kfree(packet); - } else { - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - } - - return ret; -} - -/** - * hfi1_assign_snoop_link_credits -- Set up credits for VL15 and others - * @ppd : ptr to hfi1 port data - * @value : options from user space - * - * Assumes the rest of the CM credit registers are zero from a - * previous global or credit reset. - * Leave shared count at zero for both global and all vls. - * In snoop mode ideally we don't use shared credits - * Reserve 8.5k for VL15 - * If total credits less than 8.5kbytes return error. - * Divide the rest of the credits across VL0 to VL7 and if - * each of these levels has less than 34 credits (at least 2048 + 128 bytes) - * return with an error. - * The credit registers will be reset to zero on link negotiation or link up - * so this function should be activated from user space only if the port has - * gone past link negotiation and link up. - * - * Return -- 0 if successful else error condition - * - */ -static long hfi1_assign_snoop_link_credits(struct hfi1_pportdata *ppd, - int value) -{ -#define OPA_MIN_PER_VL_CREDITS 34 /* 2048 + 128 bytes */ - struct buffer_control t; - int i; - struct hfi1_devdata *dd = ppd->dd; - u16 total_credits = (value >> 16) & 0xffff; - u16 vl15_credits = dd->vl15_init / 2; - u16 per_vl_credits; - __be16 be_per_vl_credits; - - if (!(ppd->host_link_state & HLS_UP)) - goto err_exit; - if (total_credits < vl15_credits) - goto err_exit; - - per_vl_credits = (total_credits - vl15_credits) / TXE_NUM_DATA_VL; - - if (per_vl_credits < OPA_MIN_PER_VL_CREDITS) - goto err_exit; - - memset(&t, 0, sizeof(t)); - be_per_vl_credits = cpu_to_be16(per_vl_credits); - - for (i = 0; i < TXE_NUM_DATA_VL; i++) - t.vl[i].dedicated = be_per_vl_credits; - - t.vl[15].dedicated = cpu_to_be16(vl15_credits); - return set_buffer_control(ppd, &t); - -err_exit: - snoop_dbg("port_state = 0x%x, total_credits = %d, vl15_credits = %d", - ppd->host_link_state, total_credits, vl15_credits); - - return -EINVAL; -} - -static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) -{ - struct hfi1_devdata *dd; - void *filter_value = NULL; - long ret = 0; - int value = 0; - u8 phys_state = 0; - u8 link_state = 0; - u16 dev_state = 0; - unsigned long flags = 0; - unsigned long *argp = NULL; - struct hfi1_packet_filter_command filter_cmd = {0}; - int mode_flag = 0; - struct hfi1_pportdata *ppd = NULL; - unsigned int index; - struct hfi1_link_info link_info; - int read_cmd, write_cmd, read_ok, write_ok; - - dd = hfi1_dd_from_sc_inode(fp->f_inode); - if (!dd) - return -ENODEV; - - mode_flag = dd->hfi1_snoop.mode_flag; - read_cmd = _IOC_DIR(cmd) & _IOC_READ; - write_cmd = _IOC_DIR(cmd) & _IOC_WRITE; - write_ok = access_ok(VERIFY_WRITE, (void __user *)arg, _IOC_SIZE(cmd)); - read_ok = access_ok(VERIFY_READ, (void __user *)arg, _IOC_SIZE(cmd)); - - if ((read_cmd && !write_ok) || (write_cmd && !read_ok)) - return -EFAULT; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if ((mode_flag & HFI1_PORT_CAPTURE_MODE) && - (cmd != HFI1_SNOOP_IOCCLEARQUEUE) && - (cmd != HFI1_SNOOP_IOCCLEARFILTER) && - (cmd != HFI1_SNOOP_IOCSETFILTER)) - /* Capture devices are allowed only 3 operations - * 1.Clear capture queue - * 2.Clear capture filter - * 3.Set capture filter - * Other are invalid. - */ - return -EINVAL; - - switch (cmd) { - case HFI1_SNOOP_IOCSETLINKSTATE_EXTRA: - memset(&link_info, 0, sizeof(link_info)); - - if (copy_from_user(&link_info, - (struct hfi1_link_info __user *)arg, - sizeof(link_info))) - return -EFAULT; - - value = link_info.port_state; - index = link_info.port_number; - if (index > dd->num_pports - 1) - return -EINVAL; - - ppd = &dd->pport[index]; - if (!ppd) - return -EINVAL; - - /* What we want to transition to */ - phys_state = (value >> 4) & 0xF; - link_state = value & 0xF; - snoop_dbg("Setting link state 0x%x", value); - - switch (link_state) { - case IB_PORT_NOP: - if (phys_state == 0) - break; - /* fall through */ - case IB_PORT_DOWN: - switch (phys_state) { - case 0: - dev_state = HLS_DN_DOWNDEF; - break; - case 2: - dev_state = HLS_DN_POLL; - break; - case 3: - dev_state = HLS_DN_DISABLE; - break; - default: - return -EINVAL; - } - ret = set_link_state(ppd, dev_state); - break; - case IB_PORT_ARMED: - ret = set_link_state(ppd, HLS_UP_ARMED); - if (!ret) - send_idle_sma(dd, SMA_IDLE_ARM); - break; - case IB_PORT_ACTIVE: - ret = set_link_state(ppd, HLS_UP_ACTIVE); - if (!ret) - send_idle_sma(dd, SMA_IDLE_ACTIVE); - break; - default: - return -EINVAL; - } - - if (ret) - break; - /* fall through */ - case HFI1_SNOOP_IOCGETLINKSTATE: - case HFI1_SNOOP_IOCGETLINKSTATE_EXTRA: - if (cmd == HFI1_SNOOP_IOCGETLINKSTATE_EXTRA) { - memset(&link_info, 0, sizeof(link_info)); - if (copy_from_user(&link_info, - (struct hfi1_link_info __user *)arg, - sizeof(link_info))) - return -EFAULT; - index = link_info.port_number; - } else { - ret = __get_user(index, (int __user *)arg); - if (ret != 0) - break; - } - - if (index > dd->num_pports - 1) - return -EINVAL; - - ppd = &dd->pport[index]; - if (!ppd) - return -EINVAL; - - value = hfi1_ibphys_portstate(ppd); - value <<= 4; - value |= driver_lstate(ppd); - - snoop_dbg("Link port | Link State: %d", value); - - if ((cmd == HFI1_SNOOP_IOCGETLINKSTATE_EXTRA) || - (cmd == HFI1_SNOOP_IOCSETLINKSTATE_EXTRA)) { - link_info.port_state = value; - link_info.node_guid = cpu_to_be64(ppd->guid); - link_info.link_speed_active = - ppd->link_speed_active; - link_info.link_width_active = - ppd->link_width_active; - if (copy_to_user((struct hfi1_link_info __user *)arg, - &link_info, sizeof(link_info))) - return -EFAULT; - } else { - ret = __put_user(value, (int __user *)arg); - } - break; - - case HFI1_SNOOP_IOCCLEARQUEUE: - snoop_dbg("Clearing snoop queue"); - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - drain_snoop_list(&dd->hfi1_snoop.queue); - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - break; - - case HFI1_SNOOP_IOCCLEARFILTER: - snoop_dbg("Clearing filter"); - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - if (dd->hfi1_snoop.filter_callback) { - /* Drain packets first */ - drain_snoop_list(&dd->hfi1_snoop.queue); - dd->hfi1_snoop.filter_callback = NULL; - } - kfree(dd->hfi1_snoop.filter_value); - dd->hfi1_snoop.filter_value = NULL; - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - break; - - case HFI1_SNOOP_IOCSETFILTER: - snoop_dbg("Setting filter"); - /* just copy command structure */ - argp = (unsigned long *)arg; - if (copy_from_user(&filter_cmd, (void __user *)argp, - sizeof(filter_cmd))) - return -EFAULT; - - if (filter_cmd.opcode >= HFI1_MAX_FILTERS) { - pr_alert("Invalid opcode in request\n"); - return -EINVAL; - } - - snoop_dbg("Opcode %d Len %d Ptr %p", - filter_cmd.opcode, filter_cmd.length, - filter_cmd.value_ptr); - - filter_value = kcalloc(filter_cmd.length, sizeof(u8), - GFP_KERNEL); - if (!filter_value) - return -ENOMEM; - - /* copy remaining data from userspace */ - if (copy_from_user((u8 *)filter_value, - (void __user *)filter_cmd.value_ptr, - filter_cmd.length)) { - kfree(filter_value); - return -EFAULT; - } - /* Drain packets first */ - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - drain_snoop_list(&dd->hfi1_snoop.queue); - dd->hfi1_snoop.filter_callback = - hfi1_filters[filter_cmd.opcode].filter; - /* just in case we see back to back sets */ - kfree(dd->hfi1_snoop.filter_value); - dd->hfi1_snoop.filter_value = filter_value; - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - break; - case HFI1_SNOOP_IOCGETVERSION: - value = SNOOP_CAPTURE_VERSION; - snoop_dbg("Getting version: %d", value); - ret = __put_user(value, (int __user *)arg); - break; - case HFI1_SNOOP_IOCSET_OPTS: - snoop_flags = 0; - ret = __get_user(value, (int __user *)arg); - if (ret != 0) - break; - - snoop_dbg("Setting snoop option %d", value); - if (value & SNOOP_DROP_SEND) - snoop_flags |= SNOOP_DROP_SEND; - if (value & SNOOP_USE_METADATA) - snoop_flags |= SNOOP_USE_METADATA; - if (value & (SNOOP_SET_VL0TOVL15)) { - ppd = &dd->pport[0]; /* first port will do */ - ret = hfi1_assign_snoop_link_credits(ppd, value); - } - break; - default: - return -ENOTTY; - } - - return ret; -} - -static void snoop_list_add_tail(struct snoop_packet *packet, - struct hfi1_devdata *dd) -{ - unsigned long flags = 0; - - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - if (likely((dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE) || - (dd->hfi1_snoop.mode_flag & HFI1_PORT_CAPTURE_MODE))) { - list_add_tail(&packet->list, &dd->hfi1_snoop.queue); - snoop_dbg("Added packet to list"); - } - - /* - * Technically we can could have closed the snoop device while waiting - * on the above lock and it is gone now. The snoop mode_flag will - * prevent us from adding the packet to the queue though. - */ - - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - wake_up_interruptible(&dd->hfi1_snoop.waitq); -} - -static inline int hfi1_filter_check(void *val, const char *msg) -{ - if (!val) { - snoop_dbg("Error invalid %s value for filter", msg); - return HFI1_FILTER_ERR; - } - return 0; -} - -static int hfi1_filter_lid(void *ibhdr, void *packet_data, void *value) -{ - struct hfi1_ib_header *hdr; - int ret; - - ret = hfi1_filter_check(ibhdr, "header"); - if (ret) - return ret; - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - hdr = (struct hfi1_ib_header *)ibhdr; - - if (*((u16 *)value) == be16_to_cpu(hdr->lrh[3])) /* matches slid */ - return HFI1_FILTER_HIT; /* matched */ - - return HFI1_FILTER_MISS; /* Not matched */ -} - -static int hfi1_filter_dlid(void *ibhdr, void *packet_data, void *value) -{ - struct hfi1_ib_header *hdr; - int ret; - - ret = hfi1_filter_check(ibhdr, "header"); - if (ret) - return ret; - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - - hdr = (struct hfi1_ib_header *)ibhdr; - - if (*((u16 *)value) == be16_to_cpu(hdr->lrh[1])) - return HFI1_FILTER_HIT; - - return HFI1_FILTER_MISS; -} - -/* Not valid for outgoing packets, send handler passes null for data*/ -static int hfi1_filter_mad_mgmt_class(void *ibhdr, void *packet_data, - void *value) -{ - struct hfi1_ib_header *hdr; - struct hfi1_other_headers *ohdr = NULL; - struct ib_smp *smp = NULL; - u32 qpn = 0; - int ret; - - ret = hfi1_filter_check(ibhdr, "header"); - if (ret) - return ret; - ret = hfi1_filter_check(packet_data, "packet_data"); - if (ret) - return ret; - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - - hdr = (struct hfi1_ib_header *)ibhdr; - - /* Check for GRH */ - if ((be16_to_cpu(hdr->lrh[0]) & 3) == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; /* LRH + BTH + DETH */ - else - ohdr = &hdr->u.l.oth; /* LRH + GRH + BTH + DETH */ - - qpn = be32_to_cpu(ohdr->bth[1]) & 0x00FFFFFF; - if (qpn <= 1) { - smp = (struct ib_smp *)packet_data; - if (*((u8 *)value) == smp->mgmt_class) - return HFI1_FILTER_HIT; - else - return HFI1_FILTER_MISS; - } - return HFI1_FILTER_ERR; -} - -static int hfi1_filter_qp_number(void *ibhdr, void *packet_data, void *value) -{ - struct hfi1_ib_header *hdr; - struct hfi1_other_headers *ohdr = NULL; - int ret; - - ret = hfi1_filter_check(ibhdr, "header"); - if (ret) - return ret; - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - - hdr = (struct hfi1_ib_header *)ibhdr; - - /* Check for GRH */ - if ((be16_to_cpu(hdr->lrh[0]) & 3) == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; /* LRH + BTH + DETH */ - else - ohdr = &hdr->u.l.oth; /* LRH + GRH + BTH + DETH */ - if (*((u32 *)value) == (be32_to_cpu(ohdr->bth[1]) & 0x00FFFFFF)) - return HFI1_FILTER_HIT; - - return HFI1_FILTER_MISS; -} - -static int hfi1_filter_ibpacket_type(void *ibhdr, void *packet_data, - void *value) -{ - u32 lnh = 0; - u8 opcode = 0; - struct hfi1_ib_header *hdr; - struct hfi1_other_headers *ohdr = NULL; - int ret; - - ret = hfi1_filter_check(ibhdr, "header"); - if (ret) - return ret; - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - - hdr = (struct hfi1_ib_header *)ibhdr; - - lnh = (be16_to_cpu(hdr->lrh[0]) & 3); - - if (lnh == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; - else if (lnh == HFI1_LRH_GRH) - ohdr = &hdr->u.l.oth; - else - return HFI1_FILTER_ERR; - - opcode = be32_to_cpu(ohdr->bth[0]) >> 24; - - if (*((u8 *)value) == ((opcode >> 5) & 0x7)) - return HFI1_FILTER_HIT; - - return HFI1_FILTER_MISS; -} - -static int hfi1_filter_ib_service_level(void *ibhdr, void *packet_data, - void *value) -{ - struct hfi1_ib_header *hdr; - int ret; - - ret = hfi1_filter_check(ibhdr, "header"); - if (ret) - return ret; - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - - hdr = (struct hfi1_ib_header *)ibhdr; - - if ((*((u8 *)value)) == ((be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF)) - return HFI1_FILTER_HIT; - - return HFI1_FILTER_MISS; -} - -static int hfi1_filter_ib_pkey(void *ibhdr, void *packet_data, void *value) -{ - u32 lnh = 0; - struct hfi1_ib_header *hdr; - struct hfi1_other_headers *ohdr = NULL; - int ret; - - ret = hfi1_filter_check(ibhdr, "header"); - if (ret) - return ret; - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - - hdr = (struct hfi1_ib_header *)ibhdr; - - lnh = (be16_to_cpu(hdr->lrh[0]) & 3); - if (lnh == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; - else if (lnh == HFI1_LRH_GRH) - ohdr = &hdr->u.l.oth; - else - return HFI1_FILTER_ERR; - - /* P_key is 16-bit entity, however top most bit indicates - * type of membership. 0 for limited and 1 for Full. - * Limited members cannot accept information from other - * Limited members, but communication is allowed between - * every other combination of membership. - * Hence we'll omit comparing top-most bit while filtering - */ - - if ((*(u16 *)value & 0x7FFF) == - ((be32_to_cpu(ohdr->bth[0])) & 0x7FFF)) - return HFI1_FILTER_HIT; - - return HFI1_FILTER_MISS; -} - -/* - * If packet_data is NULL then this is coming from one of the send functions. - * Thus we know if its an ingressed or egressed packet. - */ -static int hfi1_filter_direction(void *ibhdr, void *packet_data, void *value) -{ - u8 user_dir = *(u8 *)value; - int ret; - - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - - if (packet_data) { - /* Incoming packet */ - if (user_dir & HFI1_SNOOP_INGRESS) - return HFI1_FILTER_HIT; - } else { - /* Outgoing packet */ - if (user_dir & HFI1_SNOOP_EGRESS) - return HFI1_FILTER_HIT; - } - - return HFI1_FILTER_MISS; -} - -/* - * Allocate a snoop packet. The structure that is stored in the ring buffer, not - * to be confused with an hfi packet type. - */ -static struct snoop_packet *allocate_snoop_packet(u32 hdr_len, - u32 data_len, - u32 md_len) -{ - struct snoop_packet *packet; - - packet = kzalloc(sizeof(*packet) + hdr_len + data_len - + md_len, - GFP_ATOMIC | __GFP_NOWARN); - if (likely(packet)) - INIT_LIST_HEAD(&packet->list); - - return packet; -} - -/* - * Instead of having snoop and capture code intermixed with the recv functions, - * both the interrupt handler and hfi1_ib_rcv() we are going to hijack the call - * and land in here for snoop/capture but if not enabled the call will go - * through as before. This gives us a single point to constrain all of the snoop - * snoop recv logic. There is nothing special that needs to happen for bypass - * packets. This routine should not try to look into the packet. It just copied - * it. There is no guarantee for filters when it comes to bypass packets as - * there is no specific support. Bottom line is this routine does now even know - * what a bypass packet is. - */ -int snoop_recv_handler(struct hfi1_packet *packet) -{ - struct hfi1_pportdata *ppd = packet->rcd->ppd; - struct hfi1_ib_header *hdr = packet->hdr; - int header_size = packet->hlen; - void *data = packet->ebuf; - u32 tlen = packet->tlen; - struct snoop_packet *s_packet = NULL; - int ret; - int snoop_mode = 0; - u32 md_len = 0; - struct capture_md md; - - snoop_dbg("PACKET IN: hdr size %d tlen %d data %p", header_size, tlen, - data); - - trace_snoop_capture(ppd->dd, header_size, hdr, tlen - header_size, - data); - - if (!ppd->dd->hfi1_snoop.filter_callback) { - snoop_dbg("filter not set"); - ret = HFI1_FILTER_HIT; - } else { - ret = ppd->dd->hfi1_snoop.filter_callback(hdr, data, - ppd->dd->hfi1_snoop.filter_value); - } - - switch (ret) { - case HFI1_FILTER_ERR: - snoop_dbg("Error in filter call"); - break; - case HFI1_FILTER_MISS: - snoop_dbg("Filter Miss"); - break; - case HFI1_FILTER_HIT: - - if (ppd->dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE) - snoop_mode = 1; - if ((snoop_mode == 0) || - unlikely(snoop_flags & SNOOP_USE_METADATA)) - md_len = sizeof(struct capture_md); - - s_packet = allocate_snoop_packet(header_size, - tlen - header_size, - md_len); - - if (unlikely(!s_packet)) { - dd_dev_warn_ratelimited(ppd->dd, "Unable to allocate snoop/capture packet\n"); - break; - } - - if (md_len > 0) { - memset(&md, 0, sizeof(struct capture_md)); - md.port = 1; - md.dir = PKT_DIR_INGRESS; - md.u.rhf = packet->rhf; - memcpy(s_packet->data, &md, md_len); - } - - /* We should always have a header */ - if (hdr) { - memcpy(s_packet->data + md_len, hdr, header_size); - } else { - dd_dev_err(ppd->dd, "Unable to copy header to snoop/capture packet\n"); - kfree(s_packet); - break; - } - - /* - * Packets with no data are possible. If there is no data needed - * to take care of the last 4 bytes which are normally included - * with data buffers and are included in tlen. Since we kzalloc - * the buffer we do not need to set any values but if we decide - * not to use kzalloc we should zero them. - */ - if (data) - memcpy(s_packet->data + header_size + md_len, data, - tlen - header_size); - - s_packet->total_len = tlen + md_len; - snoop_list_add_tail(s_packet, ppd->dd); - - /* - * If we are snooping the packet not capturing then throw away - * after adding to the list. - */ - snoop_dbg("Capturing packet"); - if (ppd->dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE) { - snoop_dbg("Throwing packet away"); - /* - * If we are dropping the packet we still may need to - * handle the case where error flags are set, this is - * normally done by the type specific handler but that - * won't be called in this case. - */ - if (unlikely(rhf_err_flags(packet->rhf))) - handle_eflags(packet); - - /* throw the packet on the floor */ - return RHF_RCV_CONTINUE; - } - break; - default: - break; - } - - /* - * We do not care what type of packet came in here - just pass it off - * to the normal handler. - */ - return ppd->dd->normal_rhf_rcv_functions[rhf_rcv_type(packet->rhf)] - (packet); -} - -/* - * Handle snooping and capturing packets when sdma is being used. - */ -int snoop_send_dma_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps, - u64 pbc) -{ - pr_alert("Snooping/Capture of Send DMA Packets Is Not Supported!\n"); - snoop_dbg("Unsupported Operation"); - return hfi1_verbs_send_dma(qp, ps, 0); -} - -/* - * Handle snooping and capturing packets when pio is being used. Does not handle - * bypass packets. The only way to send a bypass packet currently is to use the - * diagpkt interface. When that interface is enable snoop/capture is not. - */ -int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps, - u64 pbc) -{ - u32 hdrwords = qp->s_hdrwords; - struct rvt_sge_state *ss = qp->s_cur_sge; - u32 len = qp->s_cur_size; - u32 dwords = (len + 3) >> 2; - u32 plen = hdrwords + dwords + 2; /* includes pbc */ - struct hfi1_pportdata *ppd = ps->ppd; - struct snoop_packet *s_packet = NULL; - u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr; - u32 length = 0; - struct rvt_sge_state temp_ss; - void *data = NULL; - void *data_start = NULL; - int ret; - int snoop_mode = 0; - int md_len = 0; - struct capture_md md; - u32 vl; - u32 hdr_len = hdrwords << 2; - u32 tlen = HFI1_GET_PKT_LEN(&ps->s_txreq->phdr.hdr); - - md.u.pbc = 0; - - snoop_dbg("PACKET OUT: hdrword %u len %u plen %u dwords %u tlen %u", - hdrwords, len, plen, dwords, tlen); - if (ppd->dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE) - snoop_mode = 1; - if ((snoop_mode == 0) || - unlikely(snoop_flags & SNOOP_USE_METADATA)) - md_len = sizeof(struct capture_md); - - /* not using ss->total_len as arg 2 b/c that does not count CRC */ - s_packet = allocate_snoop_packet(hdr_len, tlen - hdr_len, md_len); - - if (unlikely(!s_packet)) { - dd_dev_warn_ratelimited(ppd->dd, "Unable to allocate snoop/capture packet\n"); - goto out; - } - - s_packet->total_len = tlen + md_len; - - if (md_len > 0) { - memset(&md, 0, sizeof(struct capture_md)); - md.port = 1; - md.dir = PKT_DIR_EGRESS; - if (likely(pbc == 0)) { - vl = be16_to_cpu(ps->s_txreq->phdr.hdr.lrh[0]) >> 12; - md.u.pbc = create_pbc(ppd, 0, qp->s_srate, vl, plen); - } else { - md.u.pbc = 0; - } - memcpy(s_packet->data, &md, md_len); - } else { - md.u.pbc = pbc; - } - - /* Copy header */ - if (likely(hdr)) { - memcpy(s_packet->data + md_len, hdr, hdr_len); - } else { - dd_dev_err(ppd->dd, - "Unable to copy header to snoop/capture packet\n"); - kfree(s_packet); - goto out; - } - - if (ss) { - data = s_packet->data + hdr_len + md_len; - data_start = data; - - /* - * Copy SGE State - * The update_sge() function below will not modify the - * individual SGEs in the array. It will make a copy each time - * and operate on that. So we only need to copy this instance - * and it won't impact PIO. - */ - temp_ss = *ss; - length = len; - - snoop_dbg("Need to copy %d bytes", length); - while (length) { - void *addr = temp_ss.sge.vaddr; - u32 slen = temp_ss.sge.length; - - if (slen > length) { - slen = length; - snoop_dbg("slen %d > len %d", slen, length); - } - snoop_dbg("copy %d to %p", slen, addr); - memcpy(data, addr, slen); - update_sge(&temp_ss, slen); - length -= slen; - data += slen; - snoop_dbg("data is now %p bytes left %d", data, length); - } - snoop_dbg("Completed SGE copy"); - } - - /* - * Why do the filter check down here? Because the event tracing has its - * own filtering and we need to have the walked the SGE list. - */ - if (!ppd->dd->hfi1_snoop.filter_callback) { - snoop_dbg("filter not set\n"); - ret = HFI1_FILTER_HIT; - } else { - ret = ppd->dd->hfi1_snoop.filter_callback( - &ps->s_txreq->phdr.hdr, - NULL, - ppd->dd->hfi1_snoop.filter_value); - } - - switch (ret) { - case HFI1_FILTER_ERR: - snoop_dbg("Error in filter call"); - /* fall through */ - case HFI1_FILTER_MISS: - snoop_dbg("Filter Miss"); - kfree(s_packet); - break; - case HFI1_FILTER_HIT: - snoop_dbg("Capturing packet"); - snoop_list_add_tail(s_packet, ppd->dd); - - if (unlikely((snoop_flags & SNOOP_DROP_SEND) && - (ppd->dd->hfi1_snoop.mode_flag & - HFI1_PORT_SNOOP_MODE))) { - unsigned long flags; - - snoop_dbg("Dropping packet"); - if (qp->s_wqe) { - spin_lock_irqsave(&qp->s_lock, flags); - hfi1_send_complete( - qp, - qp->s_wqe, - IB_WC_SUCCESS); - spin_unlock_irqrestore(&qp->s_lock, flags); - } else if (qp->ibqp.qp_type == IB_QPT_RC) { - spin_lock_irqsave(&qp->s_lock, flags); - hfi1_rc_send_complete(qp, - &ps->s_txreq->phdr.hdr); - spin_unlock_irqrestore(&qp->s_lock, flags); - } - - /* - * If snoop is dropping the packet we need to put the - * txreq back because no one else will. - */ - hfi1_put_txreq(ps->s_txreq); - return 0; - } - break; - default: - kfree(s_packet); - break; - } -out: - return hfi1_verbs_send_pio(qp, ps, md.u.pbc); -} - -/* - * Callers of this must pass a hfi1_ib_header type for the from ptr. Currently - * this can be used anywhere, but the intention is for inline ACKs for RC and - * CCA packets. We don't restrict this usage though. - */ -void snoop_inline_pio_send(struct hfi1_devdata *dd, struct pio_buf *pbuf, - u64 pbc, const void *from, size_t count) -{ - int snoop_mode = 0; - int md_len = 0; - struct capture_md md; - struct snoop_packet *s_packet = NULL; - - /* - * count is in dwords so we need to convert to bytes. - * We also need to account for CRC which would be tacked on by hardware. - */ - int packet_len = (count << 2) + 4; - int ret; - - snoop_dbg("ACK OUT: len %d", packet_len); - - if (!dd->hfi1_snoop.filter_callback) { - snoop_dbg("filter not set"); - ret = HFI1_FILTER_HIT; - } else { - ret = dd->hfi1_snoop.filter_callback( - (struct hfi1_ib_header *)from, - NULL, - dd->hfi1_snoop.filter_value); - } - - switch (ret) { - case HFI1_FILTER_ERR: - snoop_dbg("Error in filter call"); - /* fall through */ - case HFI1_FILTER_MISS: - snoop_dbg("Filter Miss"); - break; - case HFI1_FILTER_HIT: - snoop_dbg("Capturing packet"); - if (dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE) - snoop_mode = 1; - if ((snoop_mode == 0) || - unlikely(snoop_flags & SNOOP_USE_METADATA)) - md_len = sizeof(struct capture_md); - - s_packet = allocate_snoop_packet(packet_len, 0, md_len); - - if (unlikely(!s_packet)) { - dd_dev_warn_ratelimited(dd, "Unable to allocate snoop/capture packet\n"); - goto inline_pio_out; - } - - s_packet->total_len = packet_len + md_len; - - /* Fill in the metadata for the packet */ - if (md_len > 0) { - memset(&md, 0, sizeof(struct capture_md)); - md.port = 1; - md.dir = PKT_DIR_EGRESS; - md.u.pbc = pbc; - memcpy(s_packet->data, &md, md_len); - } - - /* Add the packet data which is a single buffer */ - memcpy(s_packet->data + md_len, from, packet_len); - - snoop_list_add_tail(s_packet, dd); - - if (unlikely((snoop_flags & SNOOP_DROP_SEND) && snoop_mode)) { - snoop_dbg("Dropping packet"); - return; - } - break; - default: - break; - } - -inline_pio_out: - pio_copy(dd, pbuf, pbc, from, count); -} diff --git a/drivers/staging/rdma/hfi1/eprom.c b/drivers/staging/rdma/hfi1/eprom.c deleted file mode 100644 index bd8771570f81..000000000000 --- a/drivers/staging/rdma/hfi1/eprom.c +++ /dev/null @@ -1,471 +0,0 @@ -/* - * Copyright(c) 2015, 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ -#include <linux/delay.h> -#include "hfi.h" -#include "common.h" -#include "eprom.h" - -/* - * The EPROM is logically divided into three partitions: - * partition 0: the first 128K, visible from PCI ROM BAR - * partition 1: 4K config file (sector size) - * partition 2: the rest - */ -#define P0_SIZE (128 * 1024) -#define P1_SIZE (4 * 1024) -#define P1_START P0_SIZE -#define P2_START (P0_SIZE + P1_SIZE) - -/* erase sizes supported by the controller */ -#define SIZE_4KB (4 * 1024) -#define MASK_4KB (SIZE_4KB - 1) - -#define SIZE_32KB (32 * 1024) -#define MASK_32KB (SIZE_32KB - 1) - -#define SIZE_64KB (64 * 1024) -#define MASK_64KB (SIZE_64KB - 1) - -/* controller page size, in bytes */ -#define EP_PAGE_SIZE 256 -#define EEP_PAGE_MASK (EP_PAGE_SIZE - 1) - -/* controller commands */ -#define CMD_SHIFT 24 -#define CMD_NOP (0) -#define CMD_PAGE_PROGRAM(addr) ((0x02 << CMD_SHIFT) | addr) -#define CMD_READ_DATA(addr) ((0x03 << CMD_SHIFT) | addr) -#define CMD_READ_SR1 ((0x05 << CMD_SHIFT)) -#define CMD_WRITE_ENABLE ((0x06 << CMD_SHIFT)) -#define CMD_SECTOR_ERASE_4KB(addr) ((0x20 << CMD_SHIFT) | addr) -#define CMD_SECTOR_ERASE_32KB(addr) ((0x52 << CMD_SHIFT) | addr) -#define CMD_CHIP_ERASE ((0x60 << CMD_SHIFT)) -#define CMD_READ_MANUF_DEV_ID ((0x90 << CMD_SHIFT)) -#define CMD_RELEASE_POWERDOWN_NOID ((0xab << CMD_SHIFT)) -#define CMD_SECTOR_ERASE_64KB(addr) ((0xd8 << CMD_SHIFT) | addr) - -/* controller interface speeds */ -#define EP_SPEED_FULL 0x2 /* full speed */ - -/* controller status register 1 bits */ -#define SR1_BUSY 0x1ull /* the BUSY bit in SR1 */ - -/* sleep length while waiting for controller */ -#define WAIT_SLEEP_US 100 /* must be larger than 5 (see usage) */ -#define COUNT_DELAY_SEC(n) ((n) * (1000000 / WAIT_SLEEP_US)) - -/* GPIO pins */ -#define EPROM_WP_N BIT_ULL(14) /* EPROM write line */ - -/* - * How long to wait for the EPROM to become available, in ms. - * The spec 32 Mb EPROM takes around 40s to erase then write. - * Double it for safety. - */ -#define EPROM_TIMEOUT 80000 /* ms */ - -/* - * Turn on external enable line that allows writing on the flash. - */ -static void write_enable(struct hfi1_devdata *dd) -{ - /* raise signal */ - write_csr(dd, ASIC_GPIO_OUT, read_csr(dd, ASIC_GPIO_OUT) | EPROM_WP_N); - /* raise enable */ - write_csr(dd, ASIC_GPIO_OE, read_csr(dd, ASIC_GPIO_OE) | EPROM_WP_N); -} - -/* - * Turn off external enable line that allows writing on the flash. - */ -static void write_disable(struct hfi1_devdata *dd) -{ - /* lower signal */ - write_csr(dd, ASIC_GPIO_OUT, read_csr(dd, ASIC_GPIO_OUT) & ~EPROM_WP_N); - /* lower enable */ - write_csr(dd, ASIC_GPIO_OE, read_csr(dd, ASIC_GPIO_OE) & ~EPROM_WP_N); -} - -/* - * Wait for the device to become not busy. Must be called after all - * write or erase operations. - */ -static int wait_for_not_busy(struct hfi1_devdata *dd) -{ - unsigned long count = 0; - u64 reg; - int ret = 0; - - /* starts page mode */ - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_SR1); - while (1) { - udelay(WAIT_SLEEP_US); - usleep_range(WAIT_SLEEP_US - 5, WAIT_SLEEP_US + 5); - count++; - reg = read_csr(dd, ASIC_EEP_DATA); - if ((reg & SR1_BUSY) == 0) - break; - /* 200s is the largest time for a 128Mb device */ - if (count > COUNT_DELAY_SEC(200)) { - dd_dev_err(dd, "waited too long for SPI FLASH busy to clear - failing\n"); - ret = -ETIMEDOUT; - break; /* break, not goto - must stop page mode */ - } - } - - /* stop page mode with a NOP */ - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP); - - return ret; -} - -/* - * Read the device ID from the SPI controller. - */ -static u32 read_device_id(struct hfi1_devdata *dd) -{ - /* read the Manufacture Device ID */ - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_MANUF_DEV_ID); - return (u32)read_csr(dd, ASIC_EEP_DATA); -} - -/* - * Erase the whole flash. - */ -static int erase_chip(struct hfi1_devdata *dd) -{ - int ret; - - write_enable(dd); - - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_WRITE_ENABLE); - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_CHIP_ERASE); - ret = wait_for_not_busy(dd); - - write_disable(dd); - - return ret; -} - -/* - * Erase a range. - */ -static int erase_range(struct hfi1_devdata *dd, u32 start, u32 len) -{ - u32 end = start + len; - int ret = 0; - - if (end < start) - return -EINVAL; - - /* check the end points for the minimum erase */ - if ((start & MASK_4KB) || (end & MASK_4KB)) { - dd_dev_err(dd, - "%s: non-aligned range (0x%x,0x%x) for a 4KB erase\n", - __func__, start, end); - return -EINVAL; - } - - write_enable(dd); - - while (start < end) { - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_WRITE_ENABLE); - /* check in order of largest to smallest */ - if (((start & MASK_64KB) == 0) && (start + SIZE_64KB <= end)) { - write_csr(dd, ASIC_EEP_ADDR_CMD, - CMD_SECTOR_ERASE_64KB(start)); - start += SIZE_64KB; - } else if (((start & MASK_32KB) == 0) && - (start + SIZE_32KB <= end)) { - write_csr(dd, ASIC_EEP_ADDR_CMD, - CMD_SECTOR_ERASE_32KB(start)); - start += SIZE_32KB; - } else { /* 4KB will work */ - write_csr(dd, ASIC_EEP_ADDR_CMD, - CMD_SECTOR_ERASE_4KB(start)); - start += SIZE_4KB; - } - ret = wait_for_not_busy(dd); - if (ret) - goto done; - } - -done: - write_disable(dd); - - return ret; -} - -/* - * Read a 256 byte (64 dword) EPROM page. - * All callers have verified the offset is at a page boundary. - */ -static void read_page(struct hfi1_devdata *dd, u32 offset, u32 *result) -{ - int i; - - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_DATA(offset)); - for (i = 0; i < EP_PAGE_SIZE / sizeof(u32); i++) - result[i] = (u32)read_csr(dd, ASIC_EEP_DATA); - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP); /* close open page */ -} - -/* - * Read length bytes starting at offset. Copy to user address addr. - */ -static int read_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr) -{ - u32 offset; - u32 buffer[EP_PAGE_SIZE / sizeof(u32)]; - int ret = 0; - - /* reject anything not on an EPROM page boundary */ - if ((start & EEP_PAGE_MASK) || (len & EEP_PAGE_MASK)) - return -EINVAL; - - for (offset = 0; offset < len; offset += EP_PAGE_SIZE) { - read_page(dd, start + offset, buffer); - if (copy_to_user((void __user *)(addr + offset), - buffer, EP_PAGE_SIZE)) { - ret = -EFAULT; - goto done; - } - } - -done: - return ret; -} - -/* - * Write a 256 byte (64 dword) EPROM page. - * All callers have verified the offset is at a page boundary. - */ -static int write_page(struct hfi1_devdata *dd, u32 offset, u32 *data) -{ - int i; - - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_WRITE_ENABLE); - write_csr(dd, ASIC_EEP_DATA, data[0]); - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_PAGE_PROGRAM(offset)); - for (i = 1; i < EP_PAGE_SIZE / sizeof(u32); i++) - write_csr(dd, ASIC_EEP_DATA, data[i]); - /* will close the open page */ - return wait_for_not_busy(dd); -} - -/* - * Write length bytes starting at offset. Read from user address addr. - */ -static int write_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr) -{ - u32 offset; - u32 buffer[EP_PAGE_SIZE / sizeof(u32)]; - int ret = 0; - - /* reject anything not on an EPROM page boundary */ - if ((start & EEP_PAGE_MASK) || (len & EEP_PAGE_MASK)) - return -EINVAL; - - write_enable(dd); - - for (offset = 0; offset < len; offset += EP_PAGE_SIZE) { - if (copy_from_user(buffer, (void __user *)(addr + offset), - EP_PAGE_SIZE)) { - ret = -EFAULT; - goto done; - } - ret = write_page(dd, start + offset, buffer); - if (ret) - goto done; - } - -done: - write_disable(dd); - return ret; -} - -/* convert an range composite to a length, in bytes */ -static inline u32 extract_rlen(u32 composite) -{ - return (composite & 0xffff) * EP_PAGE_SIZE; -} - -/* convert an range composite to a start, in bytes */ -static inline u32 extract_rstart(u32 composite) -{ - return (composite >> 16) * EP_PAGE_SIZE; -} - -/* - * Perform the given operation on the EPROM. Called from user space. The - * user credentials have already been checked. - * - * Return 0 on success, -ERRNO on error - */ -int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd) -{ - struct hfi1_devdata *dd; - u32 dev_id; - u32 rlen; /* range length */ - u32 rstart; /* range start */ - int i_minor; - int ret = 0; - - /* - * Map the device file to device data using the relative minor. - * The device file minor number is the unit number + 1. 0 is - * the generic device file - reject it. - */ - i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE; - if (i_minor <= 0) - return -EINVAL; - dd = hfi1_lookup(i_minor - 1); - if (!dd) { - pr_err("%s: cannot find unit %d!\n", __func__, i_minor); - return -EINVAL; - } - - /* some devices do not have an EPROM */ - if (!dd->eprom_available) - return -EOPNOTSUPP; - - ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT); - if (ret) { - dd_dev_err(dd, "%s: unable to acquire EPROM resource\n", - __func__); - goto done_asic; - } - - dd_dev_info(dd, "%s: cmd: type %d, len 0x%x, addr 0x%016llx\n", - __func__, cmd->type, cmd->len, cmd->addr); - - switch (cmd->type) { - case HFI1_CMD_EP_INFO: - if (cmd->len != sizeof(u32)) { - ret = -ERANGE; - break; - } - dev_id = read_device_id(dd); - /* addr points to a u32 user buffer */ - if (copy_to_user((void __user *)cmd->addr, &dev_id, - sizeof(u32))) - ret = -EFAULT; - break; - - case HFI1_CMD_EP_ERASE_CHIP: - ret = erase_chip(dd); - break; - - case HFI1_CMD_EP_ERASE_RANGE: - rlen = extract_rlen(cmd->len); - rstart = extract_rstart(cmd->len); - ret = erase_range(dd, rstart, rlen); - break; - - case HFI1_CMD_EP_READ_RANGE: - rlen = extract_rlen(cmd->len); - rstart = extract_rstart(cmd->len); - ret = read_length(dd, rstart, rlen, cmd->addr); - break; - - case HFI1_CMD_EP_WRITE_RANGE: - rlen = extract_rlen(cmd->len); - rstart = extract_rstart(cmd->len); - ret = write_length(dd, rstart, rlen, cmd->addr); - break; - - default: - dd_dev_err(dd, "%s: unexpected command %d\n", - __func__, cmd->type); - ret = -EINVAL; - break; - } - - release_chip_resource(dd, CR_EPROM); -done_asic: - return ret; -} - -/* - * Initialize the EPROM handler. - */ -int eprom_init(struct hfi1_devdata *dd) -{ - int ret = 0; - - /* only the discrete chip has an EPROM */ - if (dd->pcidev->device != PCI_DEVICE_ID_INTEL0) - return 0; - - /* - * It is OK if both HFIs reset the EPROM as long as they don't - * do it at the same time. - */ - ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT); - if (ret) { - dd_dev_err(dd, - "%s: unable to acquire EPROM resource, no EPROM support\n", - __func__); - goto done_asic; - } - - /* reset EPROM to be sure it is in a good state */ - - /* set reset */ - write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_EP_RESET_SMASK); - /* clear reset, set speed */ - write_csr(dd, ASIC_EEP_CTL_STAT, - EP_SPEED_FULL << ASIC_EEP_CTL_STAT_RATE_SPI_SHIFT); - - /* wake the device with command "release powerdown NoID" */ - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_RELEASE_POWERDOWN_NOID); - - dd->eprom_available = true; - release_chip_resource(dd, CR_EPROM); -done_asic: - return ret; -} diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h index 0f3daae44bf9..b13419ce99ff 100644 --- a/include/rdma/ib_pack.h +++ b/include/rdma/ib_pack.h @@ -103,6 +103,9 @@ enum { IB_OPCODE_ATOMIC_ACKNOWLEDGE = 0x12, IB_OPCODE_COMPARE_SWAP = 0x13, IB_OPCODE_FETCH_ADD = 0x14, + /* opcode 0x15 is reserved */ + IB_OPCODE_SEND_LAST_WITH_INVALIDATE = 0x16, + IB_OPCODE_SEND_ONLY_WITH_INVALIDATE = 0x17, /* real constants follow -- see comment about above IB_OPCODE() macro for more details */ @@ -129,6 +132,8 @@ enum { IB_OPCODE(RC, ATOMIC_ACKNOWLEDGE), IB_OPCODE(RC, COMPARE_SWAP), IB_OPCODE(RC, FETCH_ADD), + IB_OPCODE(RC, SEND_LAST_WITH_INVALIDATE), + IB_OPCODE(RC, SEND_ONLY_WITH_INVALIDATE), /* UC */ IB_OPCODE(UC, SEND_FIRST), diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index d57ceee90d26..16274e2133cd 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -149,15 +149,15 @@ struct rvt_driver_params { int qpn_res_end; int nports; int npkeys; - u8 qos_shift; char cq_name[RVT_CQN_MAX]; int node; - int max_rdma_atomic; int psn_mask; int psn_shift; int psn_modify_mask; u32 core_cap_flags; u32 max_mad_size; + u8 qos_shift; + u8 max_rdma_atomic; }; /* Protection domain */ @@ -426,6 +426,15 @@ static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi) } /* + * Return the max atomic suitable for determining + * the size of the ack ring buffer in a QP. + */ +static inline unsigned int rvt_max_atomic(struct rvt_dev_info *rdi) +{ + return rdi->dparms.max_rdma_atomic + 1; +} + +/* * Return the indexed PKEY from the port PKEY table. */ static inline u16 rvt_get_pkey(struct rvt_dev_info *rdi, diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 0e1ff2abfe92..6d23b879416a 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -211,8 +211,6 @@ struct rvt_mmap_info { unsigned size; }; -#define RVT_MAX_RDMA_ATOMIC 16 - /* * This structure holds the information that the send tasklet needs * to send a RDMA read response or atomic operation. @@ -282,8 +280,7 @@ struct rvt_qp { atomic_t refcount ____cacheline_aligned_in_smp; wait_queue_head_t wait; - struct rvt_ack_entry s_ack_queue[RVT_MAX_RDMA_ATOMIC + 1] - ____cacheline_aligned_in_smp; + struct rvt_ack_entry *s_ack_queue; struct rvt_sge_state s_rdma_read_sge; spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h index a533cecab14f..98bebf8bef55 100644 --- a/include/uapi/rdma/hfi/hfi1_user.h +++ b/include/uapi/rdma/hfi/hfi1_user.h @@ -66,7 +66,7 @@ * The major version changes when data structures change in an incompatible * way. The driver must be the same for initialization to succeed. */ -#define HFI1_USER_SWMAJOR 5 +#define HFI1_USER_SWMAJOR 6 /* * Minor version differences are always compatible @@ -75,7 +75,12 @@ * may not be implemented; the user code must deal with this if it * cares, or it must abort after initialization reports the difference. */ -#define HFI1_USER_SWMINOR 0 +#define HFI1_USER_SWMINOR 1 + +/* + * We will encode the major/minor inside a single 32bit version number. + */ +#define HFI1_SWMAJOR_SHIFT 16 /* * Set of HW and driver capability/feature bits. @@ -107,19 +112,6 @@ #define HFI1_RCVHDR_ENTSIZE_16 (1UL << 1) #define HFI1_RCVDHR_ENTSIZE_32 (1UL << 2) -/* - * If the unit is specified via open, HFI choice is fixed. If port is - * specified, it's also fixed. Otherwise we try to spread contexts - * across ports and HFIs, using different algorithms. WITHIN is - * the old default, prior to this mechanism. - */ -#define HFI1_ALG_ACROSS 0 /* round robin contexts across HFIs, then - * ports; this is the default */ -#define HFI1_ALG_WITHIN 1 /* use all contexts on an HFI (round robin - * active ports within), then next HFI */ -#define HFI1_ALG_COUNT 2 /* number of algorithm choices */ - - /* User commands. */ #define HFI1_CMD_ASSIGN_CTXT 1 /* allocate HFI and context */ #define HFI1_CMD_CTXT_INFO 2 /* find out what resources we got */ @@ -127,7 +119,6 @@ #define HFI1_CMD_TID_UPDATE 4 /* update expected TID entries */ #define HFI1_CMD_TID_FREE 5 /* free expected TID entries */ #define HFI1_CMD_CREDIT_UPD 6 /* force an update of PIO credit */ -#define HFI1_CMD_SDMA_STATUS_UPD 7 /* force update of SDMA status ring */ #define HFI1_CMD_RECV_CTRL 8 /* control receipt of packets */ #define HFI1_CMD_POLL_TYPE 9 /* set the kind of polling we want */ @@ -135,13 +126,46 @@ #define HFI1_CMD_SET_PKEY 11 /* set context's pkey */ #define HFI1_CMD_CTXT_RESET 12 /* reset context's HW send context */ #define HFI1_CMD_TID_INVAL_READ 13 /* read TID cache invalidations */ -/* separate EPROM commands from normal PSM commands */ -#define HFI1_CMD_EP_INFO 64 /* read EPROM device ID */ -#define HFI1_CMD_EP_ERASE_CHIP 65 /* erase whole EPROM */ -/* range 66-74 no longer used */ -#define HFI1_CMD_EP_ERASE_RANGE 75 /* erase EPROM range */ -#define HFI1_CMD_EP_READ_RANGE 76 /* read EPROM range */ -#define HFI1_CMD_EP_WRITE_RANGE 77 /* write EPROM range */ +#define HFI1_CMD_GET_VERS 14 /* get the version of the user cdev */ + +/* + * User IOCTLs can not go above 128 if they do then see common.h and change the + * base for the snoop ioctl + */ +#define IB_IOCTL_MAGIC 0x1b /* See Documentation/ioctl/ioctl-number.txt */ + +/* + * Make the ioctls occupy the last 0xf0-0xff portion of the IB range + */ +#define __NUM(cmd) (HFI1_CMD_##cmd + 0xe0) + +struct hfi1_cmd; +#define HFI1_IOCTL_ASSIGN_CTXT \ + _IOWR(IB_IOCTL_MAGIC, __NUM(ASSIGN_CTXT), struct hfi1_user_info) +#define HFI1_IOCTL_CTXT_INFO \ + _IOW(IB_IOCTL_MAGIC, __NUM(CTXT_INFO), struct hfi1_ctxt_info) +#define HFI1_IOCTL_USER_INFO \ + _IOW(IB_IOCTL_MAGIC, __NUM(USER_INFO), struct hfi1_base_info) +#define HFI1_IOCTL_TID_UPDATE \ + _IOWR(IB_IOCTL_MAGIC, __NUM(TID_UPDATE), struct hfi1_tid_info) +#define HFI1_IOCTL_TID_FREE \ + _IOWR(IB_IOCTL_MAGIC, __NUM(TID_FREE), struct hfi1_tid_info) +#define HFI1_IOCTL_CREDIT_UPD \ + _IO(IB_IOCTL_MAGIC, __NUM(CREDIT_UPD)) +#define HFI1_IOCTL_RECV_CTRL \ + _IOW(IB_IOCTL_MAGIC, __NUM(RECV_CTRL), int) +#define HFI1_IOCTL_POLL_TYPE \ + _IOW(IB_IOCTL_MAGIC, __NUM(POLL_TYPE), int) +#define HFI1_IOCTL_ACK_EVENT \ + _IOW(IB_IOCTL_MAGIC, __NUM(ACK_EVENT), unsigned long) +#define HFI1_IOCTL_SET_PKEY \ + _IOW(IB_IOCTL_MAGIC, __NUM(SET_PKEY), __u16) +#define HFI1_IOCTL_CTXT_RESET \ + _IO(IB_IOCTL_MAGIC, __NUM(CTXT_RESET)) +#define HFI1_IOCTL_TID_INVAL_READ \ + _IOWR(IB_IOCTL_MAGIC, __NUM(TID_INVAL_READ), struct hfi1_tid_info) +#define HFI1_IOCTL_GET_VERS \ + _IOR(IB_IOCTL_MAGIC, __NUM(GET_VERS), int) #define _HFI1_EVENT_FROZEN_BIT 0 #define _HFI1_EVENT_LINKDOWN_BIT 1 @@ -199,9 +223,7 @@ struct hfi1_user_info { * Should be set to HFI1_USER_SWVERSION. */ __u32 userversion; - __u16 pad; - /* HFI selection algorithm, if unit has not selected */ - __u16 hfi1_alg; + __u32 pad; /* * If two or more processes wish to share a context, each process * must set the subcontext_cnt and subcontext_id to the same @@ -243,12 +265,6 @@ struct hfi1_tid_info { __u32 length; }; -struct hfi1_cmd { - __u32 type; /* command type */ - __u32 len; /* length of struct pointed to by add */ - __u64 addr; /* pointer to user structure */ -}; - enum hfi1_sdma_comp_state { FREE = 0, QUEUED, |