diff options
Diffstat (limited to 'drivers/dma/ioat/dma_v3.c')
-rw-r--r-- | drivers/dma/ioat/dma_v3.c | 394 |
1 files changed, 373 insertions, 21 deletions
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 639311598f35..71e113dfc8cc 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -55,7 +55,7 @@ /* * Support routines for v3+ hardware */ - +#include <linux/module.h> #include <linux/pci.h> #include <linux/gfp.h> #include <linux/dmaengine.h> @@ -70,6 +70,10 @@ /* ioat hardware assumes at least two sources for raid operations */ #define src_cnt_to_sw(x) ((x) + 2) #define src_cnt_to_hw(x) ((x) - 2) +#define ndest_to_sw(x) ((x) + 1) +#define ndest_to_hw(x) ((x) - 1) +#define src16_cnt_to_sw(x) ((x) + 9) +#define src16_cnt_to_hw(x) ((x) - 9) /* provide a lookup table for setting the source address in the base or * extended descriptor of an xor or pq descriptor @@ -77,7 +81,18 @@ static const u8 xor_idx_to_desc = 0xe0; static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 }; static const u8 pq_idx_to_desc = 0xf8; +static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2 }; static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 }; +static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7, + 0, 1, 2, 3, 4, 5, 6 }; + +/* + * technically sources 1 and 2 do not require SED, but the op will have + * at least 9 descriptors so that's irrelevant. + */ +static const u8 pq16_idx_to_sed[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1 }; static void ioat3_eh(struct ioat2_dma_chan *ioat); @@ -103,6 +118,13 @@ static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx) return raw->field[pq_idx_to_field[idx]]; } +static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx) +{ + struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; + + return raw->field[pq16_idx_to_field[idx]]; +} + static void pq_set_src(struct ioat_raw_descriptor *descs[2], dma_addr_t addr, u32 offset, u8 coef, int idx) { @@ -113,6 +135,12 @@ static void pq_set_src(struct ioat_raw_descriptor *descs[2], pq->coef[idx] = coef; } +static int sed_get_pq16_pool_idx(int src_cnt) +{ + + return pq16_idx_to_sed[src_cnt]; +} + static bool is_jf_ioat(struct pci_dev *pdev) { switch (pdev->device) { @@ -210,6 +238,52 @@ static bool is_bwd_ioat(struct pci_dev *pdev) } } +static void pq16_set_src(struct ioat_raw_descriptor *desc[3], + dma_addr_t addr, u32 offset, u8 coef, int idx) +{ + struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *)desc[0]; + struct ioat_pq16a_descriptor *pq16 = + (struct ioat_pq16a_descriptor *)desc[1]; + struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; + + raw->field[pq16_idx_to_field[idx]] = addr + offset; + + if (idx < 8) + pq->coef[idx] = coef; + else + pq16->coef[idx - 8] = coef; +} + +struct ioat_sed_ent * +ioat3_alloc_sed(struct ioatdma_device *device, unsigned int hw_pool) +{ + struct ioat_sed_ent *sed; + gfp_t flags = __GFP_ZERO | GFP_ATOMIC; + + sed = kmem_cache_alloc(device->sed_pool, flags); + if (!sed) + return NULL; + + sed->hw_pool = hw_pool; + sed->hw = dma_pool_alloc(device->sed_hw_pool[hw_pool], + flags, &sed->dma); + if (!sed->hw) { + kmem_cache_free(device->sed_pool, sed); + return NULL; + } + + return sed; +} + +void ioat3_free_sed(struct ioatdma_device *device, struct ioat_sed_ent *sed) +{ + if (!sed) + return; + + dma_pool_free(device->sed_hw_pool[sed->hw_pool], sed->hw, sed->dma); + kmem_cache_free(device->sed_pool, sed); +} + static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, int idx) { @@ -322,6 +396,54 @@ static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat, } break; } + case IOAT_OP_PQ_16S: + case IOAT_OP_PQ_VAL_16S: { + struct ioat_pq_descriptor *pq = desc->pq; + int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt); + struct ioat_raw_descriptor *descs[4]; + int i; + + /* in the 'continue' case don't unmap the dests as sources */ + if (dmaf_p_disabled_continue(flags)) + src_cnt--; + else if (dmaf_continue(flags)) + src_cnt -= 3; + + if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + descs[0] = (struct ioat_raw_descriptor *)pq; + descs[1] = (struct ioat_raw_descriptor *)(desc->sed->hw); + descs[2] = (struct ioat_raw_descriptor *)(&desc->sed->hw->b[0]); + for (i = 0; i < src_cnt; i++) { + dma_addr_t src = pq16_get_src(descs, i); + + ioat_unmap(pdev, src - offset, len, + PCI_DMA_TODEVICE, flags, 0); + } + + /* the dests are sources in pq validate operations */ + if (pq->ctl_f.op == IOAT_OP_XOR_VAL) { + if (!(flags & DMA_PREP_PQ_DISABLE_P)) + ioat_unmap(pdev, pq->p_addr - offset, + len, PCI_DMA_TODEVICE, + flags, 0); + if (!(flags & DMA_PREP_PQ_DISABLE_Q)) + ioat_unmap(pdev, pq->q_addr - offset, + len, PCI_DMA_TODEVICE, + flags, 0); + break; + } + } + + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + if (!(flags & DMA_PREP_PQ_DISABLE_P)) + ioat_unmap(pdev, pq->p_addr - offset, len, + PCI_DMA_BIDIRECTIONAL, flags, 1); + if (!(flags & DMA_PREP_PQ_DISABLE_Q)) + ioat_unmap(pdev, pq->q_addr - offset, len, + PCI_DMA_BIDIRECTIONAL, flags, 1); + } + break; + } default: dev_err(&pdev->dev, "%s: unknown op type: %#x\n", __func__, desc->hw->ctl_f.op); @@ -386,6 +508,7 @@ static bool ioat3_cleanup_preamble(struct ioat_chan_common *chan, static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) { struct ioat_chan_common *chan = &ioat->base; + struct ioatdma_device *device = chan->device; struct ioat_ring_ent *desc; bool seen_current = false; int idx = ioat->tail, i; @@ -430,6 +553,12 @@ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) BUG_ON(i + 1 >= active); i++; } + + /* cleanup super extended descriptors */ + if (desc->sed) { + ioat3_free_sed(device, desc->sed); + desc->sed = NULL; + } } smp_mb(); /* finish all descriptor reads before incrementing tail */ ioat->tail = idx + i; @@ -522,6 +651,7 @@ static void ioat3_eh(struct ioat2_dma_chan *ioat) } break; case IOAT_OP_PQ_VAL: + case IOAT_OP_PQ_VAL_16S: if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) { *desc->result |= SUM_CHECK_P_RESULT; err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR; @@ -814,7 +944,8 @@ dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct int i; dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" - " sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s' src_cnt: %d)\n", + " sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" + " src_cnt: %d)\n", desc_id(desc), (unsigned long long) desc->txd.phys, (unsigned long long) (pq_ex ? pq_ex->next : pq->next), desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en, @@ -829,6 +960,41 @@ dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct dev_dbg(dev, "\tNEXT: %#llx\n", pq->next); } +static void dump_pq16_desc_dbg(struct ioat2_dma_chan *ioat, + struct ioat_ring_ent *desc) +{ + struct device *dev = to_dev(&ioat->base); + struct ioat_pq_descriptor *pq = desc->pq; + struct ioat_raw_descriptor *descs[] = { (void *)pq, + (void *)pq, + (void *)pq }; + int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt); + int i; + + if (desc->sed) { + descs[1] = (void *)desc->sed->hw; + descs[2] = (void *)desc->sed->hw + 64; + } + + dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" + " sz: %#x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" + " src_cnt: %d)\n", + desc_id(desc), (unsigned long long) desc->txd.phys, + (unsigned long long) pq->next, + desc->txd.flags, pq->size, pq->ctl, + pq->ctl_f.op, pq->ctl_f.int_en, + pq->ctl_f.compl_write, + pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q", + pq->ctl_f.src_cnt); + for (i = 0; i < src_cnt; i++) { + dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, + (unsigned long long) pq16_get_src(descs, i), + pq->coef[i]); + } + dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); + dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); +} + static struct dma_async_tx_descriptor * __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, const dma_addr_t *dst, const dma_addr_t *src, @@ -951,10 +1117,114 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, } static struct dma_async_tx_descriptor * +__ioat3_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result, + const dma_addr_t *dst, const dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_chan_common *chan = &ioat->base; + struct ioatdma_device *device = chan->device; + struct ioat_ring_ent *desc; + size_t total_len = len; + struct ioat_pq_descriptor *pq; + u32 offset = 0; + u8 op; + int i, s, idx, num_descs; + + /* this function only handles src_cnt 9 - 16 */ + BUG_ON(src_cnt < 9); + + /* this function is only called with 9-16 sources */ + op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S; + + dev_dbg(to_dev(chan), "%s\n", __func__); + + num_descs = ioat2_xferlen_to_descs(ioat, len); + + /* + * 16 source pq is only available on cb3.3 and has no completion + * write hw bug. + */ + if (num_descs && ioat2_check_space_lock(ioat, num_descs) == 0) + idx = ioat->head; + else + return NULL; + + i = 0; + + do { + struct ioat_raw_descriptor *descs[4]; + size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); + + desc = ioat2_get_ring_ent(ioat, idx + i); + pq = desc->pq; + + descs[0] = (struct ioat_raw_descriptor *) pq; + + desc->sed = ioat3_alloc_sed(device, + sed_get_pq16_pool_idx(src_cnt)); + if (!desc->sed) { + dev_err(to_dev(chan), + "%s: no free sed entries\n", __func__); + return NULL; + } + + pq->sed_addr = desc->sed->dma; + desc->sed->parent = desc; + + descs[1] = (struct ioat_raw_descriptor *)desc->sed->hw; + descs[2] = (void *)descs[1] + 64; + + for (s = 0; s < src_cnt; s++) + pq16_set_src(descs, src[s], offset, scf[s], s); + + /* see the comment for dma_maxpq in include/linux/dmaengine.h */ + if (dmaf_p_disabled_continue(flags)) + pq16_set_src(descs, dst[1], offset, 1, s++); + else if (dmaf_continue(flags)) { + pq16_set_src(descs, dst[0], offset, 0, s++); + pq16_set_src(descs, dst[1], offset, 1, s++); + pq16_set_src(descs, dst[1], offset, 0, s++); + } + + pq->size = xfer_size; + pq->p_addr = dst[0] + offset; + pq->q_addr = dst[1] + offset; + pq->ctl = 0; + pq->ctl_f.op = op; + pq->ctl_f.src_cnt = src16_cnt_to_hw(s); + pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); + pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); + + len -= xfer_size; + offset += xfer_size; + } while (++i < num_descs); + + /* last pq descriptor carries the unmap parameters and fence bit */ + desc->txd.flags = flags; + desc->len = total_len; + if (result) + desc->result = result; + pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + + /* with cb3.3 we should be able to do completion w/o a null desc */ + pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + pq->ctl_f.compl_write = 1; + + dump_pq16_desc_dbg(ioat, desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + +static struct dma_async_tx_descriptor * ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, unsigned int src_cnt, const unsigned char *scf, size_t len, unsigned long flags) { + struct dma_device *dma = chan->device; + /* specify valid address for disabled result */ if (flags & DMA_PREP_PQ_DISABLE_P) dst[0] = dst[1]; @@ -974,11 +1244,20 @@ ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, single_source_coef[0] = scf[0]; single_source_coef[1] = 0; - return __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2, - single_source_coef, len, flags); - } else - return __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, scf, - len, flags); + return (src_cnt > 8) && (dma->max_pq > 8) ? + __ioat3_prep_pq16_lock(chan, NULL, dst, single_source, + 2, single_source_coef, len, + flags) : + __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2, + single_source_coef, len, flags); + + } else { + return (src_cnt > 8) && (dma->max_pq > 8) ? + __ioat3_prep_pq16_lock(chan, NULL, dst, src, src_cnt, + scf, len, flags) : + __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, + scf, len, flags); + } } struct dma_async_tx_descriptor * @@ -986,6 +1265,8 @@ ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, unsigned int src_cnt, const unsigned char *scf, size_t len, enum sum_check_flags *pqres, unsigned long flags) { + struct dma_device *dma = chan->device; + /* specify valid address for disabled result */ if (flags & DMA_PREP_PQ_DISABLE_P) pq[0] = pq[1]; @@ -997,14 +1278,18 @@ ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, */ *pqres = 0; - return __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len, - flags); + return (src_cnt > 8) && (dma->max_pq > 8) ? + __ioat3_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len, + flags) : + __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len, + flags); } static struct dma_async_tx_descriptor * ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, unsigned int src_cnt, size_t len, unsigned long flags) { + struct dma_device *dma = chan->device; unsigned char scf[src_cnt]; dma_addr_t pq[2]; @@ -1013,8 +1298,11 @@ ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, flags |= DMA_PREP_PQ_DISABLE_Q; pq[1] = dst; /* specify valid address for disabled result */ - return __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len, - flags); + return (src_cnt > 8) && (dma->max_pq > 8) ? + __ioat3_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len, + flags) : + __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len, + flags); } struct dma_async_tx_descriptor * @@ -1022,6 +1310,7 @@ ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, size_t len, enum sum_check_flags *result, unsigned long flags) { + struct dma_device *dma = chan->device; unsigned char scf[src_cnt]; dma_addr_t pq[2]; @@ -1035,8 +1324,12 @@ ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, flags |= DMA_PREP_PQ_DISABLE_Q; pq[1] = pq[0]; /* specify valid address for disabled result */ - return __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, scf, - len, flags); + + return (src_cnt > 8) && (dma->max_pq > 8) ? + __ioat3_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1, + scf, len, flags) : + __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, + scf, len, flags); } static struct dma_async_tx_descriptor * @@ -1533,11 +1826,17 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) if (cap & IOAT_CAP_PQ) { is_raid_device = true; - dma_set_maxpq(dma, 8, 0); - if (is_xeon_cb32(pdev)) - dma->pq_align = 6; - else + + if (cap & IOAT_CAP_RAID16SS) { + dma_set_maxpq(dma, 16, 0); dma->pq_align = 0; + } else { + dma_set_maxpq(dma, 8, 0); + if (is_xeon_cb32(pdev)) + dma->pq_align = 6; + else + dma->pq_align = 0; + } dma_cap_set(DMA_PQ, dma->cap_mask); dma->device_prep_dma_pq = ioat3_prep_pq; @@ -1546,11 +1845,16 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) dma->device_prep_dma_pq_val = ioat3_prep_pq_val; if (!(cap & IOAT_CAP_XOR)) { - dma->max_xor = 8; - if (is_xeon_cb32(pdev)) - dma->xor_align = 6; - else + if (cap & IOAT_CAP_RAID16SS) { + dma->max_xor = 16; dma->xor_align = 0; + } else { + dma->max_xor = 8; + if (is_xeon_cb32(pdev)) + dma->xor_align = 6; + else + dma->xor_align = 0; + } dma_cap_set(DMA_XOR, dma->cap_mask); dma->device_prep_dma_xor = ioat3_prep_pqxor; @@ -1578,6 +1882,30 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) dma->device_prep_dma_pq_val = NULL; } + /* starting with CB3.3 super extended descriptors are supported */ + if (cap & IOAT_CAP_RAID16SS) { + char pool_name[14]; + int i; + + /* allocate sw descriptor pool for SED */ + device->sed_pool = kmem_cache_create("ioat_sed", + sizeof(struct ioat_sed_ent), 0, 0, NULL); + if (!device->sed_pool) + return -ENOMEM; + + for (i = 0; i < MAX_SED_POOLS; i++) { + snprintf(pool_name, 14, "ioat_hw%d_sed", i); + + /* allocate SED DMA pool */ + device->sed_hw_pool[i] = dma_pool_create(pool_name, + &pdev->dev, + SED_SIZE * (i + 1), 64, 0); + if (!device->sed_hw_pool[i]) + goto sed_pool_cleanup; + + } + } + err = ioat_probe(device); if (err) return err; @@ -1599,4 +1927,28 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) device->dca = ioat3_dca_init(pdev, device->reg_base); return 0; + +sed_pool_cleanup: + if (device->sed_pool) { + int i; + kmem_cache_destroy(device->sed_pool); + + for (i = 0; i < MAX_SED_POOLS; i++) + if (device->sed_hw_pool[i]) + dma_pool_destroy(device->sed_hw_pool[i]); + } + + return -ENOMEM; +} + +void ioat3_dma_remove(struct ioatdma_device *device) +{ + if (device->sed_pool) { + int i; + kmem_cache_destroy(device->sed_pool); + + for (i = 0; i < MAX_SED_POOLS; i++) + if (device->sed_hw_pool[i]) + dma_pool_destroy(device->sed_hw_pool[i]); + } } |