Diffstat (limited to 'drivers/accel/habanalabs/common/memory.c')
-rw-r--r-- | drivers/accel/habanalabs/common/memory.c | 308
1 file changed, 166 insertions, 142 deletions
diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
index 4fc72a07d2f5..0b8689fe0b64 100644
--- a/drivers/accel/habanalabs/common/memory.c
+++ b/drivers/accel/habanalabs/common/memory.c
@@ -244,7 +244,7 @@ static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
 
 	*p_userptr = userptr;
 
-	rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL);
+	rc = hl_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL);
 	if (rc) {
 		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
 		goto dma_map_err;
@@ -832,7 +832,6 @@ int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
  * physical pages
  *
  * This function does the following:
- * - Pin the physical pages related to the given virtual block.
  * - Create a physical page pack from the physical pages related to the given
  *   virtual block.
  */
@@ -1532,24 +1531,20 @@ static int set_dma_sg(struct scatterlist *sg, u64 bar_address, u64 chunk_size,
 }
 
 static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 *pages, u64 npages,
-						u64 page_size, u64 exported_size,
+						u64 page_size, u64 exported_size, u64 offset,
 						struct device *dev, enum dma_data_direction dir)
 {
-	u64 chunk_size, bar_address, dma_max_seg_size, cur_size_to_export, cur_npages;
-	struct asic_fixed_properties *prop;
-	int rc, i, j, nents, cur_page;
+	u64 dma_max_seg_size, curr_page, size, chunk_size, left_size_to_export, left_size_in_page,
+		left_size_in_dma_seg, device_address, bar_address, start_page;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct scatterlist *sg;
+	unsigned int nents, i;
 	struct sg_table *sgt;
+	bool next_sg_entry;
+	int rc;
 
-	prop = &hdev->asic_prop;
-
-	dma_max_seg_size = dma_get_max_seg_size(dev);
-
-	/* We would like to align the max segment size to PAGE_SIZE, so the
-	 * SGL will contain aligned addresses that can be easily mapped to
-	 * an MMU
-	 */
-	dma_max_seg_size = ALIGN_DOWN(dma_max_seg_size, PAGE_SIZE);
+	/* Align max segment size to PAGE_SIZE to fit the minimal IOMMU mapping granularity */
+	dma_max_seg_size = ALIGN_DOWN(dma_get_max_seg_size(dev), PAGE_SIZE);
 	if (dma_max_seg_size < PAGE_SIZE) {
 		dev_err_ratelimited(hdev->dev,
 				"dma_max_seg_size %llu can't be smaller than PAGE_SIZE\n",
@@ -1561,121 +1556,149 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64
 	if (!sgt)
 		return ERR_PTR(-ENOMEM);
 
-	/* remove export size restrictions in case not explicitly defined */
-	cur_size_to_export = exported_size ? exported_size : (npages * page_size);
-
-	/* If the size of each page is larger than the dma max segment size,
-	 * then we can't combine pages and the number of entries in the SGL
-	 * will just be the
-	 * <number of pages> * <chunks of max segment size in each page>
-	 */
-	if (page_size > dma_max_seg_size) {
-		/* we should limit number of pages according to the exported size */
-		cur_npages = DIV_ROUND_UP_SECTOR_T(cur_size_to_export, page_size);
-		nents = cur_npages * DIV_ROUND_UP_SECTOR_T(page_size, dma_max_seg_size);
-	} else {
-		cur_npages = npages;
-
-		/* Get number of non-contiguous chunks */
-		for (i = 1, nents = 1, chunk_size = page_size ; i < cur_npages ; i++) {
-			if (pages[i - 1] + page_size != pages[i] ||
-					chunk_size + page_size > dma_max_seg_size) {
-				nents++;
-				chunk_size = page_size;
-				continue;
-			}
+	/* Use the offset to move to the actual first page that is exported */
+	for (start_page = 0 ; start_page < npages ; ++start_page) {
+		if (offset < page_size)
+			break;
 
-			chunk_size += page_size;
-		}
+		/* The offset value was validated so there can't be an underflow */
+		offset -= page_size;
 	}
 
-	rc = sg_alloc_table(sgt, nents, GFP_KERNEL | __GFP_ZERO);
-	if (rc)
-		goto error_free;
+	/* Calculate the required number of entries for the SG table */
+	curr_page = start_page;
+	nents = 1;
+	left_size_to_export = exported_size;
+	left_size_in_page = page_size - offset;
+	left_size_in_dma_seg = dma_max_seg_size;
+	next_sg_entry = false;
 
-	cur_page = 0;
+	while (true) {
+		size = min3(left_size_to_export, left_size_in_page, left_size_in_dma_seg);
+		left_size_to_export -= size;
+		left_size_in_page -= size;
+		left_size_in_dma_seg -= size;
 
-	if (page_size > dma_max_seg_size) {
-		u64 size_left, cur_device_address = 0;
+		if (!left_size_to_export)
+			break;
 
-		size_left = page_size;
+		if (!left_size_in_page) {
+			/* left_size_to_export is not zero so there must be another page */
+			if (pages[curr_page] + page_size != pages[curr_page + 1])
+				next_sg_entry = true;
 
-		/* Need to split each page into the number of chunks of
-		 * dma_max_seg_size
-		 */
-		for_each_sgtable_dma_sg(sgt, sg, i) {
-			if (size_left == page_size)
-				cur_device_address =
-					pages[cur_page] - prop->dram_base_address;
-			else
-				cur_device_address += dma_max_seg_size;
+			++curr_page;
+			left_size_in_page = page_size;
+		}
 
-			/* make sure not to export over exported size */
-			chunk_size = min3(size_left, dma_max_seg_size, cur_size_to_export);
+		if (!left_size_in_dma_seg) {
+			next_sg_entry = true;
+			left_size_in_dma_seg = dma_max_seg_size;
+		}
 
-			bar_address = hdev->dram_pci_bar_start + cur_device_address;
+		if (next_sg_entry) {
+			++nents;
+			next_sg_entry = false;
+		}
+	}
 
-			rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
-			if (rc)
-				goto error_unmap;
+	rc = sg_alloc_table(sgt, nents, GFP_KERNEL | __GFP_ZERO);
+	if (rc)
+		goto err_free_sgt;
 
-			cur_size_to_export -= chunk_size;
+	/* Prepare the SG table entries */
+	curr_page = start_page;
+	device_address = pages[curr_page] + offset;
+	left_size_to_export = exported_size;
+	left_size_in_page = page_size - offset;
+	left_size_in_dma_seg = dma_max_seg_size;
+	next_sg_entry = false;
 
-			if (size_left > dma_max_seg_size) {
-				size_left -= dma_max_seg_size;
-			} else {
-				cur_page++;
-				size_left = page_size;
+	for_each_sgtable_dma_sg(sgt, sg, i) {
+		bar_address = hdev->dram_pci_bar_start + (device_address - prop->dram_base_address);
+		chunk_size = 0;
+
+		for ( ; curr_page < npages ; ++curr_page) {
+			size = min3(left_size_to_export, left_size_in_page, left_size_in_dma_seg);
+			chunk_size += size;
+			left_size_to_export -= size;
+			left_size_in_page -= size;
+			left_size_in_dma_seg -= size;
+
+			if (!left_size_to_export)
+				break;
+
+			if (!left_size_in_page) {
+				/* left_size_to_export is not zero so there must be another page */
+				if (pages[curr_page] + page_size != pages[curr_page + 1]) {
+					device_address = pages[curr_page + 1];
+					next_sg_entry = true;
+				}
+
+				left_size_in_page = page_size;
 			}
-		}
-	} else {
-		/* Merge pages and put them into the scatterlist */
-		for_each_sgtable_dma_sg(sgt, sg, i) {
-			chunk_size = page_size;
-			for (j = cur_page + 1 ; j < cur_npages ; j++) {
-				if (pages[j - 1] + page_size != pages[j] ||
-						chunk_size + page_size > dma_max_seg_size)
-					break;
-
-				chunk_size += page_size;
+
+			if (!left_size_in_dma_seg) {
+				/*
+				 * Skip setting a new device address if already moving to a page
+				 * which is not contiguous with the current page.
+				 */
+				if (!next_sg_entry) {
+					device_address += chunk_size;
+					next_sg_entry = true;
+				}
+
+				left_size_in_dma_seg = dma_max_seg_size;
 			}
 
-			bar_address = hdev->dram_pci_bar_start +
-					(pages[cur_page] - prop->dram_base_address);
+			if (next_sg_entry) {
+				next_sg_entry = false;
+				break;
+			}
+		}
 
-			/* make sure not to export over exported size */
-			chunk_size = min(chunk_size, cur_size_to_export);
-			rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
-			if (rc)
-				goto error_unmap;
+		rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
+		if (rc)
+			goto err_unmap;
+	}
 
-			cur_size_to_export -= chunk_size;
-			cur_page = j;
-		}
+	/* There should be nothing left to export exactly after looping over all SG elements */
+	if (left_size_to_export) {
+		dev_err(hdev->dev,
+			"left size to export %#llx after initializing %u SG elements\n",
+			left_size_to_export, sgt->nents);
+		rc = -ENOMEM;
+		goto err_unmap;
 	}
 
-	/* Because we are not going to include a CPU list we want to have some
-	 * chance that other users will detect this by setting the orig_nents
-	 * to 0 and using only nents (length of DMA list) when going over the
-	 * sgl
+	/*
+	 * Because we are not going to include a CPU list, we want to have some chance that other
+	 * users will detect this when going over SG table, by setting the orig_nents to 0 and using
+	 * only nents (length of DMA list).
 	 */
 	sgt->orig_nents = 0;
 
+	dev_dbg(hdev->dev, "prepared SG table with %u entries for importer %s\n",
+		nents, dev_name(dev));
+	for_each_sgtable_dma_sg(sgt, sg, i)
+		dev_dbg(hdev->dev,
+			"SG entry %d: address %#llx, length %#x\n",
+			i, sg_dma_address(sg), sg_dma_len(sg));
+
 	return sgt;
 
-error_unmap:
+err_unmap:
 	for_each_sgtable_dma_sg(sgt, sg, i) {
 		if (!sg_dma_len(sg))
 			continue;
 
-		dma_unmap_resource(dev, sg_dma_address(sg),
-					sg_dma_len(sg), dir,
+		dma_unmap_resource(dev, sg_dma_address(sg), sg_dma_len(sg), dir,
 					DMA_ATTR_SKIP_CPU_SYNC);
 	}
 
 	sg_free_table(sgt);
 
-error_free:
+err_free_sgt:
 	kfree(sgt);
 	return ERR_PTR(rc);
 }
@@ -1700,6 +1723,7 @@ static int hl_dmabuf_attach(struct dma_buf *dmabuf,
 static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
 						enum dma_data_direction dir)
 {
+	u64 *pages, npages, page_size, exported_size, offset;
 	struct dma_buf *dma_buf = attachment->dmabuf;
 	struct hl_vm_phys_pg_pack *phys_pg_pack;
 	struct hl_dmabuf_priv *hl_dmabuf;
@@ -1708,30 +1732,28 @@ static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
 
 	hl_dmabuf = dma_buf->priv;
 	hdev = hl_dmabuf->ctx->hdev;
-	phys_pg_pack = hl_dmabuf->phys_pg_pack;
 
 	if (!attachment->peer2peer) {
 		dev_dbg(hdev->dev, "Failed to map dmabuf because p2p is disabled\n");
 		return ERR_PTR(-EPERM);
 	}
 
-	if (phys_pg_pack)
-		sgt = alloc_sgt_from_device_pages(hdev,
-						phys_pg_pack->pages,
-						phys_pg_pack->npages,
-						phys_pg_pack->page_size,
-						phys_pg_pack->exported_size,
-						attachment->dev,
-						dir);
-	else
-		sgt = alloc_sgt_from_device_pages(hdev,
-						&hl_dmabuf->device_address,
-						1,
-						hl_dmabuf->dmabuf->size,
-						0,
-						attachment->dev,
-						dir);
+	exported_size = hl_dmabuf->dmabuf->size;
+	offset = hl_dmabuf->offset;
+	phys_pg_pack = hl_dmabuf->phys_pg_pack;
+	if (phys_pg_pack) {
+		pages = phys_pg_pack->pages;
+		npages = phys_pg_pack->npages;
+		page_size = phys_pg_pack->page_size;
+	} else {
+		pages = &hl_dmabuf->device_phys_addr;
+		npages = 1;
+		page_size = hl_dmabuf->dmabuf->size;
+	}
+
+	sgt = alloc_sgt_from_device_pages(hdev, pages, npages, page_size, exported_size, offset,
+						attachment->dev, dir);
 
 	if (IS_ERR(sgt))
 		dev_err(hdev->dev, "failed (%ld) to initialize sgt for dmabuf\n", PTR_ERR(sgt));
@@ -1818,7 +1840,7 @@ static void hl_release_dmabuf(struct dma_buf *dmabuf)
 	hl_ctx_put(ctx);
 
 	/* Paired with get_file() in export_dmabuf() */
-	fput(ctx->hpriv->filp);
+	fput(ctx->hpriv->file_priv->filp);
 
 	kfree(hl_dmabuf);
 }
@@ -1864,7 +1886,7 @@ static int export_dmabuf(struct hl_ctx *ctx,
 	 * released first and only then the compute device.
 	 * Paired with fput() in hl_release_dmabuf().
 	 */
-	get_file(ctx->hpriv->filp);
+	get_file(ctx->hpriv->file_priv->filp);
 
 	*dmabuf_fd = fd;
 
@@ -1876,22 +1898,29 @@ err_dma_buf_put:
 	return rc;
 }
 
-static int validate_export_params_common(struct hl_device *hdev, u64 device_addr, u64 size)
+static int validate_export_params_common(struct hl_device *hdev, u64 addr, u64 size, u64 offset)
 {
-	if (!IS_ALIGNED(device_addr, PAGE_SIZE)) {
+	if (!PAGE_ALIGNED(addr)) {
 		dev_dbg(hdev->dev,
-			"exported device memory address 0x%llx should be aligned to 0x%lx\n",
-			device_addr, PAGE_SIZE);
+			"exported device memory address 0x%llx should be aligned to PAGE_SIZE 0x%lx\n",
+			addr, PAGE_SIZE);
 		return -EINVAL;
 	}
 
-	if (size < PAGE_SIZE) {
+	if (!size || !PAGE_ALIGNED(size)) {
 		dev_dbg(hdev->dev,
-			"exported device memory size %llu should be equal to or greater than %lu\n",
+			"exported device memory size %llu should be a multiple of PAGE_SIZE %lu\n",
 			size, PAGE_SIZE);
 		return -EINVAL;
 	}
 
+	if (!PAGE_ALIGNED(offset)) {
+		dev_dbg(hdev->dev,
+			"exported device memory offset %llu should be a multiple of PAGE_SIZE %lu\n",
+			offset, PAGE_SIZE);
+		return -EINVAL;
+	}
+
 	return 0;
 }
 
@@ -1901,13 +1930,13 @@ static int validate_export_params_no_mmu(struct hl_device *hdev, u64 device_addr
 	u64 bar_address;
 	int rc;
 
-	rc = validate_export_params_common(hdev, device_addr, size);
+	rc = validate_export_params_common(hdev, device_addr, size, 0);
 	if (rc)
 		return rc;
 
 	if (device_addr < prop->dram_user_base_address ||
-			(device_addr + size) > prop->dram_end_address ||
-			(device_addr + size) < device_addr) {
+		(device_addr + size) > prop->dram_end_address ||
+		(device_addr + size) < device_addr) {
 		dev_dbg(hdev->dev,
 			"DRAM memory range 0x%llx (+0x%llx) is outside of DRAM boundaries\n",
 			device_addr, size);
@@ -1934,29 +1963,26 @@ static int validate_export_params(struct hl_device *hdev, u64 device_addr, u64 s
 	u64 bar_address;
 	int i, rc;
 
-	rc = validate_export_params_common(hdev, device_addr, size);
+	rc = validate_export_params_common(hdev, device_addr, size, offset);
 	if (rc)
 		return rc;
 
 	if ((offset + size) > phys_pg_pack->total_size) {
 		dev_dbg(hdev->dev, "offset %#llx and size %#llx exceed total map size %#llx\n",
-				offset, size, phys_pg_pack->total_size);
+			offset, size, phys_pg_pack->total_size);
 		return -EINVAL;
 	}
 
 	for (i = 0 ; i < phys_pg_pack->npages ; i++) {
-		bar_address = hdev->dram_pci_bar_start +
-				(phys_pg_pack->pages[i] - prop->dram_base_address);
+		bar_address = hdev->dram_pci_bar_start + (phys_pg_pack->pages[i] - prop->dram_base_address);
 
 		if ((bar_address + phys_pg_pack->page_size) >
 				(hdev->dram_pci_bar_start + prop->dram_pci_bar_size) ||
 				(bar_address + phys_pg_pack->page_size) < bar_address) {
 			dev_dbg(hdev->dev,
 				"DRAM memory range 0x%llx (+0x%x) is outside of PCI BAR boundaries\n",
-				phys_pg_pack->pages[i],
-				phys_pg_pack->page_size);
-
+				phys_pg_pack->pages[i], phys_pg_pack->page_size);
 			return -EINVAL;
 		}
 	}
@@ -2012,7 +2038,6 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o
 	struct asic_fixed_properties *prop;
 	struct hl_dmabuf_priv *hl_dmabuf;
 	struct hl_device *hdev;
-	u64 export_addr;
 	int rc;
 
 	hdev = ctx->hdev;
@@ -2024,8 +2049,6 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o
 		return -EINVAL;
 	}
 
-	export_addr = addr + offset;
-
 	hl_dmabuf = kzalloc(sizeof(*hl_dmabuf), GFP_KERNEL);
 	if (!hl_dmabuf)
 		return -ENOMEM;
@@ -2041,20 +2064,20 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o
 			rc = PTR_ERR(phys_pg_pack);
 			goto dec_memhash_export_cnt;
 		}
-		rc = validate_export_params(hdev, export_addr, size, offset, phys_pg_pack);
+		rc = validate_export_params(hdev, addr, size, offset, phys_pg_pack);
 		if (rc)
 			goto dec_memhash_export_cnt;
 
-		phys_pg_pack->exported_size = size;
 		hl_dmabuf->phys_pg_pack = phys_pg_pack;
 		hl_dmabuf->memhash_hnode = hnode;
+		hl_dmabuf->offset = offset;
 	} else {
-		rc = validate_export_params_no_mmu(hdev, export_addr, size);
+		rc = validate_export_params_no_mmu(hdev, addr, size);
 		if (rc)
 			goto err_free_dmabuf_wrapper;
-	}
 
-	hl_dmabuf->device_address = export_addr;
+		hl_dmabuf->device_phys_addr = addr;
+	}
 
 	rc = export_dmabuf(ctx, hl_dmabuf, size, flags, dmabuf_fd);
 	if (rc)
@@ -2171,8 +2194,9 @@ static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, struct hl_mem_in
 	return 0;
 }
 
-int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
+int hl_mem_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
 {
+	struct hl_fpriv *hpriv = file_priv->driver_priv;
 	enum hl_device_status status;
 	union hl_mem_args *args = data;
 	struct hl_device *hdev = hpriv->hdev;
@@ -2420,7 +2444,7 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
 	hl_debugfs_remove_userptr(hdev, userptr);
 
 	if (userptr->dma_mapped)
-		hdev->asic_funcs->hl_dma_unmap_sgtable(hdev, userptr->sgt, userptr->dir);
+		hl_dma_unmap_sgtable(hdev, userptr->sgt, userptr->dir);
 
 	unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true);
 	kvfree(userptr->pages);
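The hunks above rewrite alloc_sgt_from_device_pages() around two passes driven by min3() over the bytes left to export, the bytes left in the current device page, and the bytes left in the current DMA segment. The sketch below is not part of the patch: it is a stand-alone user-space rendering of the first (entry-counting) pass only. The names count_sg_entries and min3_u64, and the values in main(), are invented for illustration; the kernel code works on u64 device addresses with the left_size_* variables and min3() shown in the diff.

/*
 * Stand-alone sketch (not from the patch) of the SG-entry counting pass.
 * pages[] holds device physical addresses of equally sized pages; offset is
 * the byte offset of the exported region inside the first page.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t min3_u64(uint64_t a, uint64_t b, uint64_t c)
{
	uint64_t m = a < b ? a : b;

	return m < c ? m : c;
}

static unsigned int count_sg_entries(const uint64_t *pages, uint64_t npages, uint64_t page_size,
				     uint64_t export_size, uint64_t offset, uint64_t dma_max_seg)
{
	uint64_t curr_page, size, left_to_export, left_in_page, left_in_seg;
	unsigned int nents = 1;
	bool next_entry = false;

	/* Skip the whole pages covered by the offset (validated by the caller) */
	for (curr_page = 0 ; curr_page < npages ; ++curr_page) {
		if (offset < page_size)
			break;
		offset -= page_size;
	}

	left_to_export = export_size;
	left_in_page = page_size - offset;
	left_in_seg = dma_max_seg;

	while (true) {
		size = min3_u64(left_to_export, left_in_page, left_in_seg);
		left_to_export -= size;
		left_in_page -= size;
		left_in_seg -= size;

		if (!left_to_export)
			break;

		/* Crossing into a non-contiguous page starts a new SG entry */
		if (!left_in_page) {
			if (pages[curr_page] + page_size != pages[curr_page + 1])
				next_entry = true;
			++curr_page;
			left_in_page = page_size;
		}

		/* Exhausting the DMA max segment size also starts a new SG entry */
		if (!left_in_seg) {
			next_entry = true;
			left_in_seg = dma_max_seg;
		}

		if (next_entry) {
			++nents;
			next_entry = false;
		}
	}

	return nents;
}

int main(void)
{
	/* Two contiguous 2 MiB pages followed by one non-contiguous page */
	const uint64_t pages[] = { 0x000000, 0x200000, 0x800000 };

	/* Export 5 MiB starting 1 MiB into the first page, with a 4 MiB DMA segment limit */
	printf("nents = %u\n",
	       count_sg_entries(pages, 3, 0x200000, 0x500000, 0x100000, 0x400000));

	return 0;
}

Counting the entries up front keeps sg_alloc_table() to a single allocation; the second pass in the patch then fills each entry until it hits either a non-contiguous page or the dma_max_seg_size limit.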