diff options
-rw-r--r-- | drivers/misc/habanalabs/common/command_buffer.c | 31 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/command_submission.c | 111 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/habanalabs.h | 5 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/irq.c | 8 | ||||
-rw-r--r-- | include/uapi/misc/habanalabs.h | 61 |
5 files changed, 189 insertions, 27 deletions
diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c index c591f0487272..d4eb9fb9ea12 100644 --- a/drivers/misc/habanalabs/common/command_buffer.c +++ b/drivers/misc/habanalabs/common/command_buffer.c @@ -380,8 +380,9 @@ int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle) } static int hl_cb_info(struct hl_device *hdev, struct hl_cb_mgr *mgr, - u64 cb_handle, u32 *usage_cnt) + u64 cb_handle, u32 flags, u32 *usage_cnt, u64 *device_va) { + struct hl_vm_va_block *va_block; struct hl_cb *cb; u32 handle; int rc = 0; @@ -402,7 +403,18 @@ static int hl_cb_info(struct hl_device *hdev, struct hl_cb_mgr *mgr, goto out; } - *usage_cnt = atomic_read(&cb->cs_cnt); + if (flags & HL_CB_FLAGS_GET_DEVICE_VA) { + va_block = list_first_entry(&cb->va_block_list, struct hl_vm_va_block, node); + if (va_block) { + *device_va = va_block->start; + } else { + dev_err(hdev->dev, "CB is not mapped to the device's MMU\n"); + rc = -EINVAL; + goto out; + } + } else { + *usage_cnt = atomic_read(&cb->cs_cnt); + } out: spin_unlock(&mgr->cb_lock); @@ -414,7 +426,7 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data) union hl_cb_args *args = data; struct hl_device *hdev = hpriv->hdev; enum hl_device_status status; - u64 handle = 0; + u64 handle = 0, device_va; u32 usage_cnt = 0; int rc; @@ -450,9 +462,16 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data) case HL_CB_OP_INFO: rc = hl_cb_info(hdev, &hpriv->cb_mgr, args->in.cb_handle, - &usage_cnt); - memset(args, 0, sizeof(*args)); - args->out.usage_cnt = usage_cnt; + args->in.flags, + &usage_cnt, + &device_va); + + memset(&args->out, 0, sizeof(args->out)); + + if (args->in.flags & HL_CB_FLAGS_GET_DEVICE_VA) + args->out.device_va = device_va; + else + args->out.usage_cnt = usage_cnt; break; default: diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index b9fed6b6d1ab..7073fa6b9f0f 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -2845,6 +2845,106 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) } static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, + struct hl_cb_mgr *cb_mgr, u64 timeout_us, + u64 cq_counters_handle, u64 cq_counters_offset, + u64 target_value, struct hl_user_interrupt *interrupt, + u32 *status, + u64 *timestamp) +{ + struct hl_user_pending_interrupt *pend; + unsigned long timeout, flags; + long completion_rc; + struct hl_cb *cb; + int rc = 0; + u32 handle; + + timeout = hl_usecs64_to_jiffies(timeout_us); + + hl_ctx_get(hdev, ctx); + + cq_counters_handle >>= PAGE_SHIFT; + handle = (u32) cq_counters_handle; + + cb = hl_cb_get(hdev, cb_mgr, handle); + if (!cb) { + hl_ctx_put(ctx); + return -EINVAL; + } + + pend = kzalloc(sizeof(*pend), GFP_KERNEL); + if (!pend) { + hl_cb_put(cb); + hl_ctx_put(ctx); + return -ENOMEM; + } + + hl_fence_init(&pend->fence, ULONG_MAX); + + pend->cq_kernel_addr = (u64 *) cb->kernel_address + cq_counters_offset; + pend->cq_target_value = target_value; + + /* We check for completion value as interrupt could have been received + * before we added the node to the wait list + */ + if (*pend->cq_kernel_addr >= target_value) { + *status = HL_WAIT_CS_STATUS_COMPLETED; + /* There was no interrupt, we assume the completion is now. */ + pend->fence.timestamp = ktime_get(); + } + + if (!timeout_us || (*status == HL_WAIT_CS_STATUS_COMPLETED)) + goto set_timestamp; + + /* Add pending user interrupt to relevant list for the interrupt + * handler to monitor + */ + spin_lock_irqsave(&interrupt->wait_list_lock, flags); + list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head); + spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); + + /* Wait for interrupt handler to signal completion */ + completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion, + timeout); + if (completion_rc > 0) { + *status = HL_WAIT_CS_STATUS_COMPLETED; + } else { + if (completion_rc == -ERESTARTSYS) { + dev_err_ratelimited(hdev->dev, + "user process got signal while waiting for interrupt ID %d\n", + interrupt->interrupt_id); + rc = -EINTR; + *status = HL_WAIT_CS_STATUS_ABORTED; + } else { + if (pend->fence.error == -EIO) { + dev_err_ratelimited(hdev->dev, + "interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n", + pend->fence.error); + rc = -EIO; + *status = HL_WAIT_CS_STATUS_ABORTED; + } else { + dev_err_ratelimited(hdev->dev, "Waiting for interrupt ID %d timedout\n", + interrupt->interrupt_id); + rc = -ETIMEDOUT; + } + *status = HL_WAIT_CS_STATUS_BUSY; + } + } + + spin_lock_irqsave(&interrupt->wait_list_lock, flags); + list_del(&pend->wait_list_node); + spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); + +set_timestamp: + *timestamp = ktime_to_ns(pend->fence.timestamp); + + kfree(pend); + hl_cb_put(cb); + hl_ctx_put(ctx); + + return rc; +} + +static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 user_address, u64 target_value, struct hl_user_interrupt *interrupt, @@ -2861,7 +2961,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, hl_ctx_get(hdev, ctx); - pend = kmalloc(sizeof(*pend), GFP_KERNEL); + pend = kzalloc(sizeof(*pend), GFP_KERNEL); if (!pend) { hl_ctx_put(ctx); return -ENOMEM; @@ -2990,7 +3090,14 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) else interrupt = &hdev->user_interrupt[interrupt_id - first_interrupt]; - rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, + if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) + rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr, + args->in.interrupt_timeout_us, args->in.cq_counters_handle, + args->in.cq_counters_offset, + args->in.target, interrupt, &status, + ×tamp); + else + rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx, args->in.interrupt_timeout_us, args->in.addr, args->in.target, interrupt, &status, ×tamp); diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 4d4986177776..78772fe548b9 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -876,10 +876,15 @@ struct hl_user_interrupt { * pending on an interrupt * @wait_list_node: node in the list of user threads pending on an interrupt * @fence: hl fence object for interrupt completion + * @cq_target_value: CQ target value + * @cq_kernel_addr: CQ kernel address, to be used in the cq interrupt + * handler for taget value comparison */ struct hl_user_pending_interrupt { struct list_head wait_list_node; struct hl_fence fence; + u64 cq_target_value; + u64 *cq_kernel_addr; }; /** diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c index 64e0d9de21bd..6454ea12bf3a 100644 --- a/drivers/misc/habanalabs/common/irq.c +++ b/drivers/misc/habanalabs/common/irq.c @@ -145,8 +145,12 @@ static void handle_user_cq(struct hl_device *hdev, spin_lock(&user_cq->wait_list_lock); list_for_each_entry(pend, &user_cq->wait_list_head, wait_list_node) { - pend->fence.timestamp = now; - complete_all(&pend->fence.completion); + if ((pend->cq_kernel_addr && + *(pend->cq_kernel_addr) >= pend->cq_target_value) || + !pend->cq_kernel_addr) { + pend->fence.timestamp = now; + complete_all(&pend->fence.completion); + } } spin_unlock(&user_cq->wait_list_lock); } diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 648850b954a3..371dfc4243b3 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -680,7 +680,10 @@ struct hl_info_args { #define HL_MAX_CB_SIZE (0x200000 - 32) /* Indicates whether the command buffer should be mapped to the device's MMU */ -#define HL_CB_FLAGS_MAP 0x1 +#define HL_CB_FLAGS_MAP 0x1 + +/* Used with HL_CB_OP_INFO opcode to get the device va address for kernel mapped CB */ +#define HL_CB_FLAGS_GET_DEVICE_VA 0x2 struct hl_cb_in { /* Handle of CB or 0 if we want to create one */ @@ -702,11 +705,16 @@ struct hl_cb_out { /* Handle of CB */ __u64 cb_handle; - /* Information about CB */ - struct { - /* Usage count of CB */ - __u32 usage_cnt; - __u32 pad; + union { + /* Information about CB */ + struct { + /* Usage count of CB */ + __u32 usage_cnt; + __u32 pad; + }; + + /* CB mapped address to device MMU */ + __u64 device_va; }; }; }; @@ -947,9 +955,10 @@ union hl_cs_args { struct hl_cs_out out; }; -#define HL_WAIT_CS_FLAGS_INTERRUPT 0x2 -#define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000 -#define HL_WAIT_CS_FLAGS_MULTI_CS 0x4 +#define HL_WAIT_CS_FLAGS_INTERRUPT 0x2 +#define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000 +#define HL_WAIT_CS_FLAGS_MULTI_CS 0x4 +#define HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ 0x10 #define HL_WAIT_MULTI_CS_LIST_MAX_LEN 32 @@ -969,14 +978,23 @@ struct hl_wait_cs_in { }; struct { - /* User address for completion comparison. - * upon interrupt, driver will compare the value pointed - * by this address with the supplied target value. - * in order not to perform any comparison, set address - * to all 1s. - * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set - */ - __u64 addr; + union { + /* User address for completion comparison. + * upon interrupt, driver will compare the value pointed + * by this address with the supplied target value. + * in order not to perform any comparison, set address + * to all 1s. + * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set + */ + __u64 addr; + + /* cq_counters_handle to a kernel mapped cb which contains + * cq counters. + * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is set + */ + __u64 cq_counters_handle; + }; + /* Target value for completion comparison */ __u64 target; }; @@ -1004,6 +1022,15 @@ struct hl_wait_cs_in { */ __u64 interrupt_timeout_us; }; + + /* + * cq counter offset inside the counters cb pointed by cq_counters_handle above. + * upon interrupt, driver will compare the value pointed + * by this address (cq_counters_handle + cq_counters_offset) + * with the supplied target value. + * relevant only when HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is set + */ + __u64 cq_counters_offset; }; #define HL_WAIT_CS_STATUS_COMPLETED 0 |