diff options
-rw-r--r-- | drivers/misc/habanalabs/command_submission.c | 2 | ||||
-rw-r--r-- | drivers/misc/habanalabs/firmware_if.c | 22 | ||||
-rw-r--r-- | drivers/misc/habanalabs/goya/goya.c | 5 | ||||
-rw-r--r-- | drivers/misc/habanalabs/habanalabs.h | 16 |
4 files changed, 22 insertions, 23 deletions
diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c index 6ad83d5ef4b0..f00d1c32f6d6 100644 --- a/drivers/misc/habanalabs/command_submission.c +++ b/drivers/misc/habanalabs/command_submission.c @@ -683,7 +683,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) rc = hl_poll_timeout_memory(hdev, &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1), - 100, jiffies_to_usecs(hdev->timeout_jiffies)); + 100, jiffies_to_usecs(hdev->timeout_jiffies), false); if (rc == -ETIMEDOUT) { dev_err(hdev->dev, diff --git a/drivers/misc/habanalabs/firmware_if.c b/drivers/misc/habanalabs/firmware_if.c index cc8168bacb24..ea2ca67fbfbf 100644 --- a/drivers/misc/habanalabs/firmware_if.c +++ b/drivers/misc/habanalabs/firmware_if.c @@ -24,7 +24,7 @@ int hl_fw_push_fw_to_device(struct hl_device *hdev, const char *fw_name, { const struct firmware *fw; const u64 *fw_data; - size_t fw_size, i; + size_t fw_size; int rc; rc = request_firmware(&fw, fw_name, hdev->dev); @@ -45,22 +45,7 @@ int hl_fw_push_fw_to_device(struct hl_device *hdev, const char *fw_name, fw_data = (const u64 *) fw->data; - if ((fw->size % 8) != 0) - fw_size -= 8; - - for (i = 0 ; i < fw_size ; i += 8, fw_data++, dst += 8) { - if (!(i & (0x80000 - 1))) { - dev_dbg(hdev->dev, - "copied so far %zu out of %zu for %s firmware", - i, fw_size, fw_name); - usleep_range(20, 100); - } - - writeq(*fw_data, dst); - } - - if ((fw->size % 8) != 0) - writel(*(const u32 *) fw_data, dst); + memcpy_toio(dst, fw_data, fw_size); out: release_firmware(fw); @@ -112,7 +97,8 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, } rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp, - (tmp == ARMCP_PACKET_FENCE_VAL), 1000, timeout); + (tmp == ARMCP_PACKET_FENCE_VAL), 1000, + timeout, true); hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 1a2c062a57d4..a0e181714891 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -2864,7 +2864,8 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job) } rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, - (tmp == GOYA_QMAN0_FENCE_VAL), 1000, timeout); + (tmp == GOYA_QMAN0_FENCE_VAL), 1000, + timeout, true); hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0); @@ -2945,7 +2946,7 @@ int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id) } rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val), - 1000, GOYA_TEST_QUEUE_WAIT_USEC); + 1000, GOYA_TEST_QUEUE_WAIT_USEC, true); hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 10da9940ee0d..6a4c64b97f38 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -1062,9 +1062,17 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); /* * address in this macro points always to a memory location in the * host's (server's) memory. That location is updated asynchronously - * either by the direct access of the device or by another core + * either by the direct access of the device or by another core. + * + * To work both in LE and BE architectures, we need to distinguish between the + * two states (device or another core updates the memory location). Therefore, + * if mem_written_by_device is true, the host memory being polled will be + * updated directly by the device. If false, the host memory being polled will + * be updated by host CPU. Required so host knows whether or not the memory + * might need to be byte-swapped before returning value to caller. */ -#define hl_poll_timeout_memory(hdev, addr, val, cond, sleep_us, timeout_us) \ +#define hl_poll_timeout_memory(hdev, addr, val, cond, sleep_us, timeout_us, \ + mem_written_by_device) \ ({ \ ktime_t __timeout; \ /* timeout should be longer when working with simulator */ \ @@ -1077,10 +1085,14 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); /* Verify we read updates done by other cores or by device */ \ mb(); \ (val) = *((u32 *) (uintptr_t) (addr)); \ + if (mem_written_by_device) \ + (val) = le32_to_cpu(val); \ if (cond) \ break; \ if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \ (val) = *((u32 *) (uintptr_t) (addr)); \ + if (mem_written_by_device) \ + (val) = le32_to_cpu(val); \ break; \ } \ if (sleep_us) \ |