summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/misc/habanalabs/command_submission.c2
-rw-r--r--drivers/misc/habanalabs/firmware_if.c22
-rw-r--r--drivers/misc/habanalabs/goya/goya.c5
-rw-r--r--drivers/misc/habanalabs/habanalabs.h16
4 files changed, 22 insertions, 23 deletions
diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c
index 6ad83d5ef4b0..f00d1c32f6d6 100644
--- a/drivers/misc/habanalabs/command_submission.c
+++ b/drivers/misc/habanalabs/command_submission.c
@@ -683,7 +683,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
rc = hl_poll_timeout_memory(hdev,
&ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
- 100, jiffies_to_usecs(hdev->timeout_jiffies));
+ 100, jiffies_to_usecs(hdev->timeout_jiffies), false);
if (rc == -ETIMEDOUT) {
dev_err(hdev->dev,
diff --git a/drivers/misc/habanalabs/firmware_if.c b/drivers/misc/habanalabs/firmware_if.c
index cc8168bacb24..ea2ca67fbfbf 100644
--- a/drivers/misc/habanalabs/firmware_if.c
+++ b/drivers/misc/habanalabs/firmware_if.c
@@ -24,7 +24,7 @@ int hl_fw_push_fw_to_device(struct hl_device *hdev, const char *fw_name,
{
const struct firmware *fw;
const u64 *fw_data;
- size_t fw_size, i;
+ size_t fw_size;
int rc;
rc = request_firmware(&fw, fw_name, hdev->dev);
@@ -45,22 +45,7 @@ int hl_fw_push_fw_to_device(struct hl_device *hdev, const char *fw_name,
fw_data = (const u64 *) fw->data;
- if ((fw->size % 8) != 0)
- fw_size -= 8;
-
- for (i = 0 ; i < fw_size ; i += 8, fw_data++, dst += 8) {
- if (!(i & (0x80000 - 1))) {
- dev_dbg(hdev->dev,
- "copied so far %zu out of %zu for %s firmware",
- i, fw_size, fw_name);
- usleep_range(20, 100);
- }
-
- writeq(*fw_data, dst);
- }
-
- if ((fw->size % 8) != 0)
- writel(*(const u32 *) fw_data, dst);
+ memcpy_toio(dst, fw_data, fw_size);
out:
release_firmware(fw);
@@ -112,7 +97,8 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
}
rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
- (tmp == ARMCP_PACKET_FENCE_VAL), 1000, timeout);
+ (tmp == ARMCP_PACKET_FENCE_VAL), 1000,
+ timeout, true);
hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 1a2c062a57d4..a0e181714891 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -2864,7 +2864,8 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
}
rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
- (tmp == GOYA_QMAN0_FENCE_VAL), 1000, timeout);
+ (tmp == GOYA_QMAN0_FENCE_VAL), 1000,
+ timeout, true);
hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0);
@@ -2945,7 +2946,7 @@ int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
}
rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
- 1000, GOYA_TEST_QUEUE_WAIT_USEC);
+ 1000, GOYA_TEST_QUEUE_WAIT_USEC, true);
hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 10da9940ee0d..6a4c64b97f38 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -1062,9 +1062,17 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
/*
* address in this macro points always to a memory location in the
* host's (server's) memory. That location is updated asynchronously
- * either by the direct access of the device or by another core
+ * either by the direct access of the device or by another core.
+ *
+ * To work both in LE and BE architectures, we need to distinguish between the
+ * two states (device or another core updates the memory location). Therefore,
+ * if mem_written_by_device is true, the host memory being polled will be
+ * updated directly by the device. If false, the host memory being polled will
+ * be updated by host CPU. Required so host knows whether or not the memory
+ * might need to be byte-swapped before returning value to caller.
*/
-#define hl_poll_timeout_memory(hdev, addr, val, cond, sleep_us, timeout_us) \
+#define hl_poll_timeout_memory(hdev, addr, val, cond, sleep_us, timeout_us, \
+ mem_written_by_device) \
({ \
ktime_t __timeout; \
/* timeout should be longer when working with simulator */ \
@@ -1077,10 +1085,14 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
/* Verify we read updates done by other cores or by device */ \
mb(); \
(val) = *((u32 *) (uintptr_t) (addr)); \
+ if (mem_written_by_device) \
+ (val) = le32_to_cpu(val); \
if (cond) \
break; \
if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
(val) = *((u32 *) (uintptr_t) (addr)); \
+ if (mem_written_by_device) \
+ (val) = le32_to_cpu(val); \
break; \
} \
if (sleep_us) \