gpu: host1x: Syncpoint interrupt sharding

Support sharded syncpoint interrupts on Tegra234+. This feature allows specifying one of eight interrupt lines for each syncpoint to lower processing latency of syncpoint threshold interrupts. Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com> Signed-off-by: Thierry Reding <treding@nvidia.com> Link: https://patchwork.freedesktop.org/patch/msgid/20230901114008.672433-1-cyndis@kapsi.fi
author: Mikko Perttunen <mperttunen@nvidia.com> 2023-09-01 14:40:07 +0300
committer: Thierry Reding <treding@nvidia.com> 2023-10-11 22:52:44 +0200
commit: f017f1e9cb3458a86f586a171e284e2ec46286db (patch)
tree: db1371d5319f1597a2b84f4b9c433c0c5d42e948
parent: b7c00cdf6df58b1d48d31d57f9431a200b0d0c88 (diff)
3 files changed, 61 insertions, 17 deletions
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index b8ac44e7d11a..42fd504abbcd 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -488,7 +488,7 @@ static int host1x_get_resets(struct host1x *host)
 static int host1x_probe(struct platform_device *pdev)
 {
 	struct host1x *host;
-	int err;
+	int err, i;
 
 	host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL);
 	if (!host)
@@ -516,9 +516,30 @@ static int host1x_probe(struct platform_device *pdev)
 			return PTR_ERR(host->regs);
 	}
 
-	host->syncpt_irq = platform_get_irq(pdev, 0);
-	if (host->syncpt_irq < 0)
-		return host->syncpt_irq;
+	for (i = 0; i < ARRAY_SIZE(host->syncpt_irqs); i++) {
+		char irq_name[] = "syncptX";
+
+		sprintf(irq_name, "syncpt%d", i);
+
+		err = platform_get_irq_byname_optional(pdev, irq_name);
+		if (err == -ENXIO)
+			break;
+		if (err < 0)
+			return err;
+
+		host->syncpt_irqs[i] = err;
+	}
+
+	host->num_syncpt_irqs = i;
+
+	/* Device tree without irq names */
+	if (i == 0) {
+		host->syncpt_irqs[0] = platform_get_irq(pdev, 0);
+		if (host->syncpt_irqs[0] < 0)
+			return host->syncpt_irqs[0];
+
+		host->num_syncpt_irqs = 1;
+	}
 
 	mutex_init(&host->devices_lock);
 	INIT_LIST_HEAD(&host->devices);
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 75de50fe03d0..c8e302de7625 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -124,7 +124,8 @@ struct host1x {
 	void __iomem *regs;
 	void __iomem *hv_regs; /* hypervisor region */
 	void __iomem *common_regs;
-	int syncpt_irq;
+	int syncpt_irqs[8];
+	int num_syncpt_irqs;
 	struct host1x_syncpt *syncpt;
 	struct host1x_syncpt_base *bases;
 	struct device *dev;
diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c
index b915ef7d0348..9880e0c47235 100644
--- a/drivers/gpu/host1x/hw/intr_hw.c
+++ b/drivers/gpu/host1x/hw/intr_hw.c
@@ -13,13 +13,20 @@
 #include "../intr.h"
 #include "../dev.h"
 
+struct host1x_intr_irq_data {
+	struct host1x *host;
+	u32 offset;
+};
+
 static irqreturn_t syncpt_thresh_isr(int irq, void *dev_id)
 {
-	struct host1x *host = dev_id;
+	struct host1x_intr_irq_data *irq_data = dev_id;
+	struct host1x *host = irq_data->host;
 	unsigned long reg;
 	unsigned int i, id;
 
-	for (i = 0; i < DIV_ROUND_UP(host->info->nb_pts, 32); i++) {
+	for (i = irq_data->offset; i < DIV_ROUND_UP(host->info->nb_pts, 32);
+	     i += host->num_syncpt_irqs) {
 		reg = host1x_sync_readl(host,
 			HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i));
 
@@ -67,26 +74,41 @@ static void intr_hw_init(struct host1x *host, u32 cpm)
 
 	/*
 	 * Program threshold interrupt destination among 8 lines per VM,
-	 * per syncpoint. For now, just direct all to the first interrupt
-	 * line.
+	 * per syncpoint. For each group of 32 syncpoints (corresponding to one
+	 * interrupt status register), direct to one interrupt line, going
+	 * around in a round robin fashion.
 	 */
-	for (id = 0; id < host->info->nb_pts; id++)
-		host1x_sync_writel(host, 0, HOST1X_SYNC_SYNCPT_INTR_DEST(id));
+	for (id = 0; id < host->info->nb_pts; id++) {
+		u32 reg_offset = id / 32;
+		u32 irq_index = reg_offset % host->num_syncpt_irqs;
+
+		host1x_sync_writel(host, irq_index, HOST1X_SYNC_SYNCPT_INTR_DEST(id));
+	}
 #endif
 }
 
 static int
 host1x_intr_init_host_sync(struct host1x *host, u32 cpm)
 {
-	int err;
+	int err, i;
+	struct host1x_intr_irq_data *irq_data;
+
+	irq_data = devm_kcalloc(host->dev, host->num_syncpt_irqs, sizeof(irq_data[0]), GFP_KERNEL);
+	if (!irq_data)
+		return -ENOMEM;
 
 	host1x_hw_intr_disable_all_syncpt_intrs(host);
 
-	err = devm_request_irq(host->dev, host->syncpt_irq,
-			       syncpt_thresh_isr, IRQF_SHARED,
-			       "host1x_syncpt", host);
-	if (err < 0)
-		return err;
+	for (i = 0; i < host->num_syncpt_irqs; i++) {
+		irq_data[i].host = host;
+		irq_data[i].offset = i;
+
+		err = devm_request_irq(host->dev, host->syncpt_irqs[i],
+				       syncpt_thresh_isr, IRQF_SHARED,
+				       "host1x_syncpt", &irq_data[i]);
+		if (err < 0)
+			return err;
+	}
 
 	intr_hw_init(host, cpm);
author	Mikko Perttunen <mperttunen@nvidia.com>	2023-09-01 14:40:07 +0300
committer	Thierry Reding <treding@nvidia.com>	2023-10-11 22:52:44 +0200
commit	f017f1e9cb3458a86f586a171e284e2ec46286db (patch)
tree	db1371d5319f1597a2b84f4b9c433c0c5d42e948
parent	b7c00cdf6df58b1d48d31d57f9431a200b0d0c88 (diff)