 arch/arc/include/asm/arcregs.h |   1
 arch/arc/include/asm/cache.h   |   8
 arch/arc/mm/cache.c            | 114
 arch/arc/mm/dma.c              |  18
 4 files changed, 125 insertions(+), 16 deletions(-)
diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index c8f57b8449dc..d8023bc8d1ad 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -35,6 +35,7 @@
 #define ARC_REG_RTT_BCR         0xF2
 #define ARC_REG_IRQ_BCR         0xF3
 #define ARC_REG_SMART_BCR       0xFF
+#define ARC_REG_CLUSTER_BCR     0xcf

 /* status32 Bits Positions */
 #define STATUS_AE_BIT           5       /* Exception active */
diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index d67345d3e2d4..e23ea6e7633a 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -53,6 +53,8 @@ extern void arc_cache_init(void);
 extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
 extern void read_decode_cache_bcr(void);

+extern int ioc_exists;
+
 #endif  /* !__ASSEMBLY__ */

 /* Instruction cache related Auxiliary registers */
@@ -94,4 +96,10 @@ extern void read_decode_cache_bcr(void);
 #define SLC_CTRL_BUSY           0x100
 #define SLC_CTRL_RGN_OP_INV     0x200

+/* IO coherency related Auxiliary registers */
+#define ARC_REG_IO_COH_ENABLE   0x500
+#define ARC_REG_IO_COH_PARTIAL  0x501
+#define ARC_REG_IO_COH_AP0_BASE 0x508
+#define ARC_REG_IO_COH_AP0_SIZE 0x509
+
 #endif /* _ASM_CACHE_H */
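
Before the cache.c changes, it is worth noting how these registers get probed. The bcr_clust_cfg struct introduced in the next file decodes ARC_REG_CLUSTER_BCR (0xcf, added above) by overlaying bitfields on the raw register word; its single-bit c field reports whether the cluster has an IOC port. Below is a minimal standalone sketch of that decode for a little-endian build; the raw value is invented for illustration, and the kernel of course reads the real one with READ_BCR rather than hardcoding it.

#include <stdio.h>

/* Same layout as the little-endian arm of bcr_clust_cfg in the patch */
struct bcr_clust_cfg {
        unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7;
};

int main(void)
{
        /* Invented raw BCR word: ver=2, 4 cores, IOC present (bit 24) */
        unsigned int raw = (1u << 24) | (4u << 8) | 2u;
        union {
                unsigned int word;
                struct bcr_clust_cfg fields;
        } bcr = { .word = raw };

        printf("ver=%u cores=%u ioc=%u\n",
               bcr.fields.ver, bcr.fields.num_cores, bcr.fields.c);
        return 0;
}

The #ifdef CONFIG_CPU_BIG_ENDIAN arm in the patch simply lists the same fields in reverse order so the overlay lands on the same register bits on big-endian builds.
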
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 1cd6695b6ab5..25e7077d4c04 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -22,10 +22,15 @@
 #include <asm/setup.h>

 static int l2_line_sz;
+int ioc_exists;

 void (*_cache_line_loop_ic_fn)(unsigned long paddr, unsigned long vaddr,
                                unsigned long sz, const int cacheop);

+void (*__dma_cache_wback_inv)(unsigned long start, unsigned long sz);
+void (*__dma_cache_inv)(unsigned long start, unsigned long sz);
+void (*__dma_cache_wback)(unsigned long start, unsigned long sz);
+
 char *arc_cache_mumbojumbo(int c, char *buf, int len)
 {
        int n = 0;
@@ -50,6 +55,9 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len)
                n += scnprintf(buf + n, len - n, "SLC\t\t: %uK, %uB Line\n",
                               p->sz_k, p->line_len);

+       if (ioc_exists)
+               n += scnprintf(buf + n, len - n, "IOC\t\t: exists\n");
+
        return buf;
 }

@@ -80,6 +88,14 @@ void read_decode_cache_bcr(void)
 #endif
        } slc_cfg;

+       struct bcr_clust_cfg {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+               unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8;
+#else
+               unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7;
+#endif
+       } cbcr;
+
        p_ic = &cpuinfo_arc700[cpu].icache;
        READ_BCR(ARC_REG_IC_BCR, ibcr);

@@ -133,6 +149,10 @@ slc_chk:
                p_slc->sz_k = 128 << slc_cfg.sz;
                l2_line_sz = p_slc->line_len = (slc_cfg.lsz == 0) ? 128 : 64;
        }
+
+       READ_BCR(ARC_REG_CLUSTER_BCR, cbcr);
+       if (cbcr.c)
+               ioc_exists = 1;
 }

 /*
@@ -516,11 +536,6 @@ noinline void slc_op(unsigned long paddr, unsigned long sz, const int op)
 #endif
 }

-static inline int need_slc_flush(void)
-{
-       return is_isa_arcv2() && l2_line_sz;
-}
-
 /***********************************************************
  * Exported APIs
  */
@@ -569,30 +584,74 @@ void flush_dcache_page(struct page *page)
 }
 EXPORT_SYMBOL(flush_dcache_page);

-void dma_cache_wback_inv(unsigned long start, unsigned long sz)
+/*
+ * DMA ops for systems with L1 cache only
+ * Make memory coherent with L1 cache by flushing/invalidating L1 lines
+ */
+static void __dma_cache_wback_inv_l1(unsigned long start, unsigned long sz)
 {
        __dc_line_op_k(start, sz, OP_FLUSH_N_INV);
+}

-       if (need_slc_flush())
-               slc_op(start, sz, OP_FLUSH_N_INV);
+static void __dma_cache_inv_l1(unsigned long start, unsigned long sz)
+{
+       __dc_line_op_k(start, sz, OP_INV);
 }
-EXPORT_SYMBOL(dma_cache_wback_inv);

-void dma_cache_inv(unsigned long start, unsigned long sz)
+static void __dma_cache_wback_l1(unsigned long start, unsigned long sz)
+{
+       __dc_line_op_k(start, sz, OP_FLUSH);
+}
+
+/*
+ * DMA ops for systems with both L1 and L2 caches, but without IOC
+ * Both L1 and L2 lines need to be explicitly flushed/invalidated
+ */
+static void __dma_cache_wback_inv_slc(unsigned long start, unsigned long sz)
+{
+       __dc_line_op_k(start, sz, OP_FLUSH_N_INV);
+       slc_op(start, sz, OP_FLUSH_N_INV);
+}
+
+static void __dma_cache_inv_slc(unsigned long start, unsigned long sz)
 {
        __dc_line_op_k(start, sz, OP_INV);
+       slc_op(start, sz, OP_INV);
+}

-       if (need_slc_flush())
-               slc_op(start, sz, OP_INV);
+static void __dma_cache_wback_slc(unsigned long start, unsigned long sz)
+{
+       __dc_line_op_k(start, sz, OP_FLUSH);
+       slc_op(start, sz, OP_FLUSH);
+}
+
+/*
+ * DMA ops for systems with IOC
+ * IOC hardware snoops all DMA traffic keeping the caches consistent with
+ * memory - eliding need for any explicit cache maintenance of DMA buffers
+ */
+static void __dma_cache_wback_inv_ioc(unsigned long start, unsigned long sz) {}
+static void __dma_cache_inv_ioc(unsigned long start, unsigned long sz) {}
+static void __dma_cache_wback_ioc(unsigned long start, unsigned long sz) {}
+
+/*
+ * Exported DMA API
+ */
+void dma_cache_wback_inv(unsigned long start, unsigned long sz)
+{
+       __dma_cache_wback_inv(start, sz);
+}
+EXPORT_SYMBOL(dma_cache_wback_inv);
+
+void dma_cache_inv(unsigned long start, unsigned long sz)
+{
+       __dma_cache_inv(start, sz);
 }
 EXPORT_SYMBOL(dma_cache_inv);

 void dma_cache_wback(unsigned long start, unsigned long sz)
 {
-       __dc_line_op_k(start, sz, OP_FLUSH);
-
-       if (need_slc_flush())
-               slc_op(start, sz, OP_FLUSH);
+       __dma_cache_wback(start, sz);
 }
 EXPORT_SYMBOL(dma_cache_wback);

@@ -848,4 +907,27 @@ void arc_cache_init(void)
                        panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
                }
        }
+
+       if (is_isa_arcv2() && ioc_exists) {
+               /* IO coherency base - 0x8z */
+               write_aux_reg(ARC_REG_IO_COH_AP0_BASE, 0x80000);
+               /* IO coherency aperture size - 512Mb: 0x8z-0xAz */
+               write_aux_reg(ARC_REG_IO_COH_AP0_SIZE, 0x11);
+               /* Enable partial writes */
+               write_aux_reg(ARC_REG_IO_COH_PARTIAL, 1);
+               /* Enable IO coherency */
+               write_aux_reg(ARC_REG_IO_COH_ENABLE, 1);
+
+               __dma_cache_wback_inv = __dma_cache_wback_inv_ioc;
+               __dma_cache_inv = __dma_cache_inv_ioc;
+               __dma_cache_wback = __dma_cache_wback_ioc;
+       } else if (is_isa_arcv2() && l2_line_sz) {
+               __dma_cache_wback_inv = __dma_cache_wback_inv_slc;
+               __dma_cache_inv = __dma_cache_inv_slc;
+               __dma_cache_wback = __dma_cache_wback_slc;
+       } else {
+               __dma_cache_wback_inv = __dma_cache_wback_inv_l1;
+               __dma_cache_inv = __dma_cache_inv_l1;
+               __dma_cache_wback = __dma_cache_wback_l1;
+       }
 }
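
The two magic values written to the aperture registers in arc_cache_init() above deserve a note. Taken together they suggest that AP0_BASE is programmed in 4 KB units (0x8000_0000 >> 12 == 0x80000) and AP0_SIZE as log2(size in KB) - 2 (512 MB is 2^19 KB, and 19 - 2 == 17 == 0x11). This encoding is inferred from the two constants and their comments, not from a databook, so treat the sketch below as an assumption that merely reproduces the values in the hunk.

#include <stdio.h>

/* Assumed encodings, reverse-engineered from the 0x80000 / 0x11 pair */
static unsigned int ap0_base(unsigned long phys)
{
        return phys >> 12;              /* base in 4 KB units */
}

static unsigned int ap0_size(unsigned long bytes)
{
        unsigned long kb = bytes >> 10;
        unsigned int log2kb = 0;

        while (kb >>= 1)                /* integer log2 of size in KB */
                log2kb++;
        return log2kb - 2;
}

int main(void)
{
        /* 512 MB aperture at 0x8000_0000, as in arc_cache_init() */
        printf("AP0_BASE=0x%x AP0_SIZE=0x%x\n",
               ap0_base(0x80000000UL), ap0_size(512UL << 20));
        return 0;
}

Both helpers reproduce exactly the values the patch writes, covering the 0x8000_0000-0xA000_0000 window the comment calls 0x8z-0xAz.
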
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index 57706a9c6948..e039fac163f8 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -19,6 +19,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/dma-debug.h>
 #include <linux/export.h>
+#include <asm/cache.h>
 #include <asm/cacheflush.h>

 /*
@@ -53,6 +54,20 @@ void *dma_alloc_coherent(struct device *dev, size_t size,
 {
        void *paddr, *kvaddr;

+       /*
+        * IOC relies on all data (even coherent DMA data) being in cache
+        * Thus allocate normal cached memory
+        *
+        * The gains with IOC are two pronged:
+        *   -For streaming data, elides the need for cache maintenance,
+        *    saving cycles in flush code, and bus bandwidth as all the
+        *    lines of a buffer need to be flushed out to memory
+        *   -For coherent data, Read/Write to buffers terminate early in
+        *    cache (vs. always going to memory - thus are faster)
+        */
+       if (ioc_exists)
+               return dma_alloc_noncoherent(dev, size, dma_handle, gfp);
+
        /* This is linear addr (0x8000_0000 based) */
        paddr = alloc_pages_exact(size, gfp);
        if (!paddr)
@@ -85,6 +100,9 @@ EXPORT_SYMBOL(dma_alloc_coherent);
 void dma_free_coherent(struct device *dev, size_t size, void *kvaddr,
                       dma_addr_t dma_handle)
 {
+       if (ioc_exists)
+               return dma_free_noncoherent(dev, size, kvaddr, dma_handle);
+
        iounmap((void __force __iomem *)kvaddr);

        free_pages_exact((void *)dma_handle, size);
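
Stepping back, the cache.c and dma.c changes hang off a single boot-time decision: probe the hardware once (setting ioc_exists), then either bind the DMA cache-maintenance entry points to the L1-only, L1+SLC, or IOC (no-op) variants, or branch on the flag in the allocation path. The hot path thus pays one indirect call instead of repeated feature tests. A compressed standalone model of that dispatch follows; the function names mirror the patch, but the flag values and printouts are stand-ins for the real probe and cache operations.

#include <stdio.h>

static void wback_l1(unsigned long start, unsigned long sz)
{
        printf("L1 flush      %#lx..+%lu\n", start, sz);
}

static void wback_slc(unsigned long start, unsigned long sz)
{
        printf("L1+SLC flush  %#lx..+%lu\n", start, sz);
}

static void wback_ioc(unsigned long start, unsigned long sz)
{
        (void)start; (void)sz;          /* IOC snoops DMA: nothing to do */
}

/* Bound once at init, exactly like __dma_cache_wback in the patch */
static void (*dma_wback)(unsigned long start, unsigned long sz);

static void cache_init(int ioc_exists, int l2_line_sz)
{
        if (ioc_exists)
                dma_wback = wback_ioc;
        else if (l2_line_sz)
                dma_wback = wback_slc;
        else
                dma_wback = wback_l1;
}

int main(void)
{
        cache_init(0, 64);              /* pretend: no IOC, SLC present */
        dma_wback(0x80000000UL, 4096);  /* routed to the L1+SLC variant */
        return 0;
}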