From 1269f4d5c9022ddd53d62e9bfa89b2af08c934ea Mon Sep 17 00:00:00 2001
From: Vineet Gupta
Date: Sat, 25 Apr 2015 10:40:52 +0530
Subject: ARC: fix warning in sched due to thread_saved_pc()

Signed-off-by: Vineet Gupta
---
 arch/arc/include/asm/processor.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arc/include')

diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index 52312cb5dbe2..649e4398ed8e 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -77,7 +77,7 @@ struct task_struct;
  */
 #define TSK_K_ESP(tsk)		(tsk->thread.ksp)
 
-#define TSK_K_REG(tsk, off)	(*((unsigned int *)(TSK_K_ESP(tsk) + \
+#define TSK_K_REG(tsk, off)	(*((unsigned long *)(TSK_K_ESP(tsk) + \
 					sizeof(struct callee_regs) + off)))
 
 #define TSK_K_BLINK(tsk)	TSK_K_REG(tsk, 4)
--
cgit v1.2.3-58-ga151
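For context on why one word silences a scheduler warning: thread_saved_pc() on ARC boils down to TSK_K_REG(), a macro, so the type of the cast inside it is the type callers' format strings see; with an unsigned int cast, a "%lx" user likely trips -Wformat. A minimal user-space sketch of that shape (the buffer, offset and macro names are stand-ins, not the kernel layout):

	#include <stdio.h>

	static unsigned long kstack[16];	/* stand-in for the saved callee regs */

	#define TSK_K_REG_OLD(off)	(*((unsigned int *)((char *)kstack + (off))))
	#define TSK_K_REG_NEW(off)	(*((unsigned long *)((char *)kstack + (off))))

	int main(void)
	{
		/* -Wformat: "%lx" expects unsigned long, the macro yields unsigned int */
		printf("pc: %lx\n", TSK_K_REG_OLD(8));

		/* clean: the cast now matches what "%lx" (and unsigned long callers) see */
		printf("pc: %lx\n", TSK_K_REG_NEW(8));
		return 0;
	}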
From 45309493509b5acd667246c8232dd4911a7a168c Mon Sep 17 00:00:00 2001
From: Vineet Gupta
Date: Mon, 18 May 2015 12:46:37 +0530
Subject: ARC: fold ___flush_dcache_page into __flush_dcache_page

Signed-off-by: Vineet Gupta
---
 arch/arc/include/asm/cacheflush.h |  4 +---
 arch/arc/mm/cache_arc700.c        | 12 ++++++------
 2 files changed, 7 insertions(+), 9 deletions(-)

(limited to 'arch/arc/include')

diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h
index 6abc4972bc93..0992d3dbcc65 100644
--- a/arch/arc/include/asm/cacheflush.h
+++ b/arch/arc/include/asm/cacheflush.h
@@ -34,9 +34,7 @@ void flush_cache_all(void);
 void flush_icache_range(unsigned long start, unsigned long end);
 void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len);
 void __inv_icache_page(unsigned long paddr, unsigned long vaddr);
-void ___flush_dcache_page(unsigned long paddr, unsigned long vaddr);
-#define __flush_dcache_page(p, v)	\
-		___flush_dcache_page((unsigned long)p, (unsigned long)v)
+void __flush_dcache_page(unsigned long paddr, unsigned long vaddr);
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 
diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
index 12b2100db073..322e11b3b1e3 100644
--- a/arch/arc/mm/cache_arc700.c
+++ b/arch/arc/mm/cache_arc700.c
@@ -493,7 +493,7 @@ void flush_dcache_page(struct page *page)
 	} else if (page_mapped(page)) {
 
 		/* kernel reading from page with U-mapping */
-		void *paddr = page_address(page);
+		unsigned long paddr = (unsigned long)page_address(page);
 		unsigned long vaddr = page->index << PAGE_CACHE_SHIFT;
 
 		if (addr_not_cache_congruent(paddr, vaddr))
@@ -605,7 +605,7 @@ void __inv_icache_page(unsigned long paddr, unsigned long vaddr)
 * wrapper to clearout kernel or userspace mappings of a page
 * For kernel mappings @vaddr == @paddr
 */
-void ___flush_dcache_page(unsigned long paddr, unsigned long vaddr)
+void __flush_dcache_page(unsigned long paddr, unsigned long vaddr)
 {
 	__dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV);
 }
@@ -637,7 +637,7 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr,
 
 	u_vaddr &= PAGE_MASK;
 
-	___flush_dcache_page(paddr, u_vaddr);
+	__flush_dcache_page(paddr, u_vaddr);
 
 	if (vma->vm_flags & VM_EXEC)
 		__inv_icache_page(paddr, u_vaddr);
@@ -663,8 +663,8 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page,
 void copy_user_highpage(struct page *to, struct page *from,
 	unsigned long u_vaddr, struct vm_area_struct *vma)
 {
-	void *kfrom = page_address(from);
-	void *kto = page_address(to);
+	unsigned long kfrom = (unsigned long)page_address(from);
+	unsigned long kto = (unsigned long)page_address(to);
 	int clean_src_k_mappings = 0;
 
 	/*
@@ -680,7 +680,7 @@ void copy_user_highpage(struct page *to, struct page *from,
 		clean_src_k_mappings = 1;
 	}
 
-	copy_page(kto, kfrom);
+	copy_page((void *)kto, (void *)kfrom);
 
 	/*
 	 * Mark DST page K-mapping as dirty for a later finalization by
--
cgit v1.2.3-58-ga151

From 454bfda9aca0f6a0487eef523ac92dfc6646807d Mon Sep 17 00:00:00 2001
From: Vineet Gupta
Date: Thu, 16 Apr 2015 21:04:49 +0530
Subject: ARC: remove the unused platform helpers from dma mapping API

Signed-off-by: Vineet Gupta
---
 arch/arc/Kconfig                   |  4 ----
 arch/arc/include/asm/dma-mapping.h | 31 +++++--------------------------
 arch/arc/mm/dma.c                  | 12 ++++--------
 3 files changed, 9 insertions(+), 38 deletions(-)

(limited to 'arch/arc/include')

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index df94ac1f75b6..91cac572ea89 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -117,10 +117,6 @@ config CPU_BIG_ENDIAN
 	help
 	  Build kernel for Big Endian Mode of ARC CPU
 
-# If a platform can't work with 0x8000_0000 based dma_addr_t
-config ARC_PLAT_NEEDS_CPU_TO_DMA
-	bool
-
 config SMP
 	bool "Symmetric Multi-Processing (Incomplete)"
 	default n
 
diff --git a/arch/arc/include/asm/dma-mapping.h b/arch/arc/include/asm/dma-mapping.h
index 45b8e0cea176..fd6cdb56d4fd 100644
--- a/arch/arc/include/asm/dma-mapping.h
+++ b/arch/arc/include/asm/dma-mapping.h
@@ -14,23 +14,6 @@
 #include
 #include
 
-#ifndef CONFIG_ARC_PLAT_NEEDS_CPU_TO_DMA
-/*
- * dma_map_* API take cpu addresses, which is kernel logical address in the
- * untranslated address space (0x8000_0000) based. The dma address (bus addr)
- * ideally needs to be 0x0000_0000 based hence these glue routines.
- * However given that intermediate bus bridges can ignore the high bit, we can
- * do with these routines being no-ops.
- * If a platform/device comes up which sriclty requires 0 based bus addr
- * (e.g.
AHB-PCI bridge on Angel4 board), then it can provide it's own versions - */ -#define plat_dma_addr_to_kernel(dev, addr) ((unsigned long)(addr)) -#define plat_kernel_addr_to_dma(dev, ptr) ((dma_addr_t)(ptr)) - -#else -#include -#endif - void *dma_alloc_noncoherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp); @@ -94,7 +77,7 @@ dma_map_single(struct device *dev, void *cpu_addr, size_t size, enum dma_data_direction dir) { _dma_cache_sync((unsigned long)cpu_addr, size, dir); - return plat_kernel_addr_to_dma(dev, cpu_addr); + return (dma_addr_t)cpu_addr; } static inline void @@ -147,16 +130,14 @@ static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) { - _dma_cache_sync(plat_dma_addr_to_kernel(dev, dma_handle), size, - DMA_FROM_DEVICE); + _dma_cache_sync(dma_handle, size, DMA_FROM_DEVICE); } static inline void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) { - _dma_cache_sync(plat_dma_addr_to_kernel(dev, dma_handle), size, - DMA_TO_DEVICE); + _dma_cache_sync(dma_handle, size, DMA_TO_DEVICE); } static inline void @@ -164,8 +145,7 @@ dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle, unsigned long offset, size_t size, enum dma_data_direction direction) { - _dma_cache_sync(plat_dma_addr_to_kernel(dev, dma_handle) + offset, - size, DMA_FROM_DEVICE); + _dma_cache_sync(dma_handle + offset, size, DMA_FROM_DEVICE); } static inline void @@ -173,8 +153,7 @@ dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle, unsigned long offset, size_t size, enum dma_data_direction direction) { - _dma_cache_sync(plat_dma_addr_to_kernel(dev, dma_handle) + offset, - size, DMA_TO_DEVICE); + _dma_cache_sync(dma_handle + offset, size, DMA_TO_DEVICE); } static inline void diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 12cc6485b218..2cfe81dca92a 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -14,8 +14,6 @@ * Cache bit off in the TLB entry. * * The default DMA address == Phy address which is 0x8000_0000 based. 
- * A platform/device can make it zero based, by over-riding
- * plat_{dma,kernel}_addr_to_{kernel,dma}
 */
 
 #include
@@ -37,7 +35,7 @@ void *dma_alloc_noncoherent(struct device *dev, size_t size,
 		return NULL;
 
 	/* This is bus address, platform dependent */
-	*dma_handle = plat_kernel_addr_to_dma(dev, paddr);
+	*dma_handle = (dma_addr_t)paddr;
 
 	return paddr;
 }
@@ -46,8 +44,7 @@ EXPORT_SYMBOL(dma_alloc_noncoherent);
 void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr,
 			  dma_addr_t dma_handle)
 {
-	free_pages_exact((void *)plat_dma_addr_to_kernel(dev, dma_handle),
-			 size);
+	free_pages_exact((void *)dma_handle, size);
 }
 EXPORT_SYMBOL(dma_free_noncoherent);
 
@@ -67,7 +64,7 @@ void *dma_alloc_coherent(struct device *dev, size_t size,
 	memset(kvaddr, 0, size);
 
 	/* This is bus address, platform dependent */
-	*dma_handle = plat_kernel_addr_to_dma(dev, paddr);
+	*dma_handle = (dma_addr_t)paddr;
 
 	return kvaddr;
 }
@@ -78,8 +75,7 @@ void dma_free_coherent(struct device *dev, size_t size, void *kvaddr,
 {
 	iounmap((void __force __iomem *)kvaddr);
 
-	free_pages_exact((void *)plat_dma_addr_to_kernel(dev, dma_handle),
-			 size);
+	free_pages_exact((void *)dma_handle, size);
 }
 EXPORT_SYMBOL(dma_free_coherent);
--
cgit v1.2.3-58-ga151
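With the plat_*() hooks gone, the dma_addr_t handed out above is just the kernel logical address cast over: an identity bus mapping, relying on intermediate bus bridges ignoring the high bit of the 0x8000_0000 based address. A user-space sketch of the identity round-trip the API now assumes (malloc stands in for the kernel's page allocation; not kernel code):

	#include <stdio.h>
	#include <stdlib.h>

	typedef unsigned long dma_addr_t;	/* 32-bit on ARC700 */

	int main(void)
	{
		void *cpu_addr = malloc(64);	/* kernel would use alloc_pages_exact() */

		/* "map": bus address == cpu address, no translation step left */
		dma_addr_t handle = (dma_addr_t)cpu_addr;

		/* so the free/sync paths can cast straight back */
		void *back = (void *)handle;

		printf("identity mapping holds: %s\n", back == cpu_addr ? "yes" : "no");
		free(back);
		return 0;
	}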
From 40b552d95a545e828fb4ebbf68a385cb9eaebf64 Mon Sep 17 00:00:00 2001
From: Vineet Gupta
Date: Fri, 13 Feb 2015 18:33:47 +0530
Subject: ARC: compress cpuinfo_arc_mmu (mainly save page size in KB)

Signed-off-by: Vineet Gupta
---
 arch/arc/include/asm/arcregs.h | 3 ++-
 arch/arc/mm/tlb.c              | 8 ++++----
 2 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'arch/arc/include')

diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index e2b1b1211b0d..56e2f1f2a3c5 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -299,7 +299,8 @@ struct bcr_generic {
 */
 
 struct cpuinfo_arc_mmu {
-	unsigned int ver, pg_sz, sets, ways, u_dtlb, u_itlb, num_tlb;
+	unsigned int ver:4, pg_sz_k:8, pad:8, u_dtlb:6, u_itlb:6;
+	unsigned int num_tlb:16, sets:12, ways:4;
 };
 
 struct cpuinfo_arc_cache {
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 7f47d2a56f44..914d8e0c0318 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -587,14 +587,14 @@ void read_decode_mmu_bcr(void)
 
 	if (mmu->ver <= 2) {
 		mmu2 = (struct bcr_mmu_1_2 *)&tmp;
-		mmu->pg_sz = PAGE_SIZE;
+		mmu->pg_sz_k = TO_KB(PAGE_SIZE);
 		mmu->sets = 1 << mmu2->sets;
 		mmu->ways = 1 << mmu2->ways;
 		mmu->u_dtlb = mmu2->u_dtlb;
 		mmu->u_itlb = mmu2->u_itlb;
 	} else {
 		mmu3 = (struct bcr_mmu_3 *)&tmp;
-		mmu->pg_sz = 512 << mmu3->pg_sz;
+		mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1);
 		mmu->sets = 1 << mmu3->sets;
 		mmu->ways = 1 << mmu3->ways;
 		mmu->u_dtlb = mmu3->u_dtlb;
@@ -611,7 +611,7 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
 
 	n += scnprintf(buf + n, len - n,
 		      "MMU [v%x]\t: %dk PAGE, JTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n",
-		       p_mmu->ver, TO_KB(p_mmu->pg_sz),
+		       p_mmu->ver, p_mmu->pg_sz_k,
 		       p_mmu->num_tlb, p_mmu->sets, p_mmu->ways,
 		       p_mmu->u_dtlb, p_mmu->u_itlb,
 		       IS_ENABLED(CONFIG_ARC_MMU_SASID) ? ",SASID" : "");
@@ -639,7 +639,7 @@ void arc_mmu_init(void)
 			mmu->ver, CONFIG_ARC_MMU_VER);
 	}
 
-	if (mmu->pg_sz != PAGE_SIZE)
+	if (mmu->pg_sz_k != TO_KB(PAGE_SIZE))
 		panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE));
 
 	/* Enable the MMU */
--
cgit v1.2.3-58-ga151
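Two things happen in the patch above: the MMU page size is now stored in kilobytes inside an 8-bit field, and the whole struct packs into two 32-bit words. For MMU v3 the BCR encodes the page size as (512 << pg_sz) bytes, so storing kilobytes is just 1 << (pg_sz - 1). A quick user-space check of that identity and of the packed size (the struct mirrors the patch; e.g. pg_sz = 5 gives 16 KB either way):

	#include <assert.h>
	#include <stdio.h>

	struct cpuinfo_arc_mmu {	/* packed layout from the patch: 2 words */
		unsigned int ver:4, pg_sz_k:8, pad:8, u_dtlb:6, u_itlb:6;
		unsigned int num_tlb:16, sets:12, ways:4;
	};

	int main(void)
	{
		/* (512 << pg_sz) bytes == (1 << (pg_sz - 1)) kilobytes */
		for (unsigned int pg_sz = 1; pg_sz <= 10; pg_sz++)
			assert((512u << pg_sz) / 1024 == (1u << (pg_sz - 1)));

		/* was 7 plain unsigned ints (28 bytes), now 2 words */
		printf("sizeof(struct cpuinfo_arc_mmu) = %zu\n",
		       sizeof(struct cpuinfo_arc_mmu));
		return 0;
	}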
From 4db27dca607aed14a852b21db02ddb530551c5eb Mon Sep 17 00:00:00 2001
From: Vineet Gupta
Date: Thu, 5 Mar 2015 14:46:20 +0530
Subject: ARC: mm: document system mem map clearly

Signed-off-by: Vineet Gupta
---
 arch/arc/include/asm/processor.h | 35 ++++++++++++++++-------------------
 arch/arc/include/uapi/asm/page.h |  2 +-
 2 files changed, 17 insertions(+), 20 deletions(-)

(limited to 'arch/arc/include')

diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index 649e4398ed8e..ee682d8e0213 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -100,29 +100,26 @@ extern unsigned int get_wchan(struct task_struct *p);
 
 #endif /* !__ASSEMBLY__ */
 
-/* Kernels Virtual memory area.
- * Unlike other architectures(MIPS, sh, cris ) ARC 700 does not have a
- * "kernel translated" region (like KSEG2 in MIPS). So we use a upper part
- * of the translated bottom 2GB for kernel virtual memory and protect
- * these pages from user accesses by disabling Ru, Eu and Wu.
+/*
+ * System Memory Map on ARC
+ *
+ * ---------------------------- (lower 2G, Translated) -------------------------
+ * 0x0000_0000		0x5FFF_FFFF	(user vaddr: TASK_SIZE)
+ * 0x6000_0000		0x6FFF_FFFF	(reserved gutter between U/K)
+ * 0x7000_0000		0x7FFF_FFFF	(kvaddr: vmalloc/modules/pkmap..)
+ *
+ * PAGE_OFFSET ---------------- (Upper 2G, Untranslated) -----------------------
+ * 0x8000_0000		0xBFFF_FFFF	(kernel direct mapped)
+ * 0xC000_0000		0xFFFF_FFFF	(peripheral uncached space)
+ * -----------------------------------------------------------------------------
 */
-#define VMALLOC_SIZE	(0x10000000)	/* 256M */
-#define VMALLOC_START	(PAGE_OFFSET - VMALLOC_SIZE)
-#define VMALLOC_END	(PAGE_OFFSET)
+#define VMALLOC_START	0x70000000
+#define VMALLOC_SIZE	(PAGE_OFFSET - VMALLOC_START)
+#define VMALLOC_END	(VMALLOC_START + VMALLOC_SIZE)
 
-/* Most of the architectures seem to be keeping some kind of padding between
- * userspace TASK_SIZE and PAGE_OFFSET. i.e TASK_SIZE != PAGE_OFFSET.
- */
 #define USER_KERNEL_GUTTER	0x10000000
 
-/* User address space:
- * On ARC700, CPU allows the entire lower half of 32 bit address space to be
- * translated. Thus potentially 2G (0:0x7FFF_FFFF) could be User vaddr space.
- * However we steal 256M for kernel addr (0x7000_0000:0x7FFF_FFFF) and another
- * 256M (0x6000_0000:0x6FFF_FFFF) is gutter between user/kernel spaces
- * Thus total User vaddr space is (0:0x5FFF_FFFF)
- */
-#define TASK_SIZE	(PAGE_OFFSET - VMALLOC_SIZE - USER_KERNEL_GUTTER)
+#define TASK_SIZE	(VMALLOC_START - USER_KERNEL_GUTTER)
 
 #define STACK_TOP	TASK_SIZE
 #define STACK_TOP_MAX	STACK_TOP
 
diff --git a/arch/arc/include/uapi/asm/page.h b/arch/arc/include/uapi/asm/page.h
index e5d41e08240c..9d129a2a1351 100644
--- a/arch/arc/include/uapi/asm/page.h
+++ b/arch/arc/include/uapi/asm/page.h
@@ -30,7 +30,7 @@
 #define PAGE_OFFSET	(0x80000000)
 #else
 #define PAGE_SIZE	(1UL << PAGE_SHIFT)	/* Default 8K */
-#define PAGE_OFFSET	(0x80000000UL)	/* Kernel starts at 2G onwards */
+#define PAGE_OFFSET	(0x80000000UL)		/* Kernel starts at 2G onwards */
 #endif
 
 #define PAGE_MASK	(~(PAGE_SIZE-1))
--
cgit v1.2.3-58-ga151
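The new constants restate the map drawn in the comment block: vmalloc is the 256 MB directly below PAGE_OFFSET, and user space ends one 256 MB gutter below that, at 0x6000_0000. A plain C restatement of the arithmetic (mirroring the #defines, so the figures can be eyeballed):

	#include <stdio.h>

	#define PAGE_OFFSET		0x80000000UL	/* kernel starts at 2G */
	#define VMALLOC_START		0x70000000UL
	#define VMALLOC_SIZE		(PAGE_OFFSET - VMALLOC_START)
	#define VMALLOC_END		(VMALLOC_START + VMALLOC_SIZE)
	#define USER_KERNEL_GUTTER	0x10000000UL
	#define TASK_SIZE		(VMALLOC_START - USER_KERNEL_GUTTER)

	int main(void)
	{
		printf("vmalloc : 0x%08lx..0x%08lx (%lu MB)\n",
		       VMALLOC_START, VMALLOC_END, VMALLOC_SIZE >> 20);
		printf("user    : 0x00000000..0x%08lx\n", TASK_SIZE);	/* 0x6000_0000 */
		return 0;
	}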
From fbfa26ae3b2001d0885938175f9af5a4c50a528c Mon Sep 17 00:00:00 2001
From: Vineet Gupta
Date: Mon, 13 Oct 2014 15:12:25 +0530
Subject: ARC: entry.S: common'ize scratch reg freeup in intr + exceptions

Signed-off-by: Vineet Gupta
---
 arch/arc/include/asm/entry.h | 35 ++++++++++++++++-------------------
 arch/arc/kernel/entry.S      | 10 ++--------
 2 files changed, 18 insertions(+), 27 deletions(-)

(limited to 'arch/arc/include')

diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h
index 884081099f80..9e844ab5eede 100644
--- a/arch/arc/include/asm/entry.h
+++ b/arch/arc/include/asm/entry.h
@@ -360,26 +360,26 @@
 .endm
 
 /*--------------------------------------------------------------
- * For early Exception Prologue, a core reg is temporarily needed to
+ * For early Exception/ISR Prologue, a core reg is temporarily needed to
 * code the rest of prolog (stack switching). This is done by stashing
 * it to memory (non-SMP case) or SCRATCH0 Aux Reg (SMP).
 *
 * Before saving the full regfile - this reg is restored back, only
 * to be saved again on kernel mode stack, as part of pt_regs.
 *-------------------------------------------------------------*/
-.macro EXCPN_PROLOG_FREEUP_REG	reg
+.macro PROLOG_FREEUP_REG	reg, mem
 #ifdef CONFIG_SMP
 	sr  \reg, [ARC_REG_SCRATCH_DATA0]
 #else
-	st  \reg, [@ex_saved_reg1]
+	st  \reg, [\mem]
 #endif
 .endm
 
-.macro EXCPN_PROLOG_RESTORE_REG	reg
+.macro PROLOG_RESTORE_REG	reg, mem
 #ifdef CONFIG_SMP
 	lr  \reg, [ARC_REG_SCRATCH_DATA0]
 #else
-	ld  \reg, [@ex_saved_reg1]
+	ld  \reg, [\mem]
 #endif
 .endm
 
@@ -393,7 +393,7 @@
 .macro EXCEPTION_PROLOGUE
 
 	/* Need at least 1 reg to code the early exception prologue */
-	EXCPN_PROLOG_FREEUP_REG r9
+	PROLOG_FREEUP_REG r9, @ex_saved_reg1
 
 	/* U/K mode at time of exception (stack not switched if already K) */
 	lr  r9, [erstatus]
@@ -421,7 +421,7 @@
 	st  r0, [sp, 4]    /* orig_r0, needed only for sys calls */
 
 	/* Restore r9 used to code the early prologue */
-	EXCPN_PROLOG_RESTORE_REG  r9
+	PROLOG_RESTORE_REG  r9, @ex_saved_reg1
 
 	SAVE_R0_TO_R12
 	PUSH	gp
@@ -471,12 +471,8 @@
 *-------------------------------------------------------------*/
 .macro SAVE_ALL_INT1
 
-	/* restore original r9 to be saved as part of reg-file */
-#ifdef CONFIG_SMP
-	lr  r9, [ARC_REG_SCRATCH_DATA0]
-#else
-	ld  r9, [@int1_saved_reg]
-#endif
+	/* restore original r9 */
+	PROLOG_RESTORE_REG  r9, @int1_saved_reg
 
 	/* now we are ready to save the remaining context :) */
 	st      event_IRQ1, [sp, 8]      /* Dummy ECR */
@@ -496,12 +492,13 @@
 
 .macro SAVE_ALL_INT2
 
-	/* TODO-vineetg: SMP we can't use global nor can we use
-	 * SCRATCH0 as we do for int1 because while int1 is using
-	 * it, int2 can come
-	 */
-	/* retsore original r9 , saved in sys_saved_r9 */
-	ld  r9, [@int2_saved_reg]
+	/*
+	 * In SMP we can't use mem nor can we use SCRARCH_DATA0
+	 * as we do for int1 because int2 can clobber it
+	 * Hence 2 levels of intr are NOT allowed in SMP (by Kconfig)
+	 */
+	/* restore original r9 */
+	PROLOG_RESTORE_REG  r9, @int2_saved_reg
 
 	/* now we are ready to save the remaining context :) */
 	st      event_IRQ2, [sp, 8]      /* Dummy ECR */
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index d868289c5a26..13b14b8dcd8d 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -186,9 +186,8 @@ reserved:	; processor restart
 ; ---------------------------------------------
 ENTRY(handle_interrupt_level2)
 
-	; TODO-vineetg for SMP this wont work
 	; free up r9 as scratchpad
-	st  r9, [@int2_saved_reg]
+	PROLOG_FREEUP_REG r9, @int2_saved_reg
 
 	;Which mode (user/kernel) was the system in when intr occured
 	lr  r9, [status32_l2]
@@ -234,12 +233,7 @@ END(handle_interrupt_level2)
 ; ---------------------------------------------
 ENTRY(handle_interrupt_level1)
 
-	/* free up r9 as scratchpad */
-#ifdef CONFIG_SMP
-	sr  r9, [ARC_REG_SCRATCH_DATA0]
-#else
-	st  r9, [@int1_saved_reg]
-#endif
+	PROLOG_FREEUP_REG r9, @int1_saved_reg
 
 	;Which mode (user/kernel) was the system in when intr occured
 	lr  r9, [status32_l1]
--
cgit v1.2.3-58-ga151

From 09f3b37e4e3bbe22444617c273a3c046aade5db2 Mon Sep 17 00:00:00 2001
From: Vineet Gupta
Date: Mon, 13 Oct 2014 18:13:35 +0530
Subject: ARC: entry.S: Introduce INTERRUPT_{PROLOGUE,EPILOGUE}

-common'ize macros for level 1 and level 2 interrupts

Signed-off-by: Vineet Gupta
---
 arch/arc/include/asm/arcregs.h |  3 --
 arch/arc/include/asm/entry.h   | 77 +++++++++++------------------------------
 arch/arc/kernel/entry.S        | 22 +++---------
 3 files changed, 23 insertions(+), 79 deletions(-)

(limited to 'arch/arc/include')

diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index 56e2f1f2a3c5..3ab66fcd9df1 100644
--- a/arch/arc/include/asm/arcregs.h
+++
b/arch/arc/include/asm/arcregs.h @@ -76,9 +76,6 @@ #define ECR_C_BIT_DTLB_LD_MISS 8 #define ECR_C_BIT_DTLB_ST_MISS 9 -/* Dummy ECR values for Interrupts */ -#define event_IRQ1 0x0031abcd -#define event_IRQ2 0x0032abcd /* Auxiliary registers */ #define AUX_IDENTITY 4 diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h index 9e844ab5eede..934bb835106f 100644 --- a/arch/arc/include/asm/entry.h +++ b/arch/arc/include/asm/entry.h @@ -465,55 +465,37 @@ /* orig_r0, ECR, user_r25 skipped automatically */ .endm +/* Dummy ECR values for Interrupts */ +#define event_IRQ1 0x0031abcd +#define event_IRQ2 0x0032abcd -/*-------------------------------------------------------------- - * Save all registers used by interrupt handlers. - *-------------------------------------------------------------*/ -.macro SAVE_ALL_INT1 +.macro INTERRUPT_PROLOGUE LVL - /* restore original r9 */ - PROLOG_RESTORE_REG r9, @int1_saved_reg + /* free up r9 as scratchpad */ + PROLOG_FREEUP_REG r9, @int\LVL\()_saved_reg - /* now we are ready to save the remaining context :) */ - st event_IRQ1, [sp, 8] /* Dummy ECR */ - st 0, [sp, 4] /* orig_r0 , N/A for IRQ */ + /* Which mode (user/kernel) was the system in when intr occurred */ + lr r9, [status32_l\LVL\()] - SAVE_R0_TO_R12 - PUSH gp - PUSH fp - PUSH blink - PUSH ilink1 - PUSHAX status32_l1 - PUSH lp_count - PUSHAX lp_end - PUSHAX lp_start - PUSHAX bta_l1 -.endm - -.macro SAVE_ALL_INT2 + SWITCH_TO_KERNEL_STK - /* - * In SMP we can't use mem nor can we use SCRARCH_DATA0 - * as we do for int1 because int2 can clobber it - * Hence 2 levels of intr are NOT allowed in SMP (by Kconfig) - */ /* restore original r9 */ - PROLOG_RESTORE_REG r9, @int2_saved_reg + PROLOG_RESTORE_REG r9, @int\LVL\()_saved_reg - /* now we are ready to save the remaining context :) */ - st event_IRQ2, [sp, 8] /* Dummy ECR */ + /* now we are ready to save the remaining context */ + st 0x003\LVL\()abcd, [sp, 8] /* Dummy ECR */ st 0, [sp, 4] /* orig_r0 , N/A for IRQ */ SAVE_R0_TO_R12 PUSH gp PUSH fp PUSH blink - PUSH ilink2 - PUSHAX status32_l2 + PUSH ilink\LVL\() + PUSHAX status32_l\LVL\() PUSH lp_count PUSHAX lp_end PUSHAX lp_start - PUSHAX bta_l2 + PUSHAX bta_l\LVL\() .endm /*-------------------------------------------------------------- @@ -525,17 +507,16 @@ * for memory load operations. If used in that way interrupts are deffered * by hardware and that is not good. 
*-------------------------------------------------------------*/
-
-.macro RESTORE_ALL_INT1
-	POPAX	bta_l1
+.macro INTERRUPT_EPILOGUE  LVL
+	POPAX	bta_l\LVL\()
 	POPAX	lp_start
 	POPAX	lp_end
 
 	POP	r9
 	mov	lp_count, r9	;LD to lp_count is not allowed
 
-	POPAX	status32_l1
-	POP	ilink1
+	POPAX	status32_l\LVL\()
+	POP	ilink\LVL\()
 	POP	blink
 	POP	fp
 	POP	gp
@@ -545,26 +526,6 @@
 	/* orig_r0, ECR, user_r25 skipped automatically */
 .endm
 
-.macro RESTORE_ALL_INT2
-	POPAX	bta_l2
-	POPAX	lp_start
-	POPAX	lp_end
-
-	POP	r9
-	mov	lp_count, r9	;LD to lp_count is not allowed
-
-	POPAX	status32_l2
-	POP	ilink2
-	POP	blink
-	POP	fp
-	POP	gp
-	RESTORE_R12_TO_R0
-
-	ld	sp, [sp]	/* restore original sp */
-	/* orig_r0, ECR, user_r25 skipped automatically */
-.endm
-
-
 /* Get CPU-ID of this core */
 .macro GET_CPU_ID reg
 	lr	\reg, [identity]
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index 13b14b8dcd8d..03a349520b55 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -186,14 +186,7 @@ reserved:	; processor restart
 ; ---------------------------------------------
 ENTRY(handle_interrupt_level2)
 
-	; free up r9 as scratchpad
-	PROLOG_FREEUP_REG r9, @int2_saved_reg
-
-	;Which mode (user/kernel) was the system in when intr occured
-	lr  r9, [status32_l2]
-
-	SWITCH_TO_KERNEL_STK
-	SAVE_ALL_INT2
+	INTERRUPT_PROLOGUE 2
 
 	;------------------------------------------------------
 	; if L2 IRQ interrupted a L1 ISR, disable preemption
@@ -233,13 +226,7 @@ END(handle_interrupt_level2)
 ; ---------------------------------------------
 ENTRY(handle_interrupt_level1)
 
-	PROLOG_FREEUP_REG r9, @int1_saved_reg
-
-	;Which mode (user/kernel) was the system in when intr occured
-	lr  r9, [status32_l1]
-
-	SWITCH_TO_KERNEL_STK
-	SAVE_ALL_INT1
+	INTERRUPT_PROLOGUE 1
 
 	lr  r0, [icause1]
 	and r0, r0, 0x1f
@@ -698,7 +685,7 @@ not_exception:
 
 149:
 	;return from level 2
-	RESTORE_ALL_INT2
+	INTERRUPT_EPILOGUE 2
debug_marker_l2:
 	rtie
 
@@ -709,8 +696,7 @@ not_level2_interrupt:
 	bbit0  r10, STATUS_A1_BIT, not_level1_interrupt
 
 	;return from level 1
-
-	RESTORE_ALL_INT1
+	INTERRUPT_EPILOGUE 1
debug_marker_l1:
 	rtie
--
cgit v1.2.3-58-ga151
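The \LVL\() spelling in the new INTERRUPT_{PROLOGUE,EPILOGUE} macros is GNU assembler token pasting: \() terminates the argument name, so status32_l\LVL\() expands to status32_l1 or status32_l2, and 0x003\LVL\()abcd yields the level's dummy ECR. For readers more used to the C preprocessor, the same trick with ## pasting (illustration only, not kernel code):

	#include <stdio.h>

	/* like status32_l\LVL\() in gas: paste the level into a symbol name */
	#define STATUS32_L(lvl)		status32_l##lvl

	/* like 0x003\LVL\()abcd: paste the level into the dummy ECR literal */
	#define DUMMY_ECR(lvl)		0x003##lvl##abcd

	int main(void)
	{
		unsigned int status32_l1 = 1, status32_l2 = 2;

		printf("%u %u\n", STATUS32_L(1), STATUS32_L(2));
		printf("0x%08x 0x%08x\n", DUMMY_ECR(1), DUMMY_ECR(2));	/* 0x0031abcd 0x0032abcd */
		return 0;
	}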
From f033737e77d98893f3d03586215aeec026dd7e2f Mon Sep 17 00:00:00 2001
From: Vineet Gupta
Date: Tue, 14 Oct 2014 12:23:50 +0530
Subject: ARC: entry.S: canonical'ize EXCEPTION_{PROLOGUE,EPILOGUE}

-EXCEPTION_EPILOGUE introduced
-EXCEPTION_PROLOGUE now also includes reg file saving

Signed-off-by: Vineet Gupta
---
 arch/arc/include/asm/entry.h | 17 +----------------
 arch/arc/kernel/entry.S      |  4 ++--
 2 files changed, 3 insertions(+), 18 deletions(-)

(limited to 'arch/arc/include')

diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h
index 934bb835106f..f3927a156efb 100644
--- a/arch/arc/include/asm/entry.h
+++ b/arch/arc/include/asm/entry.h
@@ -401,21 +401,6 @@
 	/* ARC700 doesn't provide auto-stack switching */
 	SWITCH_TO_KERNEL_STK
 
-	/* save the regfile */
-	SAVE_ALL_SYS
-.endm
-
-/*--------------------------------------------------------------
- * Save all registers used by Exceptions (TLB Miss, Prot-V, Mem err etc)
- * Requires SP to be already switched to kernel mode Stack
- * sp points to the next free element on the stack at exit of this macro.
- * Registers are pushed / popped in the order defined in struct ptregs
- * in asm/ptrace.h
- * Note that syscalls are implemented via TRAP which is also a exception
- * from CPU's point of view
- *-------------------------------------------------------------*/
-.macro SAVE_ALL_SYS
-
 	lr  r9, [ecr]
 	st  r9, [sp, 8]    /* ECR */
 	st  r0, [sp, 4]    /* orig_r0, needed only for sys calls */
@@ -446,7 +431,7 @@
 * for memory load operations. If used in that way interrupts are deffered
 * by hardware and that is not good.
 *-------------------------------------------------------------*/
-.macro RESTORE_ALL_SYS
+.macro EXCEPTION_EPILOGUE
 	POPAX	erbta
 	POPAX	lp_start
 	POPAX	lp_end
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index 03a349520b55..d8ec722a936b 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -638,7 +638,7 @@ restore_regs :
 
 	; if Returning from Exception
 	bbit0  r10, STATUS_AE_BIT, not_exception
-	RESTORE_ALL_SYS
+	EXCEPTION_EPILOGUE
 	rtie
 
 	; Not Exception so maybe Interrupts (Level 1 or 2)
@@ -704,7 +704,7 @@ not_level1_interrupt:
 
 	;this case is for syscalls or Exceptions (with fake rtie)
 
-	RESTORE_ALL_SYS
+	EXCEPTION_EPILOGUE
debug_marker_syscall:
 	rtie
--
cgit v1.2.3-58-ga151

From 9b8c7d1e7107c30a50aae27c1f33fe706c8c6c67 Mon Sep 17 00:00:00 2001
From: Vineet Gupta
Date: Fri, 27 Feb 2015 16:43:08 +0530
Subject: ARC: entry.S: FAKE_RET_FROM_EXCPN can always use r9

Signed-off-by: Vineet Gupta
---
 arch/arc/include/asm/entry.h | 16 ++++++++--------
 arch/arc/kernel/entry.S      | 24 +++++++++++-------------
 2 files changed, 19 insertions(+), 21 deletions(-)

(limited to 'arch/arc/include')

diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h
index f3927a156efb..408a71378b5e 100644
--- a/arch/arc/include/asm/entry.h
+++ b/arch/arc/include/asm/entry.h
@@ -331,14 +331,14 @@
 * Look at EV_ProtV to see how this is actually used
 *-------------------------------------------------------------*/
 
-.macro FAKE_RET_FROM_EXCPN reg
-
-	ld  \reg, [sp, PT_status32]
-	bic \reg, \reg, (STATUS_U_MASK|STATUS_DE_MASK)
-	bset \reg, \reg, STATUS_L_BIT
-	sr  \reg, [erstatus]
-	mov \reg, 55f
-	sr  \reg, [eret]
+.macro FAKE_RET_FROM_EXCPN
+
+	ld  r9, [sp, PT_status32]
+	bic r9, r9, (STATUS_U_MASK|STATUS_DE_MASK)
+	bset r9, r9, STATUS_L_BIT
+	sr  r9, [erstatus]
+	mov r9, 55f
+	sr  r9, [eret]
 
 	rtie
 55:
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index 6cced37e7a76..0a75f81e2853 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -261,7 +261,7 @@ ENTRY(instr_service)
 	lr  r0, [efa]
 	mov r1, sp
 
-	FAKE_RET_FROM_EXCPN r9
+	FAKE_RET_FROM_EXCPN
 
 	bl  do_insterror_or_kprobe
 	b   ret_from_exception
@@ -278,7 +278,7 @@ ENTRY(mem_service)
 	lr  r0, [efa]
 	mov r1, sp
 
-	FAKE_RET_FROM_EXCPN r9
+	FAKE_RET_FROM_EXCPN
 
 	bl  do_memory_error
 	b   ret_from_exception
@@ -330,12 +330,11 @@ ENTRY(EV_TLBProtV)
 	lr  r2, [ecr]
 	lr  r0, [efa]	; Faulting Data address
 
-	; --------(4) Return from CPU Exception Mode ---------
-	; Fake a rtie, but rtie to next label
-	; That way, subsequently, do_page_fault ( ) executes in pure kernel
-	; mode with further Exceptions enabled
+	; Exception auto-disables further Intr/exceptions.
+ ; Re-enable them by pretending to return from exception + ; (so rest of handler executes in pure K mode) - FAKE_RET_FROM_EXCPN r9 + FAKE_RET_FROM_EXCPN mov r1, sp @@ -377,7 +376,7 @@ ENTRY(call_do_page_fault) EXCEPTION_PROLOGUE lr r0, [efa] ; Faulting Data address mov r1, sp - FAKE_RET_FROM_EXCPN r9 + FAKE_RET_FROM_EXCPN mov blink, ret_from_exception b do_page_fault @@ -394,7 +393,7 @@ ENTRY(EV_PrivilegeV) lr r0, [efa] mov r1, sp - FAKE_RET_FROM_EXCPN r9 + FAKE_RET_FROM_EXCPN bl do_privilege_fault b ret_from_exception @@ -410,7 +409,7 @@ ENTRY(EV_Extension) lr r0, [efa] mov r1, sp - FAKE_RET_FROM_EXCPN r9 + FAKE_RET_FROM_EXCPN bl do_extension_fault b ret_from_exception @@ -472,7 +471,7 @@ trap_with_param: ; Now that we have read EFA, it is safe to do "fake" rtie ; and get out of CPU exception mode - FAKE_RET_FROM_EXCPN r11 + FAKE_RET_FROM_EXCPN ; Save callee regs in case gdb wants to have a look ; SP will grow up by size of CALLEE Reg-File @@ -512,8 +511,7 @@ ENTRY(EV_Trap) ; ======= (5a) Trap is due to System Call ======== - ; Before doing anything, return from CPU Exception Mode - FAKE_RET_FROM_EXCPN r11 + FAKE_RET_FROM_EXCPN ; If syscall tracing ongoing, invoke pre-pos-hooks GET_CURR_THR_INFO_FLAGS r10 -- cgit v1.2.3-58-ga151 From 6d1a20b1d237db29878ae54142e39c87a36d0e95 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Sat, 21 Feb 2015 15:09:32 +0530 Subject: ARC: entry.S: split into ARCompact ISA specific, common bits Signed-off-by: Vineet Gupta --- arch/arc/include/asm/entry-compact.h | 306 +++++++++++++++++++++++++++ arch/arc/include/asm/entry.h | 300 +------------------------- arch/arc/kernel/Makefile | 4 +- arch/arc/kernel/entry-compact.S | 393 +++++++++++++++++++++++++++++++++++ arch/arc/kernel/entry.S | 389 +--------------------------------- 5 files changed, 711 insertions(+), 681 deletions(-) create mode 100644 arch/arc/include/asm/entry-compact.h create mode 100644 arch/arc/kernel/entry-compact.S (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h new file mode 100644 index 000000000000..6c0a81b598d2 --- /dev/null +++ b/arch/arc/include/asm/entry-compact.h @@ -0,0 +1,306 @@ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Vineetg: March 2009 (Supporting 2 levels of Interrupts) + * Stack switching code can no longer reliably rely on the fact that + * if we are NOT in user mode, stack is switched to kernel mode. + * e.g. L2 IRQ interrupted a L1 ISR which had not yet completed + * it's prologue including stack switching from user mode + * + * Vineetg: Aug 28th 2008: Bug #94984 + * -Zero Overhead Loop Context shd be cleared when entering IRQ/EXcp/Trap + * Normally CPU does this automatically, however when doing FAKE rtie, + * we also need to explicitly do this. The problem in macros + * FAKE_RET_FROM_EXCPN and FAKE_RET_FROM_EXCPN_LOCK_IRQ was that this bit + * was being "CLEARED" rather then "SET". 
Actually "SET" clears ZOL context + * + * Vineetg: May 5th 2008 + * -Modified CALLEE_REG save/restore macros to handle the fact that + * r25 contains the kernel current task ptr + * - Defined Stack Switching Macro to be reused in all intr/excp hdlrs + * - Shaved off 11 instructions from RESTORE_ALL_INT1 by using the + * address Write back load ld.ab instead of seperate ld/add instn + * + * Amit Bhor, Sameer Dhavale: Codito Technologies 2004 + */ + +#ifndef __ASM_ARC_ENTRY_COMPACT_H +#define __ASM_ARC_ENTRY_COMPACT_H + +#include +#include /* For THREAD_SIZE */ + +/*-------------------------------------------------------------- + * Switch to Kernel Mode stack if SP points to User Mode stack + * + * Entry : r9 contains pre-IRQ/exception/trap status32 + * Exit : SP is set to kernel mode stack pointer + * If CURR_IN_REG, r25 set to "current" task pointer + * Clobbers: r9 + *-------------------------------------------------------------*/ + +.macro SWITCH_TO_KERNEL_STK + + /* User Mode when this happened ? Yes: Proceed to switch stack */ + bbit1 r9, STATUS_U_BIT, 88f + + /* OK we were already in kernel mode when this event happened, thus can + * assume SP is kernel mode SP. _NO_ need to do any stack switching + */ + +#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS + /* However.... + * If Level 2 Interrupts enabled, we may end up with a corner case: + * 1. User Task executing + * 2. L1 IRQ taken, ISR starts (CPU auto-switched to KERNEL mode) + * 3. But before it could switch SP from USER to KERNEL stack + * a L2 IRQ "Interrupts" L1 + * Thay way although L2 IRQ happened in Kernel mode, stack is still + * not switched. + * To handle this, we may need to switch stack even if in kernel mode + * provided SP has values in range of USER mode stack ( < 0x7000_0000 ) + */ + brlo sp, VMALLOC_START, 88f + + /* TODO: vineetg: + * We need to be a bit more cautious here. What if a kernel bug in + * L1 ISR, caused SP to go whaco (some small value which looks like + * USER stk) and then we take L2 ISR. + * Above brlo alone would treat it as a valid L1-L2 sceanrio + * instead of shouting alound + * The only feasible way is to make sure this L2 happened in + * L1 prelogue ONLY i.e. ilink2 is less than a pre-set marker in + * L1 ISR before it switches stack + */ + +#endif + + /* Save Pre Intr/Exception KERNEL MODE SP on kernel stack + * safe-keeping not really needed, but it keeps the epilogue code + * (SP restore) simpler/uniform. 
+ */ + b.d 66f + mov r9, sp + +88: /*------Intr/Ecxp happened in user mode, "switch" stack ------ */ + + GET_CURR_TASK_ON_CPU r9 + + /* With current tsk in r9, get it's kernel mode stack base */ + GET_TSK_STACK_BASE r9, r9 + +66: +#ifdef CONFIG_ARC_CURR_IN_REG + /* + * Treat r25 as scratch reg, save it on stack first + * Load it with current task pointer + */ + st r25, [r9, -4] + GET_CURR_TASK_ON_CPU r25 +#endif + + /* Save Pre Intr/Exception User SP on kernel stack */ + st.a sp, [r9, -16] ; Make room for orig_r0, ECR, user_r25 + + /* CAUTION: + * SP should be set at the very end when we are done with everything + * In case of 2 levels of interrupt we depend on value of SP to assume + * that everything else is done (loading r25 etc) + */ + + /* set SP to point to kernel mode stack */ + mov sp, r9 + + /* ----- Stack Switched to kernel Mode, Now save REG FILE ----- */ + +.endm + +/*------------------------------------------------------------ + * "FAKE" a rtie to return from CPU Exception context + * This is to re-enable Exceptions within exception + * Look at EV_ProtV to see how this is actually used + *-------------------------------------------------------------*/ + +.macro FAKE_RET_FROM_EXCPN + + ld r9, [sp, PT_status32] + bic r9, r9, (STATUS_U_MASK|STATUS_DE_MASK) + bset r9, r9, STATUS_L_BIT + sr r9, [erstatus] + mov r9, 55f + sr r9, [eret] + + rtie +55: +.endm + +/*-------------------------------------------------------------- + * For early Exception/ISR Prologue, a core reg is temporarily needed to + * code the rest of prolog (stack switching). This is done by stashing + * it to memory (non-SMP case) or SCRATCH0 Aux Reg (SMP). + * + * Before saving the full regfile - this reg is restored back, only + * to be saved again on kernel mode stack, as part of pt_regs. + *-------------------------------------------------------------*/ +.macro PROLOG_FREEUP_REG reg, mem +#ifdef CONFIG_SMP + sr \reg, [ARC_REG_SCRATCH_DATA0] +#else + st \reg, [\mem] +#endif +.endm + +.macro PROLOG_RESTORE_REG reg, mem +#ifdef CONFIG_SMP + lr \reg, [ARC_REG_SCRATCH_DATA0] +#else + ld \reg, [\mem] +#endif +.endm + +/*-------------------------------------------------------------- + * Exception Entry prologue + * -Switches stack to K mode (if not already) + * -Saves the register file + * + * After this it is safe to call the "C" handlers + *-------------------------------------------------------------*/ +.macro EXCEPTION_PROLOGUE + + /* Need at least 1 reg to code the early exception prologue */ + PROLOG_FREEUP_REG r9, @ex_saved_reg1 + + /* U/K mode at time of exception (stack not switched if already K) */ + lr r9, [erstatus] + + /* ARC700 doesn't provide auto-stack switching */ + SWITCH_TO_KERNEL_STK + + lr r9, [ecr] + st r9, [sp, 8] /* ECR */ + st r0, [sp, 4] /* orig_r0, needed only for sys calls */ + + /* Restore r9 used to code the early prologue */ + PROLOG_RESTORE_REG r9, @ex_saved_reg1 + + SAVE_R0_TO_R12 + PUSH gp + PUSH fp + PUSH blink + PUSHAX eret + PUSHAX erstatus + PUSH lp_count + PUSHAX lp_end + PUSHAX lp_start + PUSHAX erbta +.endm + +/*-------------------------------------------------------------- + * Restore all registers used by system call or Exceptions + * SP should always be pointing to the next free stack element + * when entering this macro. + * + * NOTE: + * + * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg + * for memory load operations. If used in that way interrupts are deffered + * by hardware and that is not good. 
+ *-------------------------------------------------------------*/ +.macro EXCEPTION_EPILOGUE + POPAX erbta + POPAX lp_start + POPAX lp_end + + POP r9 + mov lp_count, r9 ;LD to lp_count is not allowed + + POPAX erstatus + POPAX eret + POP blink + POP fp + POP gp + RESTORE_R12_TO_R0 + + ld sp, [sp] /* restore original sp */ + /* orig_r0, ECR, user_r25 skipped automatically */ +.endm + +/* Dummy ECR values for Interrupts */ +#define event_IRQ1 0x0031abcd +#define event_IRQ2 0x0032abcd + +.macro INTERRUPT_PROLOGUE LVL + + /* free up r9 as scratchpad */ + PROLOG_FREEUP_REG r9, @int\LVL\()_saved_reg + + /* Which mode (user/kernel) was the system in when intr occured */ + lr r9, [status32_l\LVL\()] + + SWITCH_TO_KERNEL_STK + + /* restore original r9 */ + PROLOG_RESTORE_REG r9, @int\LVL\()_saved_reg + + /* now we are ready to save the remaining context */ + st 0x003\LVL\()abcd, [sp, 8] /* Dummy ECR */ + st 0, [sp, 4] /* orig_r0 , N/A for IRQ */ + + SAVE_R0_TO_R12 + PUSH gp + PUSH fp + PUSH blink + PUSH ilink\LVL\() + PUSHAX status32_l\LVL\() + PUSH lp_count + PUSHAX lp_end + PUSHAX lp_start + PUSHAX bta_l\LVL\() +.endm + +/*-------------------------------------------------------------- + * Restore all registers used by interrupt handlers. + * + * NOTE: + * + * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg + * for memory load operations. If used in that way interrupts are deffered + * by hardware and that is not good. + *-------------------------------------------------------------*/ +.macro INTERRUPT_EPILOGUE LVL + POPAX bta_l\LVL\() + POPAX lp_start + POPAX lp_end + + POP r9 + mov lp_count, r9 ;LD to lp_count is not allowed + + POPAX status32_l\LVL\() + POP ilink\LVL\() + POP blink + POP fp + POP gp + RESTORE_R12_TO_R0 + + ld sp, [sp] /* restore original sp */ + /* orig_r0, ECR, user_r25 skipped automatically */ +.endm + +/* Get thread_info of "current" tsk */ +.macro GET_CURR_THR_INFO_FROM_SP reg + bic \reg, sp, (THREAD_SIZE - 1) +.endm + +/* Get CPU-ID of this core */ +.macro GET_CPU_ID reg + lr \reg, [identity] + lsr \reg, \reg, 8 + bmsk \reg, \reg, 7 +.endm + +#endif /* __ASM_ARC_ENTRY_COMPACT_H */ diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h index 408a71378b5e..f61032c53d51 100644 --- a/arch/arc/include/asm/entry.h +++ b/arch/arc/include/asm/entry.h @@ -1,45 +1,23 @@ /* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. - * - * Vineetg: March 2009 (Supporting 2 levels of Interrupts) - * Stack switching code can no longer reliably rely on the fact that - * if we are NOT in user mode, stack is switched to kernel mode. - * e.g. L2 IRQ interrupted a L1 ISR which had not yet completed - * it's prologue including stack switching from user mode - * - * Vineetg: Aug 28th 2008: Bug #94984 - * -Zero Overhead Loop Context shd be cleared when entering IRQ/EXcp/Trap - * Normally CPU does this automatically, however when doing FAKE rtie, - * we also need to explicitly do this. The problem in macros - * FAKE_RET_FROM_EXCPN and FAKE_RET_FROM_EXCPN_LOCK_IRQ was that this bit - * was being "CLEARED" rather then "SET". 
Actually "SET" clears ZOL context - * - * Vineetg: May 5th 2008 - * -Modified CALLEE_REG save/restore macros to handle the fact that - * r25 contains the kernel current task ptr - * - Defined Stack Switching Macro to be reused in all intr/excp hdlrs - * - Shaved off 11 instructions from RESTORE_ALL_INT1 by using the - * address Write back load ld.ab instead of seperate ld/add instn - * - * Amit Bhor, Sameer Dhavale: Codito Technologies 2004 */ #ifndef __ASM_ARC_ENTRY_H #define __ASM_ARC_ENTRY_H -#ifdef __ASSEMBLY__ #include /* For NR_syscalls defination */ -#include #include #include #include /* For VMALLOC_START */ -#include /* For THREAD_SIZE */ #include +#include /* ISA specific bits */ + /* Note on the LD/ST addr modes with addr reg wback * * LD.a same as LD.aw @@ -240,117 +218,6 @@ .endm -/*-------------------------------------------------------------- - * Switch to Kernel Mode stack if SP points to User Mode stack - * - * Entry : r9 contains pre-IRQ/exception/trap status32 - * Exit : SP is set to kernel mode stack pointer - * If CURR_IN_REG, r25 set to "current" task pointer - * Clobbers: r9 - *-------------------------------------------------------------*/ - -.macro SWITCH_TO_KERNEL_STK - - /* User Mode when this happened ? Yes: Proceed to switch stack */ - bbit1 r9, STATUS_U_BIT, 88f - - /* OK we were already in kernel mode when this event happened, thus can - * assume SP is kernel mode SP. _NO_ need to do any stack switching - */ - -#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS - /* However.... - * If Level 2 Interrupts enabled, we may end up with a corner case: - * 1. User Task executing - * 2. L1 IRQ taken, ISR starts (CPU auto-switched to KERNEL mode) - * 3. But before it could switch SP from USER to KERNEL stack - * a L2 IRQ "Interrupts" L1 - * Thay way although L2 IRQ happened in Kernel mode, stack is still - * not switched. - * To handle this, we may need to switch stack even if in kernel mode - * provided SP has values in range of USER mode stack ( < 0x7000_0000 ) - */ - brlo sp, VMALLOC_START, 88f - - /* TODO: vineetg: - * We need to be a bit more cautious here. What if a kernel bug in - * L1 ISR, caused SP to go whaco (some small value which looks like - * USER stk) and then we take L2 ISR. - * Above brlo alone would treat it as a valid L1-L2 sceanrio - * instead of shouting alound - * The only feasible way is to make sure this L2 happened in - * L1 prelogue ONLY i.e. ilink2 is less than a pre-set marker in - * L1 ISR before it switches stack - */ - -#endif - - /* Save Pre Intr/Exception KERNEL MODE SP on kernel stack - * safe-keeping not really needed, but it keeps the epilogue code - * (SP restore) simpler/uniform. 
- */ - b.d 66f - mov r9, sp - -88: /*------Intr/Ecxp happened in user mode, "switch" stack ------ */ - - GET_CURR_TASK_ON_CPU r9 - - /* With current tsk in r9, get it's kernel mode stack base */ - GET_TSK_STACK_BASE r9, r9 - -66: -#ifdef CONFIG_ARC_CURR_IN_REG - /* - * Treat r25 as scratch reg, save it on stack first - * Load it with current task pointer - */ - st r25, [r9, -4] - GET_CURR_TASK_ON_CPU r25 -#endif - - /* Save Pre Intr/Exception User SP on kernel stack */ - st.a sp, [r9, -16] ; Make room for orig_r0, ECR, user_r25 - - /* CAUTION: - * SP should be set at the very end when we are done with everything - * In case of 2 levels of interrupt we depend on value of SP to assume - * that everything else is done (loading r25 etc) - */ - - /* set SP to point to kernel mode stack */ - mov sp, r9 - - /* ----- Stack Switched to kernel Mode, Now save REG FILE ----- */ - -.endm - -/*------------------------------------------------------------ - * "FAKE" a rtie to return from CPU Exception context - * This is to re-enable Exceptions within exception - * Look at EV_ProtV to see how this is actually used - *-------------------------------------------------------------*/ - -.macro FAKE_RET_FROM_EXCPN - - ld r9, [sp, PT_status32] - bic r9, r9, (STATUS_U_MASK|STATUS_DE_MASK) - bset r9, r9, STATUS_L_BIT - sr r9, [erstatus] - mov r9, 55f - sr r9, [eret] - - rtie -55: -.endm - -/* - * @reg [OUT] &thread_info of "current" - */ -.macro GET_CURR_THR_INFO_FROM_SP reg - bic \reg, sp, (THREAD_SIZE - 1) -.endm - /* * @reg [OUT] thread_info->flags of "current" */ @@ -359,165 +226,6 @@ ld \reg, [\reg, THREAD_INFO_FLAGS] .endm -/*-------------------------------------------------------------- - * For early Exception/ISR Prologue, a core reg is temporarily needed to - * code the rest of prolog (stack switching). This is done by stashing - * it to memory (non-SMP case) or SCRATCH0 Aux Reg (SMP). - * - * Before saving the full regfile - this reg is restored back, only - * to be saved again on kernel mode stack, as part of pt_regs. - *-------------------------------------------------------------*/ -.macro PROLOG_FREEUP_REG reg, mem -#ifdef CONFIG_SMP - sr \reg, [ARC_REG_SCRATCH_DATA0] -#else - st \reg, [\mem] -#endif -.endm - -.macro PROLOG_RESTORE_REG reg, mem -#ifdef CONFIG_SMP - lr \reg, [ARC_REG_SCRATCH_DATA0] -#else - ld \reg, [\mem] -#endif -.endm - -/*-------------------------------------------------------------- - * Exception Entry prologue - * -Switches stack to K mode (if not already) - * -Saves the register file - * - * After this it is safe to call the "C" handlers - *-------------------------------------------------------------*/ -.macro EXCEPTION_PROLOGUE - - /* Need at least 1 reg to code the early exception prologue */ - PROLOG_FREEUP_REG r9, @ex_saved_reg1 - - /* U/K mode at time of exception (stack not switched if already K) */ - lr r9, [erstatus] - - /* ARC700 doesn't provide auto-stack switching */ - SWITCH_TO_KERNEL_STK - - lr r9, [ecr] - st r9, [sp, 8] /* ECR */ - st r0, [sp, 4] /* orig_r0, needed only for sys calls */ - - /* Restore r9 used to code the early prologue */ - PROLOG_RESTORE_REG r9, @ex_saved_reg1 - - SAVE_R0_TO_R12 - PUSH gp - PUSH fp - PUSH blink - PUSHAX eret - PUSHAX erstatus - PUSH lp_count - PUSHAX lp_end - PUSHAX lp_start - PUSHAX erbta -.endm - -/*-------------------------------------------------------------- - * Restore all registers used by system call or Exceptions - * SP should always be pointing to the next free stack element - * when entering this macro. 
- * - * NOTE: - * - * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg - * for memory load operations. If used in that way interrupts are deffered - * by hardware and that is not good. - *-------------------------------------------------------------*/ -.macro EXCEPTION_EPILOGUE - POPAX erbta - POPAX lp_start - POPAX lp_end - - POP r9 - mov lp_count, r9 ;LD to lp_count is not allowed - - POPAX erstatus - POPAX eret - POP blink - POP fp - POP gp - RESTORE_R12_TO_R0 - - ld sp, [sp] /* restore original sp */ - /* orig_r0, ECR, user_r25 skipped automatically */ -.endm - -/* Dummy ECR values for Interrupts */ -#define event_IRQ1 0x0031abcd -#define event_IRQ2 0x0032abcd - -.macro INTERRUPT_PROLOGUE LVL - - /* free up r9 as scratchpad */ - PROLOG_FREEUP_REG r9, @int\LVL\()_saved_reg - - /* Which mode (user/kernel) was the system in when intr occurred */ - lr r9, [status32_l\LVL\()] - - SWITCH_TO_KERNEL_STK - - /* restore original r9 */ - PROLOG_RESTORE_REG r9, @int\LVL\()_saved_reg - - /* now we are ready to save the remaining context */ - st 0x003\LVL\()abcd, [sp, 8] /* Dummy ECR */ - st 0, [sp, 4] /* orig_r0 , N/A for IRQ */ - - SAVE_R0_TO_R12 - PUSH gp - PUSH fp - PUSH blink - PUSH ilink\LVL\() - PUSHAX status32_l\LVL\() - PUSH lp_count - PUSHAX lp_end - PUSHAX lp_start - PUSHAX bta_l\LVL\() -.endm - -/*-------------------------------------------------------------- - * Restore all registers used by interrupt handlers. - * - * NOTE: - * - * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg - * for memory load operations. If used in that way interrupts are deffered - * by hardware and that is not good. - *-------------------------------------------------------------*/ -.macro INTERRUPT_EPILOGUE LVL - POPAX bta_l\LVL\() - POPAX lp_start - POPAX lp_end - - POP r9 - mov lp_count, r9 ;LD to lp_count is not allowed - - POPAX status32_l\LVL\() - POP ilink\LVL\() - POP blink - POP fp - POP gp - RESTORE_R12_TO_R0 - - ld sp, [sp] /* restore original sp */ - /* orig_r0, ECR, user_r25 skipped automatically */ -.endm - -/* Get CPU-ID of this core */ -.macro GET_CPU_ID reg - lr \reg, [identity] - lsr \reg, \reg, 8 - bmsk \reg, \reg, 7 -.endm - #ifdef CONFIG_SMP /*------------------------------------------------- @@ -586,6 +294,4 @@ #endif /* CONFIG_ARC_CURR_IN_REG */ -#endif /* __ASSEMBLY__ */ - #endif /* __ASM_ARC_ENTRY_H */ diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile index 113f2033da9f..024a63e90b72 100644 --- a/arch/arc/kernel/Makefile +++ b/arch/arc/kernel/Makefile @@ -8,9 +8,9 @@ # Pass UTS_MACHINE for user_regset definition CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' -obj-y := arcksyms.o setup.o irq.o time.o reset.o ptrace.o entry.o process.o +obj-y := arcksyms.o setup.o irq.o time.o reset.o ptrace.o process.o devtree.o obj-y += signal.o traps.o sys.o troubleshoot.o stacktrace.o disasm.o clk.o -obj-y += devtree.o +obj-y += entry-compact.o obj-$(CONFIG_MODULES) += arcksyms.o module.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/arc/kernel/entry-compact.S b/arch/arc/kernel/entry-compact.S new file mode 100644 index 000000000000..bf611ec9a017 --- /dev/null +++ b/arch/arc/kernel/entry-compact.S @@ -0,0 +1,393 @@ +/* + * Low Level Interrupts/Traps/Exceptions(non-TLB) Handling for ARCompact ISA + * + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * vineetg: May 2011 + * -Userspace unaligned access emulation + * + * vineetg: Feb 2011 (ptrace low level code fixes) + * -traced syscall return code (r0) was not saved into pt_regs for restoring + * into user reg-file when traded task rets to user space. + * -syscalls needing arch-wrappers (mainly for passing sp as pt_regs) + * were not invoking post-syscall trace hook (jumping directly into + * ret_from_system_call) + * + * vineetg: Nov 2010: + * -Vector table jumps (@8 bytes) converted into branches (@4 bytes) + * -To maintain the slot size of 8 bytes/vector, added nop, which is + * not executed at runtime. + * + * vineetg: Nov 2009 (Everything needed for TIF_RESTORE_SIGMASK) + * -do_signal()invoked upon TIF_RESTORE_SIGMASK as well + * -Wrappers for sys_{,rt_}sigsuspend() nolonger needed as they don't + * need ptregs anymore + * + * Vineetg: Oct 2009 + * -In a rare scenario, Process gets a Priv-V exception and gets scheduled + * out. Since we don't do FAKE RTIE for Priv-V, CPU excpetion state remains + * active (AE bit enabled). This causes a double fault for a subseq valid + * exception. Thus FAKE RTIE needed in low level Priv-Violation handler. + * Instr Error could also cause similar scenario, so same there as well. + * + * Vineetg: March 2009 (Supporting 2 levels of Interrupts) + * + * Vineetg: Aug 28th 2008: Bug #94984 + * -Zero Overhead Loop Context shd be cleared when entering IRQ/EXcp/Trap + * Normally CPU does this automatically, however when doing FAKE rtie, + * we need to explicitly do this. The problem in macros + * FAKE_RET_FROM_EXCPN and FAKE_RET_FROM_EXCPN_LOCK_IRQ was that this bit + * was being "CLEARED" rather then "SET". Since it is Loop INHIBIT Bit, + * setting it and not clearing it clears ZOL context + * + * Vineetg: May 16th, 2008 + * - r25 now contains the Current Task when in kernel + * + * Vineetg: Dec 22, 2007 + * Minor Surgery of Low Level ISR to make it SMP safe + * - MMU_SCRATCH0 Reg used for freeing up r9 in Level 1 ISR + * - _current_task is made an array of NR_CPUS + * - Access of _current_task wrapped inside a macro so that if hardware + * team agrees for a dedicated reg, no other code is touched + * + * Amit Bhor, Rahul Trivedi, Kanika Nema, Sameer Dhavale : Codito Tech 2004 + */ + +#include +#include /* {EXTRY,EXIT} */ +#include +#include + + .cpu A7 + +;############################ Vector Table ################################# + +.macro VECTOR lbl +#if 1 /* Just in case, build breaks */ + j \lbl +#else + b \lbl + nop +#endif +.endm + + .section .vector, "ax",@progbits + .align 4 + +/* Each entry in the vector table must occupy 2 words. Since it is a jump + * across sections (.vector to .text) we are gauranteed that 'j somewhere' + * will use the 'j limm' form of the intrsuction as long as somewhere is in + * a section other than .vector. 
+ */ + +; ********* Critical System Events ********************** +VECTOR res_service ; 0x0, Restart Vector (0x0) +VECTOR mem_service ; 0x8, Mem exception (0x1) +VECTOR instr_service ; 0x10, Instrn Error (0x2) + +; ******************** Device ISRs ********************** +#ifdef CONFIG_ARC_IRQ3_LV2 +VECTOR handle_interrupt_level2 +#else +VECTOR handle_interrupt_level1 +#endif + +VECTOR handle_interrupt_level1 + +#ifdef CONFIG_ARC_IRQ5_LV2 +VECTOR handle_interrupt_level2 +#else +VECTOR handle_interrupt_level1 +#endif + +#ifdef CONFIG_ARC_IRQ6_LV2 +VECTOR handle_interrupt_level2 +#else +VECTOR handle_interrupt_level1 +#endif + +.rept 25 +VECTOR handle_interrupt_level1 ; Other devices +.endr + +/* FOR ARC600: timer = 0x3, uart = 0x8, emac = 0x10 */ + +; ******************** Exceptions ********************** +VECTOR EV_MachineCheck ; 0x100, Fatal Machine check (0x20) +VECTOR EV_TLBMissI ; 0x108, Intruction TLB miss (0x21) +VECTOR EV_TLBMissD ; 0x110, Data TLB miss (0x22) +VECTOR EV_TLBProtV ; 0x118, Protection Violation (0x23) + ; or Misaligned Access +VECTOR EV_PrivilegeV ; 0x120, Privilege Violation (0x24) +VECTOR EV_Trap ; 0x128, Trap exception (0x25) +VECTOR EV_Extension ; 0x130, Extn Intruction Excp (0x26) + +.rept 24 +VECTOR reserved ; Reserved Exceptions +.endr + + +;##################### Scratch Mem for IRQ stack switching ############# + +ARCFP_DATA int1_saved_reg + .align 32 + .type int1_saved_reg, @object + .size int1_saved_reg, 4 +int1_saved_reg: + .zero 4 + +/* Each Interrupt level needs its own scratch */ +#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS + +ARCFP_DATA int2_saved_reg + .type int2_saved_reg, @object + .size int2_saved_reg, 4 +int2_saved_reg: + .zero 4 + +#endif + +; --------------------------------------------- + .section .text, "ax",@progbits + +res_service: ; processor restart + flag 0x1 ; not implemented + nop + nop + +reserved: ; processor restart + rtie ; jump to processor initializations + +;##################### Interrupt Handling ############################## + +#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS +; --------------------------------------------- +; Level 2 ISR: Can interrupt a Level 1 ISR +; --------------------------------------------- +ENTRY(handle_interrupt_level2) + + INTERRUPT_PROLOGUE 2 + + ;------------------------------------------------------ + ; if L2 IRQ interrupted a L1 ISR, disable preemption + ;------------------------------------------------------ + + ld r9, [sp, PT_status32] ; get statu32_l2 (saved in pt_regs) + bbit0 r9, STATUS_A1_BIT, 1f ; L1 not active when L2 IRQ, so normal + + ; A1 is set in status32_l2 + ; bump thread_info->preempt_count (Disable preemption) + GET_CURR_THR_INFO_FROM_SP r10 + ld r9, [r10, THREAD_INFO_PREEMPT_COUNT] + add r9, r9, 1 + st r9, [r10, THREAD_INFO_PREEMPT_COUNT] + +1: + ;------------------------------------------------------ + ; setup params for Linux common ISR and invoke it + ;------------------------------------------------------ + lr r0, [icause2] + and r0, r0, 0x1f + + bl.d @arch_do_IRQ + mov r1, sp + + mov r8,0x2 + sr r8, [AUX_IRQ_LV12] ; clear bit in Sticky Status Reg + + b ret_from_exception + +END(handle_interrupt_level2) + +#endif + +; --------------------------------------------- +; Level 1 ISR +; --------------------------------------------- +ENTRY(handle_interrupt_level1) + + INTERRUPT_PROLOGUE 1 + + lr r0, [icause1] + and r0, r0, 0x1f + +#ifdef CONFIG_TRACE_IRQFLAGS + ; icause1 needs to be read early, before calling tracing, which + ; can clobber scratch regs, hence use of stack to stash it + push r0 + 
TRACE_ASM_IRQ_DISABLE + pop r0 +#endif + + bl.d @arch_do_IRQ + mov r1, sp + + mov r8,0x1 + sr r8, [AUX_IRQ_LV12] ; clear bit in Sticky Status Reg + + b ret_from_exception +END(handle_interrupt_level1) + +;################### Non TLB Exception Handling ############################# + +; --------------------------------------------- +; Protection Violation Exception Handler +; --------------------------------------------- + +ENTRY(EV_TLBProtV) + + EXCEPTION_PROLOGUE + + lr r2, [ecr] + lr r0, [efa] ; Faulting Data address (not part of pt_regs saved above) + + ; Exception auto-disables further Intr/exceptions. + ; Re-enable them by pretending to return from exception + ; (so rest of handler executes in pure K mode) + + FAKE_RET_FROM_EXCPN + + mov r1, sp ; Handle to pt_regs + + ;------ (5) Type of Protection Violation? ---------- + ; + ; ProtV Hardware Exception is triggered for Access Faults of 2 types + ; -Access Violaton : 00_23_(00|01|02|03)_00 + ; x r w r+w + ; -Unaligned Access : 00_23_04_00 + ; + bbit1 r2, ECR_C_BIT_PROTV_MISALIG_DATA, 4f + + ;========= (6a) Access Violation Processing ======== + bl do_page_fault + b ret_from_exception + + ;========== (6b) Non aligned access ============ +4: + + SAVE_CALLEE_SAVED_USER + mov r2, sp ; callee_regs + + bl do_misaligned_access + + ; TBD: optimize - do this only if a callee reg was involved + ; either a dst of emulated LD/ST or src with address-writeback + RESTORE_CALLEE_SAVED_USER + + b ret_from_exception + +END(EV_TLBProtV) + +; Wrapper for Linux page fault handler called from EV_TLBMiss* +; Very similar to ProtV handler case (6a) above, but avoids the extra checks +; for Misaligned access +; +ENTRY(call_do_page_fault) + + EXCEPTION_PROLOGUE + lr r0, [efa] ; Faulting Data address + mov r1, sp + FAKE_RET_FROM_EXCPN + + mov blink, ret_from_exception + b do_page_fault + +END(call_do_page_fault) + +;############# Common Handlers for ARCompact and ARCv2 ############## + +#include "entry.S" + +;############# Return from Intr/Excp/Trap (ARC Specifics) ############## +; +; Restore the saved sys context (common exit-path for EXCPN/IRQ/Trap) +; IRQ shd definitely not happen between now and rtie +; All 2 entry points to here already disable interrupts + +.Lrestore_regs: + + TRACE_ASM_IRQ_ENABLE + + lr r10, [status32] + + ; Restore REG File. In case multiple Events outstanding, + ; use the same priorty as rtie: EXCPN, L2 IRQ, L1 IRQ, None + ; Note that we use realtime STATUS32 (not pt_regs->status32) to + ; decide that. + + ; if Returning from Exception + bbit0 r10, STATUS_AE_BIT, not_exception + EXCEPTION_EPILOGUE + rtie + + ; Not Exception so maybe Interrupts (Level 1 or 2) + +not_exception: + +#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS + + ; Level 2 interrupt return Path - from hardware standpoint + bbit0 r10, STATUS_A2_BIT, not_level2_interrupt + + ;------------------------------------------------------------------ + ; However the context returning might not have taken L2 intr itself + ; e.g. Task'A' user-code -> L2 intr -> schedule -> 'B' user-code ret + ; Special considerations needed for the context which took L2 intr + + ld r9, [sp, PT_event] ; Ensure this is L2 intr context + brne r9, event_IRQ2, 149f + + ;------------------------------------------------------------------ + ; if L2 IRQ interrupted an L1 ISR, we'd disabled preemption earlier + ; so that sched doesn't move to new task, causing L1 to be delayed + ; undeterministically. 
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index 286d7dc0723b..75cdc56351d9 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -1,60 +1,13 @@
 /*
- * Low Level Interrupts/Traps/Exceptions(non-TLB) Handling for ARC
+ * Common Low Level Interrupts/Traps/Exceptions(non-TLB) Handling for ARC
+ * (included from entry-<isa>.S)
  *
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
  * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * vineetg: May 2011
- *  -Userspace unaligned access emulation
- *
- * vineetg: Feb 2011 (ptrace low level code fixes)
- *  -traced syscall return code (r0) was not saved into pt_regs for restoring
- *   into user reg-file when traced task rets to user space.
- *  -syscalls needing arch-wrappers (mainly for passing sp as pt_regs)
- *   were not invoking post-syscall trace hook (jumping directly into
- *   ret_from_system_call)
- *
- * vineetg: Nov 2010:
- *  -Vector table jumps (@8 bytes) converted into branches (@4 bytes)
- *  -To maintain the slot size of 8 bytes/vector, added nop, which is
- *   not executed at runtime.
- *
- * vineetg: Nov 2009 (Everything needed for TIF_RESTORE_SIGMASK)
- *  -do_signal() invoked upon TIF_RESTORE_SIGMASK as well
- *  -Wrappers for sys_{,rt_}sigsuspend() no longer needed as they don't
- *   need ptregs anymore
- *
- * Vineetg: Oct 2009
- *  -In a rare scenario, Process gets a Priv-V exception and gets scheduled
- *   out. Since we don't do FAKE RTIE for Priv-V, CPU exception state remains
- *   active (AE bit enabled). This causes a double fault for a subsequent
- *   valid exception. Thus FAKE RTIE needed in low level Priv-Violation
- *   handler. Instr Error could also cause a similar scenario, so same there
- *   as well.
- *
- * Vineetg: March 2009 (Supporting 2 levels of Interrupts)
- *
- * Vineetg: Aug 28th 2008: Bug #94984
- *  -Zero Overhead Loop Context shd be cleared when entering IRQ/Excp/Trap
- *   Normally CPU does this automatically, however when doing FAKE rtie,
- *   we need to explicitly do this. The problem in macros
- *   FAKE_RET_FROM_EXCPN and FAKE_RET_FROM_EXCPN_LOCK_IRQ was that this bit
- *   was being "CLEARED" rather than "SET".
Since it is Loop INHIBIT Bit, - * setting it and not clearing it clears ZOL context - * - * Vineetg: May 16th, 2008 - * - r25 now contains the Current Task when in kernel - * - * Vineetg: Dec 22, 2007 - * Minor Surgery of Low Level ISR to make it SMP safe - * - MMU_SCRATCH0 Reg used for freeing up r9 in Level 1 ISR - * - _current_task is made an array of NR_CPUS - * - Access of _current_task wrapped inside a macro so that if hardware - * team agrees for a dedicated reg, no other code is touched - * - * Amit Bhor, Rahul Trivedi, Kanika Nema, Sameer Dhavale : Codito Tech 2004 */ /*------------------------------------------------------------------ @@ -67,187 +20,10 @@ * Global Pointer (gp) r26 * Frame Pointer (fp) r27 * Stack Pointer (sp) r28 - * Interrupt link register (ilink1) r29 - * Interrupt link register (ilink2) r30 * Branch link register (blink) r31 *------------------------------------------------------------------ */ - .cpu A7 - -;############################ Vector Table ################################# - -.macro VECTOR lbl -#if 1 /* Just in case, build breaks */ - j \lbl -#else - b \lbl - nop -#endif -.endm - - .section .vector, "ax",@progbits - .align 4 - -/* Each entry in the vector table must occupy 2 words. Since it is a jump - * across sections (.vector to .text) we are gauranteed that 'j somewhere' - * will use the 'j limm' form of the intrsuction as long as somewhere is in - * a section other than .vector. - */ - -; ********* Critical System Events ********************** -VECTOR res_service ; 0x0, Restart Vector (0x0) -VECTOR mem_service ; 0x8, Mem exception (0x1) -VECTOR instr_service ; 0x10, Instrn Error (0x2) - -; ******************** Device ISRs ********************** -#ifdef CONFIG_ARC_IRQ3_LV2 -VECTOR handle_interrupt_level2 -#else -VECTOR handle_interrupt_level1 -#endif - -VECTOR handle_interrupt_level1 - -#ifdef CONFIG_ARC_IRQ5_LV2 -VECTOR handle_interrupt_level2 -#else -VECTOR handle_interrupt_level1 -#endif - -#ifdef CONFIG_ARC_IRQ6_LV2 -VECTOR handle_interrupt_level2 -#else -VECTOR handle_interrupt_level1 -#endif - -.rept 25 -VECTOR handle_interrupt_level1 ; Other devices -.endr - -/* FOR ARC600: timer = 0x3, uart = 0x8, emac = 0x10 */ - -; ******************** Exceptions ********************** -VECTOR EV_MachineCheck ; 0x100, Fatal Machine check (0x20) -VECTOR EV_TLBMissI ; 0x108, Intruction TLB miss (0x21) -VECTOR EV_TLBMissD ; 0x110, Data TLB miss (0x22) -VECTOR EV_TLBProtV ; 0x118, Protection Violation (0x23) - ; or Misaligned Access -VECTOR EV_PrivilegeV ; 0x120, Privilege Violation (0x24) -VECTOR EV_Trap ; 0x128, Trap exception (0x25) -VECTOR EV_Extension ; 0x130, Extn Intruction Excp (0x26) - -.rept 24 -VECTOR reserved ; Reserved Exceptions -.endr - -#include /* {EXTRY,EXIT} */ -#include /* SAVE_ALL_{INT1,INT2,SYS...} */ -#include -#include -#include - -;##################### Scratch Mem for IRQ stack switching ############# - -ARCFP_DATA int1_saved_reg - .align 32 - .type int1_saved_reg, @object - .size int1_saved_reg, 4 -int1_saved_reg: - .zero 4 - -/* Each Interrupt level needs its own scratch */ -#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS - -ARCFP_DATA int2_saved_reg - .type int2_saved_reg, @object - .size int2_saved_reg, 4 -int2_saved_reg: - .zero 4 - -#endif - -; --------------------------------------------- - .section .text, "ax",@progbits - -res_service: ; processor restart - flag 0x1 ; not implemented - nop - nop - -reserved: ; processor restart - rtie ; jump to processor initializations - -;##################### Interrupt Handling 
############################## - -#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS -; --------------------------------------------- -; Level 2 ISR: Can interrupt a Level 1 ISR -; --------------------------------------------- -ENTRY(handle_interrupt_level2) - - INTERRUPT_PROLOGUE 2 - - ;------------------------------------------------------ - ; if L2 IRQ interrupted a L1 ISR, disable preemption - ;------------------------------------------------------ - - ld r9, [sp, PT_status32] ; get statu32_l2 (saved in pt_regs) - bbit0 r9, STATUS_A1_BIT, 1f ; L1 not active when L2 IRQ, so normal - - ; A1 is set in status32_l2 - ; bump thread_info->preempt_count (Disable preemption) - GET_CURR_THR_INFO_FROM_SP r10 - ld r9, [r10, THREAD_INFO_PREEMPT_COUNT] - add r9, r9, 1 - st r9, [r10, THREAD_INFO_PREEMPT_COUNT] - -1: - ;------------------------------------------------------ - ; setup params for Linux common ISR and invoke it - ;------------------------------------------------------ - lr r0, [icause2] - and r0, r0, 0x1f - - bl.d @arch_do_IRQ - mov r1, sp - - mov r8,0x2 - sr r8, [AUX_IRQ_LV12] ; clear bit in Sticky Status Reg - - b ret_from_exception - -END(handle_interrupt_level2) - -#endif - -; --------------------------------------------- -; Level 1 ISR -; --------------------------------------------- -ENTRY(handle_interrupt_level1) - - INTERRUPT_PROLOGUE 1 - - lr r0, [icause1] - and r0, r0, 0x1f - -#ifdef CONFIG_TRACE_IRQFLAGS - ; icause1 needs to be read early, before calling tracing, which - ; can clobber scratch regs, hence use of stack to stash it - push r0 - TRACE_ASM_IRQ_DISABLE - pop r0 -#endif - - bl.d @arch_do_IRQ - mov r1, sp - - mov r8,0x1 - sr r8, [AUX_IRQ_LV12] ; clear bit in Sticky Status Reg - - b ret_from_exception -END(handle_interrupt_level1) - ;################### Non TLB Exception Handling ############################# ; --------------------------------------------- @@ -314,70 +90,6 @@ ENTRY(EV_MachineCheck) END(EV_MachineCheck) -; --------------------------------------------- -; Protection Violation Exception Handler -; --------------------------------------------- - -ENTRY(EV_TLBProtV) - - EXCEPTION_PROLOGUE - - lr r2, [ecr] - lr r0, [efa] ; Faulting Data addr (not part of pt_regs saved above) - - ; Exception auto-disables further Intr/exceptions. - ; Re-enable them by pretending to return from exception - ; (so rest of handler executes in pure K mode) - - FAKE_RET_FROM_EXCPN - - mov r1, sp ; Handle to pt_regs - - ;------ (5) Type of Protection Violation? 
---------- - ; - ; ProtV Hardware Exception is triggered for Access Faults of 2 types - ; -Access Violaton : 00_23_(00|01|02|03)_00 - ; x r w r+w - ; -Unaligned Access : 00_23_04_00 - ; - bbit1 r2, ECR_C_BIT_PROTV_MISALIG_DATA, 4f - - ;========= (6a) Access Violation Processing ======== - bl do_page_fault - b ret_from_exception - - ;========== (6b) Non aligned access ============ -4: - - SAVE_CALLEE_SAVED_USER - mov r2, sp ; callee_regs - - bl do_misaligned_access - - ; TBD: optimize - do this only if a callee reg was involved - ; either a dst of emulated LD/ST or src with address-writeback - RESTORE_CALLEE_SAVED_USER - - b ret_from_exception - -END(EV_TLBProtV) - -; Wrapper for Linux page fault handler called from EV_TLBMiss* -; Very similar to ProtV handler case (6a) above, but avoids the extra checks -; for Misaligned access -; -ENTRY(call_do_page_fault) - - EXCEPTION_PROLOGUE - lr r0, [efa] ; Faulting Data address - mov r1, sp - FAKE_RET_FROM_EXCPN - - mov blink, ret_from_exception - b do_page_fault - -END(call_do_page_fault) - ; --------------------------------------------- ; Privilege Violation Exception Handler ; --------------------------------------------- @@ -625,97 +337,7 @@ resume_kernel_mode: ; preempt_schedule_irq() always returns with IRQ disabled #endif - ; fall through - -;############# Return from Intr/Excp/Trap (ARC Specifics) ############## -; -; Restore the saved sys context (common exit-path for EXCPN/IRQ/Trap) -; IRQ shd definitely not happen between now and rtie -; All 2 entry points to here already disable interrupts - -.Lrestore_regs: - - TRACE_ASM_IRQ_ENABLE - - lr r10, [status32] - - ; Restore REG File. In case multiple Events outstanding, - ; use the same priorty as rtie: EXCPN, L2 IRQ, L1 IRQ, None - ; Note that we use realtime STATUS32 (not pt_regs->status32) to - ; decide that. - - ; if Returning from Exception - bbit0 r10, STATUS_AE_BIT, not_exception - EXCEPTION_EPILOGUE - rtie - - ; Not Exception so maybe Interrupts (Level 1 or 2) - -not_exception: - -#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS - - ; Level 2 interrupt return Path - from hardware standpoint - bbit0 r10, STATUS_A2_BIT, not_level2_interrupt - - ;------------------------------------------------------------------ - ; However the context returning might not have taken L2 intr itself - ; e.g. Task'A' user-code -> L2 intr -> schedule -> 'B' user-code ret - ; Special considerations needed for the context which took L2 intr - - ld r9, [sp, PT_event] ; Ensure this is L2 intr context - brne r9, event_IRQ2, 149f - - ;------------------------------------------------------------------ - ; if L2 IRQ interrupted an L1 ISR, we'd disabled preemption earlier - ; so that sched doesn't move to new task, causing L1 to be delayed - ; undeterministically. Now that we've achieved that, let's reset - ; things to what they were, before returning from L2 context - ;---------------------------------------------------------------- - - ld r9, [sp, PT_status32] ; get statu32_l2 (saved in pt_regs) - bbit0 r9, STATUS_A1_BIT, 149f ; L1 not active when L2 IRQ, so normal - - ; decrement thread_info->preempt_count (re-enable preemption) - GET_CURR_THR_INFO_FROM_SP r10 - ld r9, [r10, THREAD_INFO_PREEMPT_COUNT] - - ; paranoid check, given A1 was active when A2 happened, preempt count - ; must not be 0 because we would have incremented it. - ; If this does happen we simply HALT as it means a BUG !!! 
-	cmp	r9, 0
-	bnz	2f
-	flag	1
-
-2:
-	sub	r9, r9, 1
-	st	r9, [r10, THREAD_INFO_PREEMPT_COUNT]
-
-149:
-	;return from level 2
-	INTERRUPT_EPILOGUE 2
-debug_marker_l2:
-	rtie
-
-not_level2_interrupt:
-
-#endif
-
-	bbit0	r10, STATUS_A1_BIT, not_level1_interrupt
-
-	;return from level 1
-	INTERRUPT_EPILOGUE 1
-debug_marker_l1:
-	rtie
-
-not_level1_interrupt:
-
-	;this case is for syscalls or Exceptions (with fake rtie)
-
-	EXCEPTION_EPILOGUE
-debug_marker_syscall:
-	rtie
-
+	b	.Lrestore_regs
 END(ret_from_exception)

 ENTRY(ret_from_fork)
@@ -762,4 +384,7 @@ END(sys_clone_wrapper)
 ; This also fixes STAR 9000487933 where the prev-workaround (objcopy --setflag)
 ; would not work after a clean build due to kernel build system dependencies.
 .section .debug_frame, "wa",@progbits
+
+; Reset to .text as this file is included in entry-<isa>.S
+.section .text, "ax",@progbits
 #endif
-- cgit v1.2.3-58-ga151


From 62fb64034d30293448de10a48c7ee47ee978e338 Mon Sep 17 00:00:00 2001
From: Vineet Gupta
Date: Fri, 13 Mar 2015 11:50:23 +0530
Subject: ARC: entry.S: micro-optimize Trap handler

Elide the need to re-read ECR in the Trap handler by having
EXCEPTION_PROLOGUE do that at the very end, just before returning to the
Trap handler. The ARCv2 EXCEPTION_PROLOGUE already did so; do the same
for ARCompact, and adjust the common trap handler to use the cached ECR.

Signed-off-by: Vineet Gupta
---
 arch/arc/include/asm/entry-compact.h | 5 +++--
 arch/arc/kernel/entry.S              | 4 ++--
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'arch/arc/include')

diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h
index 6c0a81b598d2..8b65eb36655a 100644
--- a/arch/arc/include/asm/entry-compact.h
+++ b/arch/arc/include/asm/entry-compact.h
@@ -181,8 +181,6 @@
 	/* ARC700 doesn't provide auto-stack switching */
 	SWITCH_TO_KERNEL_STK

-	lr	r9, [ecr]
-	st	r9, [sp, 8]	/* ECR */
 	st	r0, [sp, 4]	/* orig_r0, needed only for sys calls */

 	/* Restore r9 used to code the early prologue */
@@ -198,6 +196,9 @@
 	PUSHAX	lp_end
 	PUSHAX	lp_start
 	PUSHAX	erbta
+
+	lr	r9, [ecr]
+	st	r9, [sp, PT_event]	/* EV_Trap expects r9 to have ECR */
 .endm

 /*--------------------------------------------------------------
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index 603266eb75e1..f7a82fd4d601 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -258,8 +258,8 @@ ENTRY(EV_Trap)
 	EXCEPTION_PROLOGUE

 	;============ TRAP 1   :breakpoints
-	lr	r10, [ecr]
-	bmsk.f	0, r10, 7
+	; Check ECR for trap with arg (PROLOGUE ensures r9 has ECR)
+	bmsk.f	0, r9, 7
 	bnz	trap_with_param

 	;============ TRAP  (no param): syscall top level
-- cgit v1.2.3-58-ga151
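For reference, the trap-with-arg test above ("bmsk.f 0, r9, 7") just
masks ECR[7:0] and sets Z when the parameter is zero; a C sketch of the
same check (ecr_param is a hypothetical helper name):

	/* ECR[15:8] = cause code, ECR[7:0] = parameter (certain types only) */
	static inline unsigned int ecr_param(unsigned int ecr)
	{
		return ecr & 0xff;	/* non-zero => trap_with_param (breakpoint etc.) */
	}
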
From 5a343b9fe22995c43b1209b0b63ea0fa2824cbae Mon Sep 17 00:00:00 2001
From: Vineet Gupta
Date: Sat, 28 Mar 2015 21:36:00 +0530
Subject: ARC: entry.S: [arcompact] simplify SWITCH_TO_KERNEL_STK

Previously this macro was overloaded: it switched the stack, saved SP at
the right slot in pt_regs, saved/set up r25, and set the SP baseline to
where pt_regs->sp is saved (vs. the bottom of pt_regs).

Now it only does the SP switch, and leaves SP pointing to the bottom of
pt_regs. r25 saving is no longer done here, to allow future reordering of
the regfile in pt_regs w/o touching this macro.

Signed-off-by: Vineet Gupta
---
 arch/arc/include/asm/entry-compact.h | 71 ++++++++++++++++++------------------
 1 file changed, 35 insertions(+), 36 deletions(-)

(limited to 'arch/arc/include')

diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h
index 8b65eb36655a..1174ddd4cac8 100644
--- a/arch/arc/include/asm/entry-compact.h
+++ b/arch/arc/include/asm/entry-compact.h
@@ -39,8 +39,8 @@
  * Switch to Kernel Mode stack if SP points to User Mode stack
  *
  * Entry : r9 contains pre-IRQ/exception/trap status32
- * Exit : SP is set to kernel mode stack pointer
- *        If CURR_IN_REG, r25 set to "current" task pointer
+ * Exit : SP set to K mode stack
+ *        SP at the time of entry (K/U) saved @ pt_regs->sp
  * Clobbers: r9
  *-------------------------------------------------------------*/

@@ -80,12 +80,11 @@

 #endif

-	/* Save Pre Intr/Exception KERNEL MODE SP on kernel stack
-	 * safe-keeping not really needed, but it keeps the epilogue code
-	 * (SP restore) simpler/uniform.
-	 */
+	/*------ Intr/Excp happened in kernel mode, SP already set up ------ */
+	/* save it nevertheless @ pt_regs->sp for uniformity */
+
 	b.d	66f
-	mov	r9, sp
+	st	sp, [sp, PT_sp - SZ_PT_REGS]

 88: /*------ Intr/Excp happened in user mode, "switch" stack ------ */

 	/* With current tsk in r9, get its kernel mode stack base */
 	GET_TSK_STACK_BASE r9, r9

-66:
-#ifdef CONFIG_ARC_CURR_IN_REG
-	/*
-	 * Treat r25 as scratch reg, save it on stack first
-	 * Load it with current task pointer
-	 */
-	st	r25, [r9, -4]
-	GET_CURR_TASK_ON_CPU r25
-#endif
-
-	/* Save Pre Intr/Exception User SP on kernel stack */
-	st.a	sp, [r9, -16]	; Make room for orig_r0, ECR, user_r25
-
-	/* CAUTION:
-	 * SP should be set at the very end when we are done with everything
-	 * In case of 2 levels of interrupt we depend on value of SP to assume
-	 * that everything else is done (loading r25 etc)
-	 */
-
-	/* set SP to point to kernel mode stack */
-	mov	sp, r9
-
-	/* ----- Stack Switched to kernel Mode, Now save REG FILE ----- */
+	/* save U mode SP @ pt_regs->sp */
+	st	sp, [r9, PT_sp - SZ_PT_REGS]

+	/* final SP switch */
+	mov	sp, r9
+66:
 .endm

 /*------------------------------------------------------------
@@ -181,11 +162,21 @@
 	/* ARC700 doesn't provide auto-stack switching */
 	SWITCH_TO_KERNEL_STK

-	st	r0, [sp, 4]	/* orig_r0, needed only for sys calls */
+#ifdef CONFIG_ARC_CURR_IN_REG
+	/* Treat r25 as scratch reg (save on stack) and load with "current" */
+	PUSH	r25
+	GET_CURR_TASK_ON_CPU r25
+#else
+	sub	sp, sp, 4
+#endif
+
+	st.a	r0, [sp, -8]	/* orig_r0 needed for syscall (skip ECR slot) */
+	sub	sp, sp, 4	/* skip pt_regs->sp, already saved above */

 	/* Restore r9 used to code the early prologue */
 	PROLOG_RESTORE_REG r9, @ex_saved_reg1

+	/* now we are ready to save the regfile */
 	SAVE_R0_TO_R12
 	PUSH	gp
 	PUSH	fp
@@ -245,12 +236,20 @@

 	SWITCH_TO_KERNEL_STK

-	/* restore original r9 */
-	PROLOG_RESTORE_REG r9, @int\LVL\()_saved_reg
+#ifdef CONFIG_ARC_CURR_IN_REG
+	/* Treat r25 as scratch reg (save on stack) and load with "current" */
+	PUSH	r25
+	GET_CURR_TASK_ON_CPU r25
+#else
+	sub	sp, sp, 4
+#endif

-	/* now we are ready to save the remaining context */
-	st	0x003\LVL\()abcd, [sp, 8]	/* Dummy ECR */
-	st	0, [sp, 4]	/* orig_r0 , N/A for IRQ */
+	PUSH	0x003\LVL\()abcd	/* Dummy ECR */
+	sub	sp, sp, 8	/* skip orig_r0 (not needed)
+				   skip pt_regs->sp, already saved above */
+
+	/* Restore r9 used to code the early prologue
*/ + PROLOG_RESTORE_REG r9, @int\LVL\()_saved_reg SAVE_R0_TO_R12 PUSH gp -- cgit v1.2.3-58-ga151 From 5793e273a134331d05ed904e5be3b31ccfca54c1 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 5 Mar 2015 19:13:56 +0530 Subject: ARC: intc: split into ARCompact ISA specific, common bits Signed-off-by: Vineet Gupta --- arch/arc/include/asm/entry-compact.h | 1 + arch/arc/include/asm/irqflags-compact.h | 181 +++++++++++++++++++++++++ arch/arc/include/asm/irqflags.h | 168 +----------------------- arch/arc/kernel/Makefile | 2 +- arch/arc/kernel/intc-compact.c | 226 ++++++++++++++++++++++++++++++++ arch/arc/kernel/irq.c | 210 ----------------------------- arch/arc/kernel/process.c | 3 +- 7 files changed, 412 insertions(+), 379 deletions(-) create mode 100644 arch/arc/include/asm/irqflags-compact.h create mode 100644 arch/arc/kernel/intc-compact.c (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h index 1174ddd4cac8..415443c2a8c4 100644 --- a/arch/arc/include/asm/entry-compact.h +++ b/arch/arc/include/asm/entry-compact.h @@ -33,6 +33,7 @@ #define __ASM_ARC_ENTRY_COMPACT_H #include +#include #include /* For THREAD_SIZE */ /*-------------------------------------------------------------- diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h new file mode 100644 index 000000000000..18f3634ac347 --- /dev/null +++ b/arch/arc/include/asm/irqflags-compact.h @@ -0,0 +1,181 @@ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_IRQFLAGS_ARCOMPACT_H +#define __ASM_IRQFLAGS_ARCOMPACT_H + +/* vineetg: March 2010 : local_irq_save( ) optimisation + * -Remove explicit mov of current status32 into reg, that is not needed + * -Use BIC insn instead of INVERTED + AND + * -Conditionally disable interrupts (if they are not enabled, don't disable) +*/ + +#include + +/* status32 Reg bits related to Interrupt Handling */ +#define STATUS_E1_BIT 1 /* Int 1 enable */ +#define STATUS_E2_BIT 2 /* Int 2 enable */ +#define STATUS_A1_BIT 3 /* Int 1 active */ +#define STATUS_A2_BIT 4 /* Int 2 active */ + +#define STATUS_E1_MASK (1< - -/* status32 Reg bits related to Interrupt Handling */ -#define STATUS_E1_BIT 1 /* Int 1 enable */ -#define STATUS_E2_BIT 2 /* Int 2 enable */ -#define STATUS_A1_BIT 3 /* Int 1 active */ -#define STATUS_A2_BIT 4 /* Int 2 active */ - -#define STATUS_E1_MASK (1< #endif diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile index 024a63e90b72..cc929c0e2133 100644 --- a/arch/arc/kernel/Makefile +++ b/arch/arc/kernel/Makefile @@ -10,7 +10,7 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' obj-y := arcksyms.o setup.o irq.o time.o reset.o ptrace.o process.o devtree.o obj-y += signal.o traps.o sys.o troubleshoot.o stacktrace.o disasm.o clk.o -obj-y += entry-compact.o +obj-y += entry-compact.o intc-compact.o obj-$(CONFIG_MODULES) += arcksyms.o module.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c new file mode 100644 index 000000000000..fcdddb631766 --- /dev/null +++ b/arch/arc/kernel/intc-compact.c @@ -0,0 +1,226 @@ +/* + * Copyright (C) 2011-12 Synopsys, Inc. 
(www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include "../../drivers/irqchip/irqchip.h" +#include + +/* + * Early Hardware specific Interrupt setup + * -Platform independent, needed for each CPU (not foldable into init_IRQ) + * -Called very early (start_kernel -> setup_arch -> setup_processor) + * + * what it does ? + * -Optionally, setup the High priority Interrupts as Level 2 IRQs + */ +void arc_init_IRQ(void) +{ + int level_mask = 0; + + /* setup any high priority Interrupts (Level2 in ARCompact jargon) */ + level_mask |= IS_ENABLED(CONFIG_ARC_IRQ3_LV2) << 3; + level_mask |= IS_ENABLED(CONFIG_ARC_IRQ5_LV2) << 5; + level_mask |= IS_ENABLED(CONFIG_ARC_IRQ6_LV2) << 6; + + /* + * Write to register, even if no LV2 IRQs configured to reset it + * in case bootloader had mucked with it + */ + write_aux_reg(AUX_IRQ_LEV, level_mask); + + if (level_mask) + pr_info("Level-2 interrupts bitset %x\n", level_mask); +} + +/* + * ARC700 core includes a simple on-chip intc supporting + * -per IRQ enable/disable + * -2 levels of interrupts (high/low) + * -all interrupts being level triggered + * + * To reduce platform code, we assume all IRQs directly hooked-up into intc. + * Platforms with external intc, hence cascaded IRQs, are free to over-ride + * below, per IRQ. + */ + +static void arc_irq_mask(struct irq_data *data) +{ + unsigned int ienb; + + ienb = read_aux_reg(AUX_IENABLE); + ienb &= ~(1 << data->irq); + write_aux_reg(AUX_IENABLE, ienb); +} + +static void arc_irq_unmask(struct irq_data *data) +{ + unsigned int ienb; + + ienb = read_aux_reg(AUX_IENABLE); + ienb |= (1 << data->irq); + write_aux_reg(AUX_IENABLE, ienb); +} + +static struct irq_chip onchip_intc = { + .name = "ARC In-core Intc", + .irq_mask = arc_irq_mask, + .irq_unmask = arc_irq_unmask, +}; + +static int arc_intc_domain_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hw) +{ + /* + * XXX: the IPI IRQ needs to be handled like TIMER too. However ARC core + * code doesn't own it (like TIMER0). ISS IDU / ezchip define it + * in platform header which can't be included here as it goes + * against multi-platform image philisophy + */ + if (irq == TIMER0_IRQ) + irq_set_chip_and_handler(irq, &onchip_intc, handle_percpu_irq); + else + irq_set_chip_and_handler(irq, &onchip_intc, handle_level_irq); + + return 0; +} + +static const struct irq_domain_ops arc_intc_domain_ops = { + .xlate = irq_domain_xlate_onecell, + .map = arc_intc_domain_map, +}; + +static struct irq_domain *root_domain; + +static int __init +init_onchip_IRQ(struct device_node *intc, struct device_node *parent) +{ + if (parent) + panic("DeviceTree incore intc not a root irq controller\n"); + + root_domain = irq_domain_add_legacy(intc, NR_CPU_IRQS, 0, 0, + &arc_intc_domain_ops, NULL); + + if (!root_domain) + panic("root irq domain not avail\n"); + + /* with this we don't need to export root_domain */ + irq_set_default_host(root_domain); + + return 0; +} + +IRQCHIP_DECLARE(arc_intc, "snps,arc700-intc", init_onchip_IRQ); + +/* + * arch_local_irq_enable - Enable interrupts. + * + * 1. Explicitly called to re-enable interrupts + * 2. 
Implicitly called from spin_unlock_irq, write_unlock_irq etc,
+ *    which may be in a hard ISR itself
+ *
+ * Semantics of this function change depending on where it is called from:
+ *
+ * -If called from hard-ISR, it must not invert interrupt priorities
+ *  e.g. suppose TIMER is high priority (Level 2) IRQ
+ *    Timer hard-ISR, timer_interrupt( ) calls spin_unlock_irq several times.
+ *    Here local_irq_enable( ) shd not re-enable lower priority interrupts
+ * -If called from soft-ISR, it must re-enable all interrupts
+ *    soft ISRs are low priority jobs which can be very slow, thus all IRQs
+ *    must be enabled while they run.
+ *    Now hardware context wise we may still be in L2 ISR (not done rtie)
+ *    still we must re-enable both L1 and L2 IRQs
+ *  Another twist is prev scenario with flow being
+ *  L1 ISR ==> interrupted by L2 ISR ==> L2 soft ISR
+ *  here we must not re-enable L1 as prev L1 Interrupt's h/w context will get
+ *  over-written (this is a deficiency of the ARC700 Interrupt mechanism)
+ */
+
+#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS	/* Complex version for 2 IRQ levels */
+
+void arch_local_irq_enable(void)
+{
+
+	unsigned long flags = arch_local_save_flags();
+
+	/* Allow both L1 and L2 at the onset */
+	flags |= (STATUS_E1_MASK | STATUS_E2_MASK);
+
+	/* Called from hard ISR (between irq_enter and irq_exit) */
+	if (in_irq()) {
+
+		/* If in L2 ISR, don't re-enable any further IRQs as this can
+		 * cause IRQ priorities to get upside down. e.g. it could allow
+		 * L1 to be taken while in L2 hard ISR which is wrong not only
+		 * in theory, it can also cause the dreaded L1-L2-L1 scenario
+		 */
+		if (flags & STATUS_A2_MASK)
+			flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK);
+
+		/* Even if in L1 ISR, allow higher prio L2 IRQs */
+		else if (flags & STATUS_A1_MASK)
+			flags &= ~(STATUS_E1_MASK);
+	}
+
+	/* called from soft IRQ, ideally we want to re-enable all levels */
+
+	else if (in_softirq()) {
+
+		/* However if this is case of L1 interrupted by L2,
+		 * re-enabling both may cause the whacky L1-L2-L1 scenario
+		 * because ARC700 allows level 1 to interrupt an active L2 ISR
+		 * Thus we disable both
+		 * However some code, executing in soft ISR wants some IRQs
+		 * to be enabled so we re-enable L2 only
+		 *
+		 * How do we determine if L1 was interrupted by L2:
+		 * -A2 is set (means in L2 ISR)
+		 * -E1 is set in this ISR's pt_regs->status32 which is
+		 *    the saved copy of status32_l2 when the L2 ISR happened
+		 */
+		struct pt_regs *pt = get_irq_regs();
+
+		if ((flags & STATUS_A2_MASK) && pt &&
+		    (pt->status32 & STATUS_A1_MASK)) {
+			/*flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK); */
+			flags &= ~(STATUS_E1_MASK);
+		}
+	}
+
+	arch_local_irq_restore(flags);
+}
+
+#else /* !
CONFIG_ARC_COMPACT_IRQ_LEVELS */ + +/* + * Simpler version for only 1 level of interrupt + * Here we only Worry about Level 1 Bits + */ +void arch_local_irq_enable(void) +{ + unsigned long flags; + + /* + * ARC IDE Drivers tries to re-enable interrupts from hard-isr + * context which is simply wrong + */ + if (in_irq()) { + WARN_ONCE(1, "IRQ enabled from hard-isr"); + return; + } + + flags = arch_local_save_flags(); + flags |= (STATUS_E1_MASK | STATUS_E2_MASK); + arch_local_irq_restore(flags); +} +#endif +EXPORT_SYMBOL(arch_local_irq_enable); diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c index 620ec2fe32a9..2989a7bcf8a8 100644 --- a/arch/arc/kernel/irq.c +++ b/arch/arc/kernel/irq.c @@ -8,115 +8,9 @@ */ #include -#include -#include -#include #include -#include "../../drivers/irqchip/irqchip.h" -#include -#include #include -/* - * Early Hardware specific Interrupt setup - * -Platform independent, needed for each CPU (not foldable into init_IRQ) - * -Called very early (start_kernel -> setup_arch -> setup_processor) - * - * what it does ? - * -Optionally, setup the High priority Interrupts as Level 2 IRQs - */ -void arc_init_IRQ(void) -{ - int level_mask = 0; - - /* setup any high priority Interrupts (Level2 in ARCompact jargon) */ - level_mask |= IS_ENABLED(CONFIG_ARC_IRQ3_LV2) << 3; - level_mask |= IS_ENABLED(CONFIG_ARC_IRQ5_LV2) << 5; - level_mask |= IS_ENABLED(CONFIG_ARC_IRQ6_LV2) << 6; - - /* - * Write to register, even if no LV2 IRQs configured to reset it - * in case bootloader had mucked with it - */ - write_aux_reg(AUX_IRQ_LEV, level_mask); - - if (level_mask) - pr_info("Level-2 interrupts bitset %x\n", level_mask); -} - -/* - * ARC700 core includes a simple on-chip intc supporting - * -per IRQ enable/disable - * -2 levels of interrupts (high/low) - * -all interrupts being level triggered - * - * To reduce platform code, we assume all IRQs directly hooked-up into intc. - * Platforms with external intc, hence cascaded IRQs, are free to over-ride - * below, per IRQ. 
- */ - -static void arc_irq_mask(struct irq_data *data) -{ - unsigned int ienb; - - ienb = read_aux_reg(AUX_IENABLE); - ienb &= ~(1 << data->irq); - write_aux_reg(AUX_IENABLE, ienb); -} - -static void arc_irq_unmask(struct irq_data *data) -{ - unsigned int ienb; - - ienb = read_aux_reg(AUX_IENABLE); - ienb |= (1 << data->irq); - write_aux_reg(AUX_IENABLE, ienb); -} - -static struct irq_chip onchip_intc = { - .name = "ARC In-core Intc", - .irq_mask = arc_irq_mask, - .irq_unmask = arc_irq_unmask, -}; - -static int arc_intc_domain_map(struct irq_domain *d, unsigned int irq, - irq_hw_number_t hw) -{ - if (irq == TIMER0_IRQ) - irq_set_chip_and_handler(irq, &onchip_intc, handle_percpu_irq); - else - irq_set_chip_and_handler(irq, &onchip_intc, handle_level_irq); - - return 0; -} - -static const struct irq_domain_ops arc_intc_domain_ops = { - .xlate = irq_domain_xlate_onecell, - .map = arc_intc_domain_map, -}; - -static struct irq_domain *root_domain; - -static int __init -init_onchip_IRQ(struct device_node *intc, struct device_node *parent) -{ - if (parent) - panic("DeviceTree incore intc not a root irq controller\n"); - - root_domain = irq_domain_add_legacy(intc, NR_CPU_IRQS, 0, 0, - &arc_intc_domain_ops, NULL); - - if (!root_domain) - panic("root irq domain not avail\n"); - - /* with this we don't need to export root_domain */ - irq_set_default_host(root_domain); - - return 0; -} - -IRQCHIP_DECLARE(arc_intc, "snps,arc700-intc", init_onchip_IRQ); - /* * Late Interrupt system init called from start_kernel for Boot CPU only * @@ -178,107 +72,3 @@ void arc_request_percpu_irq(int irq, int cpu, enable_percpu_irq(irq, 0); } - -/* - * arch_local_irq_enable - Enable interrupts. - * - * 1. Explicitly called to re-enable interrupts - * 2. Implicitly called from spin_unlock_irq, write_unlock_irq etc - * which maybe in hard ISR itself - * - * Semantics of this function change depending on where it is called from: - * - * -If called from hard-ISR, it must not invert interrupt priorities - * e.g. suppose TIMER is high priority (Level 2) IRQ - * Time hard-ISR, timer_interrupt( ) calls spin_unlock_irq several times. - * Here local_irq_enable( ) shd not re-enable lower priority interrupts - * -If called from soft-ISR, it must re-enable all interrupts - * soft ISR are low prioity jobs which can be very slow, thus all IRQs - * must be enabled while they run. - * Now hardware context wise we may still be in L2 ISR (not done rtie) - * still we must re-enable both L1 and L2 IRQs - * Another twist is prev scenario with flow being - * L1 ISR ==> interrupted by L2 ISR ==> L2 soft ISR - * here we must not re-enable Ll as prev Ll Interrupt's h/w context will get - * over-written (this is deficiency in ARC700 Interrupt mechanism) - */ - -#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS /* Complex version for 2 IRQ levels */ - -void arch_local_irq_enable(void) -{ - - unsigned long flags; - flags = arch_local_save_flags(); - - /* Allow both L1 and L2 at the onset */ - flags |= (STATUS_E1_MASK | STATUS_E2_MASK); - - /* Called from hard ISR (between irq_enter and irq_exit) */ - if (in_irq()) { - - /* If in L2 ISR, don't re-enable any further IRQs as this can - * cause IRQ priorities to get upside down. e.g. 
it could allow - * L1 be taken while in L2 hard ISR which is wrong not only in - * theory, it can also cause the dreaded L1-L2-L1 scenario - */ - if (flags & STATUS_A2_MASK) - flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK); - - /* Even if in L1 ISR, allowe Higher prio L2 IRQs */ - else if (flags & STATUS_A1_MASK) - flags &= ~(STATUS_E1_MASK); - } - - /* called from soft IRQ, ideally we want to re-enable all levels */ - - else if (in_softirq()) { - - /* However if this is case of L1 interrupted by L2, - * re-enabling both may cause whaco L1-L2-L1 scenario - * because ARC700 allows level 1 to interrupt an active L2 ISR - * Thus we disable both - * However some code, executing in soft ISR wants some IRQs - * to be enabled so we re-enable L2 only - * - * How do we determine L1 intr by L2 - * -A2 is set (means in L2 ISR) - * -E1 is set in this ISR's pt_regs->status32 which is - * saved copy of status32_l2 when l2 ISR happened - */ - struct pt_regs *pt = get_irq_regs(); - if ((flags & STATUS_A2_MASK) && pt && - (pt->status32 & STATUS_A1_MASK)) { - /*flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK); */ - flags &= ~(STATUS_E1_MASK); - } - } - - arch_local_irq_restore(flags); -} - -#else /* ! CONFIG_ARC_COMPACT_IRQ_LEVELS */ - -/* - * Simpler version for only 1 level of interrupt - * Here we only Worry about Level 1 Bits - */ -void arch_local_irq_enable(void) -{ - unsigned long flags; - - /* - * ARC IDE Drivers tries to re-enable interrupts from hard-isr - * context which is simply wrong - */ - if (in_irq()) { - WARN_ONCE(1, "IRQ enabled from hard-isr"); - return; - } - - flags = arch_local_save_flags(); - flags |= (STATUS_E1_MASK | STATUS_E2_MASK); - arch_local_irq_restore(flags); -} -#endif -EXPORT_SYMBOL(arch_local_irq_enable); diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index e095c557afdd..b5426babd3c8 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -166,8 +166,7 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long usp) * [L] ZOL loop inhibited to begin with - cleared by a LP insn * Interrupts enabled */ - regs->status32 = STATUS_U_MASK | STATUS_L_MASK | - STATUS_E1_MASK | STATUS_E2_MASK; + regs->status32 = STATUS_U_MASK | STATUS_L_MASK | STATUS_IE_MASK; /* bogus seed values for debugging */ regs->lp_start = 0x10; -- cgit v1.2.3-58-ga151 From 10d11e580c50f8a6718a58f92198dbc031e63b0a Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 16 Apr 2015 19:49:12 +0530 Subject: ARC: uncached base is hard constant for ARC, don't save it ioremap already uses the hard define, just make sure BCR value matches that Signed-off-by: Vineet Gupta --- arch/arc/include/asm/arcregs.h | 1 - arch/arc/kernel/setup.c | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index 3ab66fcd9df1..336a9f694c2e 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -320,7 +320,6 @@ struct cpuinfo_arc { struct bcr_isa isa; struct bcr_timer timers; unsigned int vec_base; - unsigned int uncached_base; struct cpuinfo_arc_ccm iccm, dccm; struct { unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, pad1:3, diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index c6e8b72ff293..96d44805ea56 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -54,7 +54,7 @@ static void read_arc_build_cfg_regs(void) cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE); READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space); - 
cpu->uncached_base = uncached_space.start << 24; + BUG_ON((uncached_space.start << 24) != ARC_UNCACHED_ADDR_SPACE); READ_BCR(ARC_REG_MUL_BCR, cpu->extn_mpy); @@ -218,7 +218,7 @@ static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len) n += scnprintf(buf + n, len - n, "Vector Table\t: %#x\nUncached Base\t: %#x\n", - cpu->vec_base, cpu->uncached_base); + cpu->vec_base, ARC_UNCACHED_ADDR_SPACE); if (cpu->extn.fpu_sp || cpu->extn.fpu_dp) n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n", -- cgit v1.2.3-58-ga151 From 820970a5aa3c98be26e1df64da4b93294d20d4e7 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 6 Mar 2015 14:08:20 +0530 Subject: ARCv2: [intc] HS38 core interrupt controller Cc: Jason Cooper Cc: Thomas Gleixner Signed-off-by: Vineet Gupta --- .../devicetree/bindings/arc/archs-intc.txt | 22 ++++ arch/arc/include/asm/arcregs.h | 1 + arch/arc/include/asm/irqflags-arcv2.h | 116 +++++++++++++++++ arch/arc/kernel/intc-arcv2.c | 143 +++++++++++++++++++++ 4 files changed, 282 insertions(+) create mode 100644 Documentation/devicetree/bindings/arc/archs-intc.txt create mode 100644 arch/arc/include/asm/irqflags-arcv2.h create mode 100644 arch/arc/kernel/intc-arcv2.c (limited to 'arch/arc/include') diff --git a/Documentation/devicetree/bindings/arc/archs-intc.txt b/Documentation/devicetree/bindings/arc/archs-intc.txt new file mode 100644 index 000000000000..69f326d6a5ad --- /dev/null +++ b/Documentation/devicetree/bindings/arc/archs-intc.txt @@ -0,0 +1,22 @@ +* ARC-HS incore Interrupt Controller (Provided by cores implementing ARCv2 ISA) + +Properties: + +- compatible: "snps,archs-intc" +- interrupt-controller: This is an interrupt controller. +- #interrupt-cells: Must be <1>. + + Single Cell "interrupts" property of a device specifies the IRQ number + between 16 to 256 + + intc accessed via the special ARC AUX register interface, hence "reg" property + is not specified. + +Example: + + intc: interrupt-controller { + compatible = "snps,archs-intc"; + interrupt-controller; + #interrupt-cells = <1>; + interrupts = <16 17 18 19 20 21 22 23 24 25>; + }; diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index 336a9f694c2e..649646579986 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -31,6 +31,7 @@ #define ARC_REG_BPU_BCR 0xc0 #define ARC_REG_ISA_CFG_BCR 0xc1 #define ARC_REG_RTT_BCR 0xF2 +#define ARC_REG_IRQ_BCR 0xF3 #define ARC_REG_SMART_BCR 0xFF /* status32 Bits Positions */ diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h new file mode 100644 index 000000000000..c946c56f141c --- /dev/null +++ b/arch/arc/include/asm/irqflags-arcv2.h @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __ASM_IRQFLAGS_ARCV2_H +#define __ASM_IRQFLAGS_ARCV2_H + +#include + +/* status32 Bits */ +#define STATUS_AD_BIT 19 /* Disable Align chk: core supports non-aligned */ +#define STATUS_IE_BIT 31 + +#define STATUS_AD_MASK (1< +#include +#include +#include +#include +#include "../../drivers/irqchip/irqchip.h" +#include + +/* + * Early Hardware specific Interrupt setup + * -Called very early (start_kernel -> setup_arch -> setup_processor) + * -Platform Independent (must for any ARC Core) + * -Needed for each CPU (hence not foldable into init_IRQ) + */ +void arc_init_IRQ(void) +{ + unsigned int tmp; + + struct aux_irq_ctrl { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int res3:18, save_idx_regs:1, res2:1, + save_u_to_u:1, save_lp_regs:1, save_blink:1, + res:4, save_nr_gpr_pairs:5; +#else + unsigned int save_nr_gpr_pairs:5, res:4, + save_blink:1, save_lp_regs:1, save_u_to_u:1, + res2:1, save_idx_regs:1, res3:18; +#endif + } ictrl; + + *(unsigned int *)&ictrl = 0; + + ictrl.save_nr_gpr_pairs = 6; /* r0 to r11 (r12 saved manually) */ + ictrl.save_blink = 1; + ictrl.save_lp_regs = 1; /* LP_COUNT, LP_START, LP_END */ + ictrl.save_u_to_u = 0; /* user ctxt saved on kernel stack */ + ictrl.save_idx_regs = 1; /* JLI, LDI, EI */ + + WRITE_AUX(AUX_IRQ_CTRL, ictrl); + + /* setup status32, don't enable intr yet as kernel doesn't want */ + tmp = read_aux_reg(0xa); + tmp |= ISA_INIT_STATUS_BITS; + tmp &= ~STATUS_IE_MASK; + asm volatile("flag %0 \n"::"r"(tmp)); + + /* + * ARCv2 core intc provides multiple interrupt priorities (upto 16). + * Typical builds though have only two levels (0-high, 1-low) + * Linux by default uses lower prio 1 for most irqs, reserving 0 for + * NMI style interrupts in future (say perf) + * + * Read the intc BCR to confirm that Linux default priority is avail + * in h/w + * + * Note: + * IRQ_BCR[27..24] contains N-1 (for N priority levels) and prio level + * is 0 based. 
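+ * e.g. IRQ_BCR[27..24] == 1 encodes two priority levels (0 and 1),
+ * which is just enough for the default prio 1 checked below.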
+ */ + tmp = (read_aux_reg(ARC_REG_IRQ_BCR) >> 24 ) & 0xF; + if (ARCV2_IRQ_DEF_PRIO > tmp) + panic("Linux default irq prio incorrect\n"); +} + +static void arcv2_irq_mask(struct irq_data *data) +{ + write_aux_reg(AUX_IRQ_SELECT, data->irq); + write_aux_reg(AUX_IRQ_ENABLE, 0); +} + +static void arcv2_irq_unmask(struct irq_data *data) +{ + write_aux_reg(AUX_IRQ_SELECT, data->irq); + write_aux_reg(AUX_IRQ_ENABLE, 1); +} + +void arcv2_irq_enable(struct irq_data *data) +{ + /* set default priority */ + write_aux_reg(AUX_IRQ_SELECT, data->irq); + write_aux_reg(AUX_IRQ_PRIORITY, ARCV2_IRQ_DEF_PRIO); + + /* + * hw auto enables (linux unmask) all by default + * So no need to do IRQ_ENABLE here + * XXX: However OSCI LAN need it + */ + write_aux_reg(AUX_IRQ_ENABLE, 1); +} + +static struct irq_chip arcv2_irq_chip = { + .name = "ARCv2 core Intc", + .irq_mask = arcv2_irq_mask, + .irq_unmask = arcv2_irq_unmask, + .irq_enable = arcv2_irq_enable +}; + +static int arcv2_irq_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hw) +{ + if (irq == TIMER0_IRQ) + irq_set_chip_and_handler(irq, &arcv2_irq_chip, handle_percpu_irq); + else + irq_set_chip_and_handler(irq, &arcv2_irq_chip, handle_level_irq); + + return 0; +} + +static const struct irq_domain_ops arcv2_irq_ops = { + .xlate = irq_domain_xlate_onecell, + .map = arcv2_irq_map, +}; + +static struct irq_domain *root_domain; + +static int __init +init_onchip_IRQ(struct device_node *intc, struct device_node *parent) +{ + if (parent) + panic("DeviceTree incore intc not a root irq controller\n"); + + root_domain = irq_domain_add_legacy(intc, NR_CPU_IRQS, 0, 0, + &arcv2_irq_ops, NULL); + + if (!root_domain) + panic("root irq domain not avail\n"); + + /* with this we don't need to export root_domain */ + irq_set_default_host(root_domain); + + return 0; +} + +IRQCHIP_DECLARE(arc_intc, "snps,archs-intc", init_onchip_IRQ); -- cgit v1.2.3-58-ga151 From 1f6ccfff6314672743ad7252160654709e997a2a Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 13 May 2013 18:30:41 +0530 Subject: ARCv2: Support for ARCv2 ISA and HS38x cores The notable features are: - SMP configurations of upto 4 cores with coherency - Optional L2 Cache and IO-Coherency - Revised Interrupt Architecture (multiple priorites, reg banks, auto stack switch, auto regfile save/restore) - MMUv4 (PIPT dcache, Huge Pages) - Instructions for * 64bit load/store: LDD, STD * Hardware assisted divide/remainder: DIV, REM * Function prologue/epilogue: ENTER_S, LEAVE_S * IRQ enable/disable: CLRI, SETI * pop count: FFS, FLS * SETcc, BMSKN, XBFU... 
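For reference, the 1-based find-bit semantics the new FFS/FLS
instructions get wrapped with (see the bitops.h hunk below); a portable
C sketch, 32-bit words assumed, fls_ref being a hypothetical name:

	/* fls(1) == 1, fls(0x80000000) == 32, fls(0) == 0 */
	static int fls_ref(unsigned int x)
	{
		int n = 0;

		while (x) {	/* count up to the most significant set bit */
			n++;
			x >>= 1;
		}
		return n;
	}
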
Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 80 +++++++++++++- arch/arc/Makefile | 8 +- arch/arc/include/asm/arcregs.h | 53 ++++++++- arch/arc/include/asm/bitops.h | 71 ++++++++++++ arch/arc/include/asm/elf.h | 5 + arch/arc/include/asm/entry-arcv2.h | 190 ++++++++++++++++++++++++++++++++ arch/arc/include/asm/entry.h | 4 + arch/arc/include/asm/irq.h | 5 + arch/arc/include/asm/irqflags-arcv2.h | 3 + arch/arc/include/asm/irqflags-compact.h | 2 + arch/arc/include/asm/irqflags.h | 4 + arch/arc/include/asm/ptrace.h | 43 ++++++++ arch/arc/include/asm/thread_info.h | 1 + arch/arc/kernel/Makefile | 3 +- arch/arc/kernel/entry-arcv2.S | 189 +++++++++++++++++++++++++++++++ arch/arc/kernel/head.S | 2 - arch/arc/kernel/process.c | 19 +++- arch/arc/kernel/ptrace.c | 2 +- arch/arc/kernel/setup.c | 45 +++++++- arch/arc/kernel/signal.c | 6 +- arch/arc/kernel/troubleshoot.c | 33 +++++- arch/arc/mm/tlbex.S | 2 - 22 files changed, 737 insertions(+), 33 deletions(-) create mode 100644 arch/arc/include/asm/entry-arcv2.h create mode 100644 arch/arc/kernel/entry-arcv2.S (limited to 'arch/arc/include') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 1eeefd9763d1..f72398847b5b 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -88,11 +88,31 @@ source "arch/arc/plat-axs10x/Kconfig" endmenu +choice + prompt "ARC Instruction Set" + default ISA_ARCOMPACT + +config ISA_ARCOMPACT + bool "ARCompact ISA" + help + The original ARC ISA of ARC600/700 cores + +### For bisectability, disable ARCv2 support until we have all the bits in place +#config ISA_ARCV2 +# bool "ARC ISA v2" +# help +# ISA for the Next Generation ARC-HS cores + +endchoice + menu "ARC CPU Configuration" choice prompt "ARC Core" - default ARC_CPU_770 + default ARC_CPU_770 if ISA_ARCOMPACT + default ARC_CPU_HS if ISA_ARCV2 + +if ISA_ARCOMPACT config ARC_CPU_750D bool "ARC750D" @@ -110,6 +130,27 @@ config ARC_CPU_770 -Caches: New Prog Model, Region Flush -Insns: endian swap, load-locked/store-conditional, time-stamp-ctr +endif #ISA_ARCOMPACT + +config ARC_CPU_HS + bool "ARC-HS" + depends on ISA_ARCV2 + help + Support for ARC HS38x Cores based on ARCv2 ISA + The notable features are: + - SMP configurations of upto 4 core with coherency + - Optional L2 Cache and IO-Coherency + - Revised Interrupt Architecture (multiple priorites, reg banks, + auto stack switch, auto regfile save/restore) + - MMUv4 (PIPT dcache, Huge Pages) + - Instructions for + * 64bit load/store: LDD, STD + * Hardware assisted divide/remainder: DIV, REM + * Function prologue/epilogue: ENTER_S, LEAVE_S + * IRQ enable/disable: CLRI, SETI + * pop count: FFS, FLS + * SETcc, BMSKN, XBFU... + endchoice config CPU_BIG_ENDIAN @@ -134,7 +175,7 @@ config ARC_HAS_COH_CACHES config ARC_HAS_REENTRANT_IRQ_LV2 def_bool n -endif +endif #SMP config NR_CPUS int "Maximum number of CPUs (2-4096)" @@ -223,7 +264,7 @@ config ARC_HAS_HW_MPY Multipler. Otherwise software multipy lib is used choice - prompt "ARC700 MMU Version" + prompt "MMU Version" default ARC_MMU_V3 if ARC_CPU_770 default ARC_MMU_V2 if ARC_CPU_750D @@ -268,6 +309,8 @@ config ARC_PAGE_SIZE_4K endchoice +if ISA_ARCOMPACT + config ARC_COMPACT_IRQ_LEVELS bool "ARCompact IRQ Priorities: High(2)/Low(1)" default n @@ -287,7 +330,7 @@ config ARC_IRQ5_LV2 config ARC_IRQ6_LV2 bool -endif +endif #ARC_COMPACT_IRQ_LEVELS config ARC_FPU_SAVE_RESTORE bool "Enable FPU state persistence across context switch" @@ -300,18 +343,43 @@ config ARC_FPU_SAVE_RESTORE based on actual usage of FPU by a task. Thus our implemn does this for all tasks in system. 
+endif #ISA_ARCOMPACT + config ARC_CANT_LLSC def_bool n config ARC_HAS_LLSC bool "Insn: LLOCK/SCOND (efficient atomic ops)" default y - depends on ARC_CPU_770 && !ARC_CANT_LLSC + depends on !ARC_CPU_750D && !ARC_CANT_LLSC config ARC_HAS_SWAPE bool "Insn: SWAPE (endian-swap)" default y +if ISA_ARCV2 + +config ARC_HAS_LL64 + bool "Insn: 64bit LDD/STD" + help + Enable gcc to generate 64-bit load/store instructions + ISA mandates even/odd registers to allow encoding of two + dest operands with 2 possible source operands. + default y + +config ARC_NUMBER_OF_INTERRUPTS + int "Number of interrupts" + range 8 240 + default 32 + help + This defines the number of interrupts on the ARCv2HS core. + It affects the size of vector table. + The initial 8 IRQs are fixed (Timer, ICI etc) and although configurable + in hardware, it keep things simple for Linux to assume they are always + present. + +endif # ISA_ARCV2 + endmenu # "ARC CPU Configuration" config LINUX_LINK_BASE @@ -337,8 +405,10 @@ config ARC_CURR_IN_REG config ARC_EMUL_UNALIGNED bool "Emulate unaligned memory access (userspace only)" + default N select SYSCTL_ARCH_UNALIGN_NO_WARN select SYSCTL_ARCH_UNALIGN_ALLOW + depends on ISA_ARCOMPACT help This enables misaligned 16 & 32 bit memory access from user space. Use ONLY-IF-ABS-NECESSARY as it will be very slow and also can hide diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 86c71b2089d2..bf68dc5a08be 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -14,7 +14,9 @@ endif KBUILD_DEFCONFIG := nsim_700_defconfig -cflags-y += -mA7 -fno-common -pipe -fno-builtin -D__linux__ +cflags-y += -fno-common -pipe -fno-builtin -D__linux__ +cflags-${CONFIG_ISA_ARCOMPACT} += -mA7 +cflags-${CONFIG_ISA_ARCV2} += -mcpu=archs ifdef CONFIG_ARC_CURR_IN_REG # For a global register defintion, make sure it gets passed to every file @@ -34,6 +36,10 @@ cflags-$(atleast_gcc44) += -fsection-anchors cflags-$(CONFIG_ARC_HAS_LLSC) += -mlock cflags-$(CONFIG_ARC_HAS_SWAPE) += -mswape +ifndef CONFIG_ARC_HAS_LL64 +cflags-y += -mno-ll64 +endif + cflags-$(CONFIG_ARC_DW2_UNWIND) += -fasynchronous-unwind-tables # By default gcc 4.8 generates dwarf4 which kernel unwinder can't grok diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index 649646579986..373bb415e844 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -16,6 +16,7 @@ #define ARC_REG_PERIBASE_BCR 0x69 #define ARC_REG_FP_BCR 0x6B /* ARCompact: Single-Precision FPU */ #define ARC_REG_DPFP_BCR 0x6C /* ARCompact: Dbl Precision FPU */ +#define ARC_REG_FP_V2_BCR 0xc8 /* ARCv2 FPU */ #define ARC_REG_DCCM_BCR 0x74 /* DCCM Present + SZ */ #define ARC_REG_TIMERS_BCR 0x75 #define ARC_REG_AP_BCR 0x76 @@ -52,6 +53,7 @@ * [15: 8] = Exception Cause Code * [ 7: 0] = Exception Parameters (for certain types only) */ +#ifdef CONFIG_ISA_ARCOMPACT #define ECR_V_MEM_ERR 0x01 #define ECR_V_INSN_ERR 0x02 #define ECR_V_MACH_CHK 0x20 @@ -59,6 +61,15 @@ #define ECR_V_DTLB_MISS 0x22 #define ECR_V_PROTV 0x23 #define ECR_V_TRAP 0x25 +#else +#define ECR_V_MEM_ERR 0x01 +#define ECR_V_INSN_ERR 0x02 +#define ECR_V_MACH_CHK 0x03 +#define ECR_V_ITLB_MISS 0x04 +#define ECR_V_DTLB_MISS 0x05 +#define ECR_V_PROTV 0x06 +#define ECR_V_TRAP 0x09 +#endif /* DTLB Miss and Protection Violation Cause Codes */ @@ -202,9 +213,11 @@ struct bcr_identity { struct bcr_isa { #ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int pad1:23, atomic1:1, ver:8; + unsigned int div_rem:4, pad2:4, ldd:1, unalign:1, atomic:1, be:1, + pad1:11, atomic1:1, ver:8; #else - 
unsigned int ver:8, atomic1:1, pad1:23; + unsigned int ver:8, atomic1:1, pad1:11, be:1, atomic:1, unalign:1, + ldd:1, pad2:4, div_rem:4; #endif }; @@ -267,11 +280,19 @@ struct bcr_fp_arcompact { #endif }; +struct bcr_fp_arcv2 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad2:15, dp:1, pad1:7, sp:1, ver:8; +#else + unsigned int ver:8, sp:1, pad1:7, dp:1, pad2:15; +#endif +}; + struct bcr_timer { #ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int pad2:15, rtsc:1, pad1:6, t1:1, t0:1, ver:8; + unsigned int pad2:15, rtsc:1, pad1:5, rtc:1, t1:1, t0:1, ver:8; #else - unsigned int ver:8, t0:1, t1:1, pad1:6, rtsc:1, pad2:15; + unsigned int ver:8, t0:1, t1:1, rtc:1, pad1:5, rtsc:1, pad2:15; #endif }; @@ -283,6 +304,14 @@ struct bcr_bpu_arcompact { #endif }; +struct bcr_bpu_arcv2 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:6, fbe:2, tqe:2, ts:4, ft:1, rse:2, pte:3, bce:3, ver:8; +#else + unsigned int ver:8, bce:3, pte:3, rse:2, ft:1, ts:4, tqe:2, fbe:2, pad:6; +#endif +}; + struct bcr_generic { #ifdef CONFIG_CPU_BIG_ENDIAN unsigned int pad:24, ver:8; @@ -334,6 +363,22 @@ struct cpuinfo_arc { extern struct cpuinfo_arc cpuinfo_arc700[]; +static inline int is_isa_arcv2(void) +{ + return IS_ENABLED(CONFIG_ISA_ARCV2); +} + +static inline int is_isa_arcompact(void) +{ + return IS_ENABLED(CONFIG_ISA_ARCOMPACT); +} + +#if defined(CONFIG_ISA_ARCOMPACT) && !defined(_CPU_DEFAULT_A7) +#error "Toolchain not configured for ARCompact builds" +#elif defined(CONFIG_ISA_ARCV2) && !defined(_CPU_DEFAULT_HS) +#error "Toolchain not configured for ARCv2 builds" +#endif + #endif /* __ASEMBLY__ */ #endif /* _ASM_ARC_ARCREGS_H */ diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index 4051e9525939..829a8a2e9704 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -402,6 +402,8 @@ test_bit(unsigned int nr, const volatile unsigned long *addr) return ((mask & *addr) != 0); } +#ifdef CONFIG_ISA_ARCOMPACT + /* * Count the number of zeros, starting from MSB * Helper for fls( ) friends @@ -494,6 +496,75 @@ static inline __attribute__ ((const)) int __ffs(unsigned long word) return ffs(word) - 1; } +#else /* CONFIG_ISA_ARCV2 */ + +/* + * fls = Find Last Set in word + * @result: [1-32] + * fls(1) = 1, fls(0x80000000) = 32, fls(0) = 0 + */ +static inline __attribute__ ((const)) int fls(unsigned long x) +{ + int n; + + asm volatile( + " fls.f %0, %1 \n" /* 0:31; 0(Z) if src 0 */ + " add.nz %0, %0, 1 \n" /* 0:31 -> 1:32 */ + : "=r"(n) /* Early clobber not needed */ + : "r"(x) + : "cc"); + + return n; +} + +/* + * __fls: Similar to fls, but zero based (0-31). 
Also 0 if no bit set + */ +static inline __attribute__ ((const)) int __fls(unsigned long x) +{ + /* FLS insn has exactly same semantics as the API */ + return __builtin_arc_fls(x); +} + +/* + * ffs = Find First Set in word (LSB to MSB) + * @result: [1-32], 0 if all 0's + */ +static inline __attribute__ ((const)) int ffs(unsigned long x) +{ + int n; + + asm volatile( + " ffs.f %0, %1 \n" /* 0:31; 31(Z) if src 0 */ + " add.nz %0, %0, 1 \n" /* 0:31 -> 1:32 */ + " mov.z %0, 0 \n" /* 31(Z)-> 0 */ + : "=r"(n) /* Early clobber not needed */ + : "r"(x) + : "cc"); + + return n; +} + +/* + * __ffs: Similar to ffs, but zero based (0-31) + */ +static inline __attribute__ ((const)) int __ffs(unsigned long x) +{ + int n; + + asm volatile( + " ffs.f %0, %1 \n" /* 0:31; 31(Z) if src 0 */ + " mov.z %0, 0 \n" /* 31(Z)-> 0 */ + : "=r"(n) + : "r"(x) + : "cc"); + + return n; + +} + +#endif /* CONFIG_ISA_ARCOMPACT */ + /* * ffz = Find First Zero in word. * @return:[0-31], 32 if all 1's diff --git a/arch/arc/include/asm/elf.h b/arch/arc/include/asm/elf.h index a26282857683..51a99e25fe33 100644 --- a/arch/arc/include/asm/elf.h +++ b/arch/arc/include/asm/elf.h @@ -15,6 +15,11 @@ /* These ELF defines belong to uapi but libc elf.h already defines them */ #define EM_ARCOMPACT 93 +#define EM_ARCV2 195 /* ARCv2 Cores */ + +#define EM_ARC_INUSE (IS_ENABLED(CONFIG_ISA_ARCOMPACT) ? \ + EM_ARCOMPACT : EM_ARCV2) + /* ARC Relocations (kernel Modules only) */ #define R_ARC_32 0x4 #define R_ARC_32_ME 0x1B diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h new file mode 100644 index 000000000000..b5ff87e6f4b7 --- /dev/null +++ b/arch/arc/include/asm/entry-arcv2.h @@ -0,0 +1,190 @@ + +#ifndef __ASM_ARC_ENTRY_ARCV2_H +#define __ASM_ARC_ENTRY_ARCV2_H + +#include +#include +#include /* For THREAD_SIZE */ + +/*------------------------------------------------------------------------*/ +.macro INTERRUPT_PROLOGUE called_from + + ; Before jumping to Interrupt Vector, hardware micro-ops did following: + ; 1. SP auto-switched to kernel mode stack + ; 2. STATUS32.Z flag set to U mode at time of interrupt (U:1, K:0) + ; 3. 
Auto saved: r0-r11, blink, LPE,LPS,LPC, JLI,LDI,EI, PC, STAT32 + ; + ; Now manually save: r12, sp, fp, gp, r25 + + PUSH r12 + + ; Saving pt_regs->sp correctly requires some extra work due to the way + ; Auto stack switch works + ; - U mode: retrieve it from AUX_USER_SP + ; - K mode: add the offset from current SP where H/w starts auto push + ; + ; Utilize the fact that Z bit is set if Intr taken in U mode + mov.nz r9, sp + add.nz r9, r9, SZ_PT_REGS - PT_sp - 4 + bnz 1f + + lr r9, [AUX_USER_SP] +1: + PUSH r9 ; SP + + PUSH fp + PUSH gp + +#ifdef CONFIG_ARC_CURR_IN_REG + PUSH r25 ; user_r25 + GET_CURR_TASK_ON_CPU r25 +#else + sub sp, sp, 4 +#endif + +.ifnc \called_from, exception + sub sp, sp, 12 ; BTA/ECR/orig_r0 placeholder per pt_regs +.endif + +.endm + +/*------------------------------------------------------------------------*/ +.macro INTERRUPT_EPILOGUE called_from + +.ifnc \called_from, exception + add sp, sp, 12 ; skip BTA/ECR/orig_r0 placeholderss +.endif + +#ifdef CONFIG_ARC_CURR_IN_REG + POP r25 +#else + add sp, sp, 4 +#endif + + POP gp + POP fp + + ; Don't touch AUX_USER_SP if returning to K mode (Z bit set) + ; (Z bit set on K mode is inverse of INTERRUPT_PROLOGUE) + add.z sp, sp, 4 + bz 1f + + POPAX AUX_USER_SP +1: + POP r12 + +.endm + +/*------------------------------------------------------------------------*/ +.macro EXCEPTION_PROLOGUE + + ; Before jumping to Exception Vector, hardware micro-ops did following: + ; 1. SP auto-switched to kernel mode stack + ; 2. STATUS32.Z flag set to U mode at time of interrupt (U:1,K:0) + ; + ; Now manually save the complete reg file + + PUSH r9 ; freeup a register: slot of erstatus + + PUSHAX eret + sub sp, sp, 12 ; skip JLI, LDI, EI + PUSH lp_count + PUSHAX lp_start + PUSHAX lp_end + PUSH blink + + PUSH r11 + PUSH r10 + + ld.as r9, [sp, 10] ; load stashed r9 (status32 stack slot) + lr r10, [erstatus] + st.as r10, [sp, 10] ; save status32 at it's right stack slot + + PUSH r9 + PUSH r8 + PUSH r7 + PUSH r6 + PUSH r5 + PUSH r4 + PUSH r3 + PUSH r2 + PUSH r1 + PUSH r0 + + ; -- for interrupts, regs above are auto-saved by h/w in that order -- + ; Now do what ISR prologue does (manually save r12, sp, fp, gp, r25) + ; + ; Set Z flag if this was from U mode (expected by INTERRUPT_PROLOGUE) + ; Although H/w exception micro-ops do set Z flag for U mode (just like + ; for interrupts), it could get clobbered in case we soft land here from + ; a TLB Miss exception handler (tlbex.S) + + and r10, r10, STATUS_U_MASK + xor.f 0, r10, STATUS_U_MASK + + INTERRUPT_PROLOGUE exception + + PUSHAX erbta + PUSHAX ecr ; r9 contains ECR, expected by EV_Trap + + PUSH r0 ; orig_r0 +.endm + +/*------------------------------------------------------------------------*/ +.macro EXCEPTION_EPILOGUE + + ; Assumes r0 has PT_status32 + btst r0, STATUS_U_BIT ; Z flag set if K, used in INTERRUPT_EPILOGUE + + add sp, sp, 8 ; orig_r0/ECR don't need restoring + POPAX erbta + + INTERRUPT_EPILOGUE exception + + POP r0 + POP r1 + POP r2 + POP r3 + POP r4 + POP r5 + POP r6 + POP r7 + POP r8 + POP r9 + POP r10 + POP r11 + + POP blink + POPAX lp_end + POPAX lp_start + + POP r9 + mov lp_count, r9 + + add sp, sp, 12 ; skip JLI, LDI, EI + POPAX eret + POPAX erstatus + + ld.as r9, [sp, -12] ; reload r9 which got clobbered +.endm + +.macro FAKE_RET_FROM_EXCPN + lr r9, [status32] + bic r9, r9, (STATUS_U_MASK|STATUS_DE_MASK|STATUS_AE_MASK) + or r9, r9, (STATUS_L_MASK|STATUS_IE_MASK) + kflag r9 +.endm + +/* Get thread_info of "current" tsk */ +.macro GET_CURR_THR_INFO_FROM_SP reg + bmskn \reg, sp, 
THREAD_SHIFT - 1 +.endm + +/* Get CPU-ID of this core */ +.macro GET_CPU_ID reg + lr \reg, [identity] + xbfu \reg, \reg, 0xE8 /* 00111 01000 */ + /* M = 8-1 N = 8 */ +.endm + +#endif diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h index f61032c53d51..29d0ab6e10f5 100644 --- a/arch/arc/include/asm/entry.h +++ b/arch/arc/include/asm/entry.h @@ -16,7 +16,11 @@ #include /* For VMALLOC_START */ #include +#ifdef CONFIG_ISA_ARCOMPACT #include /* ISA specific bits */ +#else +#include +#endif /* Note on the LD/ST addr modes with addr reg wback * diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h index f38652fb2ed7..49014f0ef36d 100644 --- a/arch/arc/include/asm/irq.h +++ b/arch/arc/include/asm/irq.h @@ -13,8 +13,13 @@ #define NR_IRQS 128 /* allow some CPU external IRQ handling */ /* Platform Independent IRQs */ +#ifdef CONFIG_ISA_ARCOMPACT #define TIMER0_IRQ 3 #define TIMER1_IRQ 4 +#else +#define TIMER0_IRQ 16 +#define TIMER1_IRQ 17 +#endif #include #include diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h index c946c56f141c..1eb41b00aac5 100644 --- a/arch/arc/include/asm/irqflags-arcv2.h +++ b/arch/arc/include/asm/irqflags-arcv2.h @@ -27,6 +27,9 @@ #define AUX_IRQ_SELECT 0x40b #define AUX_IRQ_ENABLE 0x40c +/* Was Intr taken in User Mode */ +#define AUX_IRQ_ACT_BIT_U 31 + /* 0 is highest level, but taken by FIRQs, if present in design */ #define ARCV2_IRQ_DEF_PRIO 0 diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h index 18f3634ac347..aa805575c320 100644 --- a/arch/arc/include/asm/irqflags-compact.h +++ b/arch/arc/include/asm/irqflags-compact.h @@ -39,6 +39,8 @@ #define AUX_ITRIGGER 0x40d #define AUX_IPULSE 0x415 +#define ISA_INIT_STATUS_BITS STATUS_IE_MASK + #ifndef __ASSEMBLY__ /****************************************************************** diff --git a/arch/arc/include/asm/irqflags.h b/arch/arc/include/asm/irqflags.h index 333972600680..59bc6a64f75d 100644 --- a/arch/arc/include/asm/irqflags.h +++ b/arch/arc/include/asm/irqflags.h @@ -10,6 +10,10 @@ #ifndef __ASM_ARC_IRQFLAGS_H #define __ASM_ARC_IRQFLAGS_H +#ifdef CONFIG_ISA_ARCOMPACT #include +#else +#include +#endif #endif diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h index 1bfeec2c0558..91755972b9a2 100644 --- a/arch/arc/include/asm/ptrace.h +++ b/arch/arc/include/asm/ptrace.h @@ -16,6 +16,7 @@ /* THE pt_regs: Defines how regs are saved during entry into kernel */ +#ifdef CONFIG_ISA_ARCOMPACT struct pt_regs { /* Real registers */ @@ -56,6 +57,48 @@ struct pt_regs { long user_r25; }; +#else + +struct pt_regs { + + long orig_r0; + + union { + struct { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned long state:8, ecr_vec:8, + ecr_cause:8, ecr_param:8; +#else + unsigned long ecr_param:8, ecr_cause:8, + ecr_vec:8, state:8; +#endif + }; + unsigned long event; + }; + + long bta; /* bta_l1, bta_l2, erbta */ + + long user_r25; + + long r26; /* gp */ + long fp; + long sp; /* user/kernel sp depending on where we came from */ + + long r12; + + /*------- Below list auto saved by h/w -----------*/ + long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11; + + long blink; + long lp_end, lp_start, lp_count; + + long ei, ldi, jli; + + long ret; + long status32; +}; + +#endif /* Callee saved registers - need to be saved only when you are scheduled out */ diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h index aca0d5a45c7b..3af67455659a 100644 --- 
a/arch/arc/include/asm/thread_info.h +++ b/arch/arc/include/asm/thread_info.h @@ -25,6 +25,7 @@ #endif #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) +#define THREAD_SHIFT (PAGE_SHIFT << THREAD_SIZE_ORDER) #ifndef __ASSEMBLY__ diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile index cc929c0e2133..0be7ba087260 100644 --- a/arch/arc/kernel/Makefile +++ b/arch/arc/kernel/Makefile @@ -10,7 +10,8 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' obj-y := arcksyms.o setup.o irq.o time.o reset.o ptrace.o process.o devtree.o obj-y += signal.o traps.o sys.o troubleshoot.o stacktrace.o disasm.o clk.o -obj-y += entry-compact.o intc-compact.o +obj-$(CONFIG_ISA_ARCOMPACT) += entry-compact.o intc-compact.o +obj-$(CONFIG_ISA_ARCV2) += entry-arcv2.o intc-arcv2.o obj-$(CONFIG_MODULES) += arcksyms.o module.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S new file mode 100644 index 000000000000..c59a396b7b98 --- /dev/null +++ b/arch/arc/kernel/entry-arcv2.S @@ -0,0 +1,189 @@ +/* + * ARCv2 ISA based core Low Level Intr/Traps/Exceptions(non-TLB) Handling + * + * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include /* ARC_{EXTRY,EXIT} */ +#include /* SAVE_ALL_{INT1,INT2,TRAP...} */ +#include +#include +#include + + .cpu HS + +#define VECTOR .word + +;############################ Vector Table ################################# + + .section .vector,"a",@progbits + .align 4 + +# Initial 16 slots are Exception Vectors +VECTOR stext ; Restart Vector (jump to entry point) +VECTOR mem_service ; Mem exception +VECTOR instr_service ; Instrn Error +VECTOR EV_MachineCheck ; Fatal Machine check +VECTOR EV_TLBMissI ; Intruction TLB miss +VECTOR EV_TLBMissD ; Data TLB miss +VECTOR EV_TLBProtV ; Protection Violation +VECTOR EV_PrivilegeV ; Privilege Violation +VECTOR EV_SWI ; Software Breakpoint +VECTOR EV_Trap ; Trap exception +VECTOR EV_Extension ; Extn Instruction Exception +VECTOR EV_DivZero ; Divide by Zero +VECTOR EV_DCError ; Data Cache Error +VECTOR EV_Misaligned ; Misaligned Data Access +VECTOR reserved ; Reserved slots +VECTOR reserved ; Reserved slots + +# Begin Interrupt Vectors +VECTOR handle_interrupt ; (16) Timer0 +VECTOR handle_interrupt ; unused (Timer1) +VECTOR handle_interrupt ; unused (WDT) +VECTOR handle_interrupt ; (19) ICI (inter core interrupt) +VECTOR handle_interrupt +VECTOR handle_interrupt +VECTOR handle_interrupt +VECTOR handle_interrupt ; (23) End of fixed IRQs + +.rept CONFIG_ARC_NUMBER_OF_INTERRUPTS - 8 + VECTOR handle_interrupt +.endr + + .section .text, "ax",@progbits + +res_service: ; processor restart + flag 0x1 ; not implemented + nop + nop + +reserved: ; processor restart + rtie ; jump to processor initializations + +;##################### Interrupt Handling ############################## + +ENTRY(handle_interrupt) + + INTERRUPT_PROLOGUE irq + + clri ; To make status32.IE agree with CPU internal state + + lr r0, [ICAUSE] + + mov blink, ret_from_exception + + b.d arch_do_IRQ + mov r1, sp + +END(handle_interrupt) + +;################### Non TLB Exception Handling ############################# + +ENTRY(EV_SWI) + flag 1 +END(EV_SWI) + +ENTRY(EV_DivZero) + flag 1 +END(EV_DivZero) + +ENTRY(EV_DCError) + flag 1 +END(EV_DCError) + +ENTRY(EV_Misaligned) + + EXCEPTION_PROLOGUE + + lr r0, [efa] ; Faulting Data 
address + mov r1, sp + + FAKE_RET_FROM_EXCPN + + SAVE_CALLEE_SAVED_USER + mov r2, sp ; callee_regs + + bl do_misaligned_access + + ; TBD: optimize - do this only if a callee reg was involved + ; either a dst of emulated LD/ST or src with address-writeback + RESTORE_CALLEE_SAVED_USER + + b ret_from_exception +END(EV_Misaligned) + +; --------------------------------------------- +; Protection Violation Exception Handler +; --------------------------------------------- + +ENTRY(EV_TLBProtV) + + EXCEPTION_PROLOGUE + + lr r0, [efa] ; Faulting Data address + mov r1, sp ; pt_regs + + FAKE_RET_FROM_EXCPN + + mov blink, ret_from_exception + b do_page_fault + +END(EV_TLBProtV) + +; From Linux standpoint Slow Path I/D TLB Miss is same a ProtV as they +; need to call do_page_fault(). +; ECR in pt_regs provides whether access was R/W/X + +.global call_do_page_fault +.set call_do_page_fault, EV_TLBProtV + +;############# Common Handlers for ARCompact and ARCv2 ############## + +#include "entry.S" + +;############# Return from Intr/Excp/Trap (ARCv2 ISA Specifics) ############## +; +; Restore the saved sys context (common exit-path for EXCPN/IRQ/Trap) +; IRQ shd definitely not happen between now and rtie +; All 2 entry points to here already disable interrupts + +.Lrestore_regs: + + ld r0, [sp, PT_status32] ; U/K mode at time of entry + lr r10, [AUX_IRQ_ACT] + + bmsk r11, r10, 15 ; AUX_IRQ_ACT.ACTIVE + breq r11, 0, .Lexcept_ret ; No intr active, ret from Exception + +;####### Return from Intr ####### + +debug_marker_l1: + ; Handle special case #1: (Entry via Exception, Return via IRQ) + ; + ; Exception in U mode, preempted in kernel, Intr taken (K mode), orig + ; task now returning to U mode (riding the Intr) + ; AUX_IRQ_ACTIVE won't have U bit set (since intr in K mode), hence SP + ; won't be switched to correct U mode value (from AUX_SP) + ; So force AUX_IRQ_ACT.U for such a case + + btst r0, STATUS_U_BIT ; Z flag set if K (Z clear for U) + bset.nz r11, r11, AUX_IRQ_ACT_BIT_U ; NZ means U + sr r11, [AUX_IRQ_ACT] + + INTERRUPT_EPILOGUE irq + rtie + +;####### Return from Exception / pure kernel mode ####### + +.Lexcept_ret: ; Expects r0 has PT_status32 + +debug_marker_syscall: + EXCEPTION_EPILOGUE + rtie + +END(ret_from_exception) diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 64a92e0b1e53..812f95e6ae69 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -49,8 +49,6 @@ 1: .endm - .cpu A7 - .section .init.text, "ax",@progbits .type stext, @function .globl stext diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index b5426babd3c8..51560435a26b 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -44,7 +44,17 @@ SYSCALL_DEFINE0(arc_gettls) void arch_cpu_idle(void) { /* sleep, but enable all interrupts before committing */ - __asm__("sleep 0x3"); + if (is_isa_arcompact()) { + __asm__("sleep 0x3"); + } else { + /* default irq priority (<=) which can interrupt the doze */ + const int arg = 0x10 | ARCV2_IRQ_DEF_PRIO; + + __asm__ __volatile__( + "sleep %0 \n" + : + :"r"(arg)); + } } asmlinkage void ret_from_fork(void); @@ -166,7 +176,7 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long usp) * [L] ZOL loop inhibited to begin with - cleared by a LP insn * Interrupts enabled */ - regs->status32 = STATUS_U_MASK | STATUS_L_MASK | STATUS_IE_MASK; + regs->status32 = STATUS_U_MASK | STATUS_L_MASK | ISA_INIT_STATUS_BITS; /* bogus seed values for debugging */ regs->lp_start = 0x10; @@ -196,8 +206,11 @@ int 
elf_check_arch(const struct elf32_hdr *x) { unsigned int eflags; - if (x->e_machine != EM_ARCOMPACT) + if (x->e_machine != EM_ARC_INUSE) { + pr_err("ELF not built for %s ISA\n", + is_isa_arcompact() ? "ARCompact":"ARCv2"); return 0; + } eflags = x->e_flags; if ((eflags & EF_ARC_OSABI_MSK) < EF_ARC_OSABI_CURRENT) { diff --git a/arch/arc/kernel/ptrace.c b/arch/arc/kernel/ptrace.c index 4dd9e3a8c2da..4442204fe238 100644 --- a/arch/arc/kernel/ptrace.c +++ b/arch/arc/kernel/ptrace.c @@ -200,7 +200,7 @@ static const struct user_regset arc_regsets[] = { static const struct user_regset_view user_arc_view = { .name = UTS_MACHINE, - .e_machine = EM_ARCOMPACT, + .e_machine = EM_ARC_INUSE, .regsets = arc_regsets, .n = ARRAY_SIZE(arc_regsets) }; diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 96d44805ea56..d6fe80070bbf 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -96,7 +96,7 @@ static void read_arc_build_cfg_regs(void) read_decode_mmu_bcr(); read_decode_cache_bcr(); - { + if (is_isa_arcompact()) { struct bcr_fp_arcompact sp, dp; struct bcr_bpu_arcompact bpu; @@ -112,6 +112,19 @@ static void read_arc_build_cfg_regs(void) cpu->bpu.num_cache = 256 << (bpu.ent - 1); cpu->bpu.num_pred = 256 << (bpu.ent - 1); } + } else { + struct bcr_fp_arcv2 spdp; + struct bcr_bpu_arcv2 bpu; + + READ_BCR(ARC_REG_FP_V2_BCR, spdp); + cpu->extn.fpu_sp = spdp.sp ? 1 : 0; + cpu->extn.fpu_dp = spdp.dp ? 1 : 0; + + READ_BCR(ARC_REG_BPU_BCR, bpu); + cpu->bpu.ver = bpu.ver; + cpu->bpu.full = bpu.ft; + cpu->bpu.num_cache = 256 << bpu.bce; + cpu->bpu.num_pred = 2048 << bpu.pte; } READ_BCR(ARC_REG_AP_BCR, bcr); @@ -131,6 +144,7 @@ static const struct cpuinfo_data arc_cpu_tbl[] = { { {0x30, "ARC 700" }, 0x33}, { {0x34, "ARC 700 R4.10"}, 0x34}, { {0x35, "ARC 700 R4.11"}, 0x35}, + { {0x50, "ARC HS38" }, 0x51}, { {0x00, NULL } } }; @@ -149,13 +163,17 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) FIX_PTR(cpu); - { + if (is_isa_arcompact()) { isa_nm = "ARCompact"; be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN); atomic = cpu->isa.atomic1; if (!cpu->isa.ver) /* ISA BCR absent, use Kconfig info */ atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC); + } else { + isa_nm = "ARCv2"; + be = cpu->isa.be; + atomic = cpu->isa.atomic; } n += scnprintf(buf + n, len - n, @@ -184,14 +202,31 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) IS_AVAIL1(cpu->timers.t0, "Timer0 "), IS_AVAIL1(cpu->timers.t1, "Timer1 ")); - n += i = scnprintf(buf + n, len - n, "%s%s", - IS_AVAIL2(atomic, "atomic ", CONFIG_ARC_HAS_LLSC)); + n += i = scnprintf(buf + n, len - n, "%s%s%s%s%s", + IS_AVAIL2(atomic, "atomic ", CONFIG_ARC_HAS_LLSC), + IS_AVAIL2(cpu->isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64), + IS_AVAIL1(cpu->isa.unalign, "unalign (not used)")); if (i) n += scnprintf(buf + n, len - n, "\n\t\t: "); + if (cpu->extn_mpy.ver) { + if (cpu->extn_mpy.ver <= 0x2) { /* ARCompact */ + n += scnprintf(buf + n, len - n, "mpy "); + } else { + int opt = 2; /* stock MPY/MPYH */ + + if (cpu->extn_mpy.dsp) /* OPT 7-9 */ + opt = cpu->extn_mpy.dsp + 6; + + n += scnprintf(buf + n, len - n, "mpy[opt %d] ", opt); + } + n += scnprintf(buf + n, len - n, "%s", + IS_USED(CONFIG_ARC_HAS_HW_MPY)); + } + n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s%s%s\n", - IS_AVAIL1(cpu->extn_mpy.ver, "mpy "), + IS_AVAIL1(cpu->isa.div_rem, "div_rem "), IS_AVAIL1(cpu->extn.norm, "norm "), IS_AVAIL1(cpu->extn.barrel, "barrel-shift "), IS_AVAIL1(cpu->extn.swap, "swap "), diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index 
b15d2fe9c461..004b7f0bc76c 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -336,7 +336,7 @@ static void arc_restart_syscall(struct k_sigaction *ka, struct pt_regs *regs) * their orig user space value when we ret from kernel */ regs->r0 = regs->orig_r0; - regs->ret -= 4; + regs->ret -= is_isa_arcv2() ? 2 : 4; break; } } @@ -377,10 +377,10 @@ void do_signal(struct pt_regs *regs) if (regs->r0 == -ERESTARTNOHAND || regs->r0 == -ERESTARTSYS || regs->r0 == -ERESTARTNOINTR) { regs->r0 = regs->orig_r0; - regs->ret -= 4; + regs->ret -= is_isa_arcv2() ? 2 : 4; } else if (regs->r0 == -ERESTART_RESTARTBLOCK) { regs->r8 = __NR_restart_syscall; - regs->ret -= 4; + regs->ret -= is_isa_arcv2() ? 2 : 4; } syscall_wont_restart(regs); /* No more restarts */ } diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index e00a01879025..e0cf99893212 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -14,6 +14,7 @@ #include #include #include +#include /* * Common routine to print scratch regs (r0-r12) or callee regs (r13-r25) @@ -34,7 +35,10 @@ static noinline void print_reg_file(long *reg_rev, int start_num) n += scnprintf(buf + n, len - n, "\n"); /* because pt_regs has regs reversed: r12..r0, r25..r13 */ - reg_rev--; + if (is_isa_arcv2() && start_num == 0) + reg_rev++; + else + reg_rev--; } if (start_num != 0) @@ -152,6 +156,15 @@ static void show_ecr_verbose(struct pt_regs *regs) ((cause_code == 0x02) ? "Write" : "EX")); } else if (vec == ECR_V_INSN_ERR) { pr_cont("Illegal Insn\n"); +#ifdef CONFIG_ISA_ARCV2 + } else if (vec == ECR_V_MEM_ERR) { + if (cause_code == 0x00) + pr_cont("Bus Error from Insn Mem\n"); + else if (cause_code == 0x10) + pr_cont("Bus Error from Data Mem\n"); + else + pr_cont("Bus Error, check PRM\n"); +#endif } else { pr_cont("Check Programmer's Manual\n"); } @@ -185,12 +198,20 @@ void show_regs(struct pt_regs *regs) pr_info("[STAT32]: 0x%08lx", regs->status32); -#define STS_BIT(r, bit) r->status32 & STATUS_##bit##_MASK ? #bit : "" - if (!user_mode(regs)) - pr_cont(" : %2s %2s %2s %2s %2s\n", - STS_BIT(regs, AE), STS_BIT(regs, A2), STS_BIT(regs, A1), - STS_BIT(regs, E2), STS_BIT(regs, E1)); +#define STS_BIT(r, bit) r->status32 & STATUS_##bit##_MASK ? #bit" " : "" +#ifdef CONFIG_ISA_ARCOMPACT + pr_cont(" : %2s%2s%2s%2s%2s%2s%2s\n", + (regs->status32 & STATUS_U_MASK) ? "U " : "K ", + STS_BIT(regs, DE), STS_BIT(regs, AE), + STS_BIT(regs, A2), STS_BIT(regs, A1), + STS_BIT(regs, E2), STS_BIT(regs, E1)); +#else + pr_cont(" : %2s%2s%2s%2s\n", + STS_BIT(regs, IE), + (regs->status32 & STATUS_U_MASK) ? "U " : "K ", + STS_BIT(regs, DE), STS_BIT(regs, AE)); +#endif pr_info("BTA: 0x%08lx\t SP: 0x%08lx\t FP: 0x%08lx\n", regs->bta, regs->sp, regs->fp); pr_info("LPS: 0x%08lx\tLPE: 0x%08lx\tLPC: 0x%08lx\n", diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index d224bf0feefc..00c8d7f772bc 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -35,8 +35,6 @@ * Rahul Trivedi, Amit Bhor: Codito Technologies 2004 */ - .cpu A7 - #include #include #include -- cgit v1.2.3-58-ga151 From 0d7b8855a05c099a5c65a8d49a1e604198021f56 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 7 Oct 2014 14:12:13 +0530 Subject: ARCv2: STAR 9000808988: signals involving Delay Slot Reported by Anton as LTP:munmap01 failing with Illegal Instruction Exception. 
--------------------->8-------------------------------------- mmap2(NULL, 24576, PROT_READ|PROT_WRITE, MAP_SHARED, 3, 0) = 0x200d2000 munmap(0x200d2000, 24576) = 0 --- SIGSEGV {si_signo=SIGSEGV, si_code=SEGV_MAPERR, si_addr=0x200d2000} --- potentially unexpected fatal signal 4. Path: /munmap01 CPU: 0 PID: 61 Comm: munmap01 Not tainted 3.13.0-g5d5c46d9a556 #8 task: 9f1a8000 ti: 9f154000 task.ti: 9f154000 [ECR ]: 0x00020100 => Illegal Insn [EFA ]: 0x0001354c [BLINK ]: 0x200515d4 [ERET ]: 0x1354c @off 0x1354c in [/munmap01] VMA: 0x00010000 to 0x00018000 [STAT32]: 0x800802c0 ... --------------------->8-------------------------------------- The issue was 1. munmap01 accessed unmapped memory (on purpose) with signal handler installed for SIGSEGV 2. The faulting instruction happened to be in Delay Slot 00011864
: 11908:	bl.d	13284
  1190c:	stb	r16,[r2]

3. The kernel sets up the reg file for the signal handler and correctly
   clears the DE bit in the pt_regs->status32 placeholder

4. However the RESTORE_CALLEE_SAVED_USER macro was not adjusted for ARCv2,
   and it overwrites the above with the orig/stale value of status32

5. After RTIE, the userspace signal handler executes a non-branch
   instruction with the DE bit set, triggering an Illegal Instruction
   Exception.

Reported-by: Anton Kolesov
Signed-off-by: Vineet Gupta
---
 arch/arc/include/asm/entry.h  | 17 ++++++++++-------
 arch/arc/kernel/asm-offsets.c |  2 ++
 2 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'arch/arc/include')

diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h
index 29d0ab6e10f5..ad7860c5ce15 100644
--- a/arch/arc/include/asm/entry.h
+++ b/arch/arc/include/asm/entry.h
@@ -125,8 +125,6 @@
 	POP	r13
 .endm

-#define OFF_USER_R25_FROM_R24	(SZ_CALLEE_REGS + SZ_PT_REGS - 8)/4
-
 /*--------------------------------------------------------------
  * Collect User Mode callee regs as struct callee_regs - needed by
  * fork/do_signal/unaligned-access-emulation.
@@ -139,12 +137,13 @@
  *-------------------------------------------------------------*/
 .macro SAVE_CALLEE_SAVED_USER

+	mov	r12, sp		; save SP as ref to pt_regs
 	SAVE_R13_TO_R24

 #ifdef CONFIG_ARC_CURR_IN_REG
-	; Retrieve orig r25 and save it on stack
-	ld.as	r12, [sp, OFF_USER_R25_FROM_R24]
-	st.a	r12, [sp, -4]
+	; Retrieve orig r25 and save it with rest of callee_regs
+	ld.as	r12, [r12, PT_user_r25]
+	PUSH	r12
 #else
 	PUSH	r25
 #endif
@@ -191,12 +190,16 @@
 .macro RESTORE_CALLEE_SAVED_USER

 #ifdef CONFIG_ARC_CURR_IN_REG
-	ld.ab	r12, [sp, 4]
-	st.as	r12, [sp, OFF_USER_R25_FROM_R24]
+	POP	r12
 #else
 	POP	r25
 #endif
 	RESTORE_R24_TO_R13
+
+	; SP is back to start of pt_regs
+#ifdef CONFIG_ARC_CURR_IN_REG
+	st.as	r12, [sp, PT_user_r25]
+#endif
 .endm

 /*--------------------------------------------------------------
diff --git a/arch/arc/kernel/asm-offsets.c b/arch/arc/kernel/asm-offsets.c
index b9cf23313273..605281f5b301 100644
--- a/arch/arc/kernel/asm-offsets.c
+++ b/arch/arc/kernel/asm-offsets.c
@@ -60,5 +60,7 @@ int main(void)
 	DEFINE(SZ_CALLEE_REGS, sizeof(struct callee_regs));
 	DEFINE(SZ_PT_REGS, sizeof(struct pt_regs));

+	DEFINE(PT_user_r25, offsetof(struct pt_regs, user_r25));
+
 	return 0;
 }
-- cgit v1.2.3-58-ga151

From 4de0e52867d831057676340271d21cfb920eac1c Mon Sep 17 00:00:00 2001
From: Vineet Gupta
Date: Sat, 15 Nov 2014 17:00:08 +0530
Subject: ARCv2: STAR 9000814690: Really Re-enable interrupts to avoid deadlocks

The issue: on HS, when an interrupt is taken, IRQ_ACT is set and is NOT
cleared unless we execute RTIE (or clear it manually).

Linux interrupt handling has top and bottom halves. The latter lead to
softirqs (which can reschedule) AND expect interrupts to be REALLY
re-enabled, which was NOT happening for us since we only did SETI and
didn't clear IRQ_ACT.

So we could end up in a state where both cores have taken an interrupt
(IRQ_ACT set), get rescheduled, both send an IPI and then wait on the CSD
lock, which will never be released since neither core can take the pending
IPI IRQ while its IRQ_ACT is still set.

So local_irq_enable() now drops the IRQ_ACT.act bit to re-enable IRQs.
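
The deadlock, schematically (an illustrative interleaving inferred from
the above; the ordering and CPU numbering are hypothetical, not a
captured trace):

| CPU0                                 CPU1
| take IRQ (IRQ_ACT set)               take IRQ (IRQ_ACT set)
| softirq -> local_irq_enable()        softirq -> local_irq_enable()
|   (SETI only, IRQ_ACT still set)       (SETI only, IRQ_ACT still set)
| reschedule; send IPI to CPU1         reschedule; send IPI to CPU0
| spin on CSD lock                     spin on CSD lock
| pending IPI never taken              pending IPI never taken
|   (blocked by the set IRQ_ACT)         (blocked by the set IRQ_ACT)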
Signed-off-by: Vineet Gupta --- arch/arc/include/asm/irqflags-arcv2.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h index 1eb41b00aac5..ad481c24070d 100644 --- a/arch/arc/include/asm/irqflags-arcv2.h +++ b/arch/arc/include/asm/irqflags-arcv2.h @@ -64,6 +64,11 @@ static inline void arch_local_irq_restore(unsigned long flags) */ static inline void arch_local_irq_enable(void) { + unsigned int irqact = read_aux_reg(AUX_IRQ_ACT); + + if (irqact & 0xffff) + write_aux_reg(AUX_IRQ_ACT, irqact & ~0xffff); + __asm__ __volatile__(" seti \n" : : : "memory"); } -- cgit v1.2.3-58-ga151 From d7a512bfe0be3790bae8465b4cb6c1bbca03c616 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 6 Apr 2015 17:22:39 +0530 Subject: ARCv2: MMUv4: TLB programming Model changes Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 5 ++++ arch/arc/include/asm/arcregs.h | 2 +- arch/arc/include/asm/mmu.h | 24 ++++++++++++++++++- arch/arc/include/asm/pgtable.h | 10 ++++++++ arch/arc/mm/tlb.c | 54 +++++++++++++++++++++++++++++++++++++++--- arch/arc/mm/tlbex.S | 24 +++++++++++++++++++ 6 files changed, 114 insertions(+), 5 deletions(-) (limited to 'arch/arc/include') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index f72398847b5b..6568977bb302 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -267,6 +267,7 @@ choice prompt "MMU Version" default ARC_MMU_V3 if ARC_CPU_770 default ARC_MMU_V2 if ARC_CPU_750D + default ARC_MMU_V4 if ARC_CPU_HS config ARC_MMU_V1 bool "MMU v1" @@ -287,6 +288,10 @@ config ARC_MMU_V3 Variable Page size (1k-16k), var JTLB size 128 x (2 or 4) Shared Address Spaces (SASID) +config ARC_MMU_V4 + bool "MMU v4" + depends on ISA_ARCV2 + endchoice diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index 373bb415e844..bcb08cd52d38 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -326,7 +326,7 @@ struct bcr_generic { */ struct cpuinfo_arc_mmu { - unsigned int ver:4, pg_sz_k:8, pad:8, u_dtlb:6, u_itlb:6; + unsigned int ver:4, pg_sz_k:8, s_pg_sz_m:8, u_dtlb:6, u_itlb:6; unsigned int num_tlb:16, sets:12, ways:4; }; diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index 8c84ae98c337..0f9c3eb5327e 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -15,24 +15,41 @@ #define CONFIG_ARC_MMU_VER 2 #elif defined(CONFIG_ARC_MMU_V3) #define CONFIG_ARC_MMU_VER 3 +#elif defined(CONFIG_ARC_MMU_V4) +#define CONFIG_ARC_MMU_VER 4 #endif /* MMU Management regs */ #define ARC_REG_MMU_BCR 0x06f +#if (CONFIG_ARC_MMU_VER < 4) #define ARC_REG_TLBPD0 0x405 #define ARC_REG_TLBPD1 0x406 #define ARC_REG_TLBINDEX 0x407 #define ARC_REG_TLBCOMMAND 0x408 #define ARC_REG_PID 0x409 #define ARC_REG_SCRATCH_DATA0 0x418 +#else +#define ARC_REG_TLBPD0 0x460 +#define ARC_REG_TLBPD1 0x461 +#define ARC_REG_TLBINDEX 0x464 +#define ARC_REG_TLBCOMMAND 0x465 +#define ARC_REG_PID 0x468 +#define ARC_REG_SCRATCH_DATA0 0x46c +#endif /* Bits in MMU PID register */ -#define MMU_ENABLE (1 << 31) /* Enable MMU for process */ +#define __TLB_ENABLE (1 << 31) +#define __PROG_ENABLE (1 << 30) +#define MMU_ENABLE (__TLB_ENABLE | __PROG_ENABLE) /* Error code if probe fails */ #define TLB_LKUP_ERR 0x80000000 +#if (CONFIG_ARC_MMU_VER < 4) #define TLB_DUP_ERR (TLB_LKUP_ERR | 0x00000001) +#else +#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x40000000) +#endif /* TLB Commands */ #define TLBWrite 0x1 @@ -45,6 +62,11 @@ #define TLBIVUTLB 0x6 /* explicitly inv 
uTLBs */ #endif +#if (CONFIG_ARC_MMU_VER >= 4) +#define TLBInsertEntry 0x7 +#define TLBDeleteEntry 0x8 +#endif + #ifndef __ASSEMBLY__ typedef struct { diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 9615fe1701c6..1281718802f7 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -72,8 +72,18 @@ #define _PAGE_READ (1<<3) /* Page has user read perm (H) */ #define _PAGE_ACCESSED (1<<4) /* Page is accessed (S) */ #define _PAGE_MODIFIED (1<<5) /* Page modified (dirty) (S) */ + +#if (CONFIG_ARC_MMU_VER >= 4) +#define _PAGE_WTHRU (1<<7) /* Page cache mode write-thru (H) */ +#endif + #define _PAGE_GLOBAL (1<<8) /* Page is global (H) */ #define _PAGE_PRESENT (1<<9) /* TLB entry is valid (H) */ + +#if (CONFIG_ARC_MMU_VER >= 4) +#define _PAGE_SZ (1<<10) /* Page Size indicator (H) */ +#endif + #define _PAGE_SHARED_CODE (1<<11) /* Shared Code page with cmn vaddr usable for shared TLB entries (H) */ #endif diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 914d8e0c0318..2c7ce8bb7475 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -113,6 +113,8 @@ static inline void __tlb_entry_erase(void) write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); } +#if (CONFIG_ARC_MMU_VER < 4) + static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid) { unsigned int idx; @@ -210,6 +212,28 @@ static void tlb_entry_insert(unsigned int pd0, unsigned int pd1) write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); } +#else /* CONFIG_ARC_MMU_VER >= 4) */ + +static void utlb_invalidate(void) +{ + /* No need since uTLB is always in sync with JTLB */ +} + +static void tlb_entry_erase(unsigned int vaddr_n_asid) +{ + write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid | _PAGE_PRESENT); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry); +} + +static void tlb_entry_insert(unsigned int pd0, unsigned int pd1) +{ + write_aux_reg(ARC_REG_TLBPD0, pd0); + write_aux_reg(ARC_REG_TLBPD1, pd1); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry); +} + +#endif + /* * Un-conditionally (without lookup) erase the entire MMU contents */ @@ -582,6 +606,17 @@ void read_decode_mmu_bcr(void) #endif } *mmu3; + struct bcr_mmu_4 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1, + n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3; +#else + /* DTLB ITLB JES JE JA */ + unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2, + pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8; +#endif + } *mmu4; + tmp = read_aux_reg(ARC_REG_MMU_BCR); mmu->ver = (tmp >> 24); @@ -592,13 +627,21 @@ void read_decode_mmu_bcr(void) mmu->ways = 1 << mmu2->ways; mmu->u_dtlb = mmu2->u_dtlb; mmu->u_itlb = mmu2->u_itlb; - } else { + } else if (mmu->ver == 3) { mmu3 = (struct bcr_mmu_3 *)&tmp; mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1); mmu->sets = 1 << mmu3->sets; mmu->ways = 1 << mmu3->ways; mmu->u_dtlb = mmu3->u_dtlb; mmu->u_itlb = mmu3->u_itlb; + } else { + mmu4 = (struct bcr_mmu_4 *)&tmp; + mmu->pg_sz_k = 1 << (mmu4->sz0 - 1); + mmu->s_pg_sz_m = 1 << (mmu4->sz1 - 11); + mmu->sets = 64 << mmu4->n_entry; + mmu->ways = mmu4->n_ways * 2; + mmu->u_dtlb = mmu4->u_dtlb * 4; + mmu->u_itlb = mmu4->u_itlb * 4; } mmu->num_tlb = mmu->sets * mmu->ways; @@ -608,10 +651,15 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) { int n = 0; struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[cpu_id].mmu; + char super_pg[64] = ""; + + if (p_mmu->s_pg_sz_m) + scnprintf(super_pg, 64, "%dM Super Page%s, ", + p_mmu->s_pg_sz_m, " (not used)"); n += scnprintf(buf + n, len - n, - "MMU [v%x]\t: %dk PAGE, 
JTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n",
-		      p_mmu->ver, p_mmu->pg_sz_k,
+		      "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n",
+		      p_mmu->ver, p_mmu->pg_sz_k, super_pg,
 		      p_mmu->num_tlb, p_mmu->sets, p_mmu->ways,
 		      p_mmu->u_dtlb, p_mmu->u_itlb,
 		      IS_ENABLED(CONFIG_ARC_MMU_SASID) ? ",SASID" : "");

diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index d224bf0feefc..00c8d7f772bc 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -35,8 +35,6 @@
  * Rahul Trivedi, Amit Bhor: Codito Technologies 2004
  */

-	.cpu A7
-
 #include
 #include
 #include
-- cgit v1.2.3-58-ga151

From d1f317d8254413447bcd6b6adbde24a985d256c2 Mon Sep 17 00:00:00 2001
From: Vineet Gupta
Date: Mon, 6 Apr 2015 17:23:57 +0530
Subject: ARCv2: MMUv4: cache programming model changes

Caveats about cache flush on ARCv2 based cores:

- dcache is PIPT, so paddr is sufficient for cache maintenance ops (no
  need to set up the PTAG reg)

- icache is still VIPT, but only aliasing configs need the PTAG setup

So basically this is a departure from MMU-v3, which always needs vaddr
in the line ops registers (DC_IVDL, DC_FLDL, IC_IVIL) but paddr in
DC_PTAG, IC_PTAG respectively.
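
In essence, for the common non-aliasing case the ARCv2 per-line loop
reduces to programming the line-op register with paddr alone, with no
PTAG writes; a sketch mirroring the __cache_line_loop_v4() hunk below:

|	while (num_lines-- > 0) {
|		write_aux_reg(aux_cmd, paddr);	/* IC_IVIL/DC_IVDL/DC_FLDL */
|		paddr += L1_CACHE_BYTES;
|	}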
Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 2 +- arch/arc/include/asm/arcregs.h | 5 +- arch/arc/include/asm/cache.h | 3 ++ arch/arc/mm/cache.c | 112 +++++++++++++++++++++++++++++++++++------ 4 files changed, 104 insertions(+), 18 deletions(-) (limited to 'arch/arc/include') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 6568977bb302..974ed9058018 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -223,7 +223,7 @@ config ARC_CACHE_PAGES config ARC_CACHE_VIPT_ALIASING bool "Support VIPT Aliasing D$" - depends on ARC_HAS_DCACHE + depends on ARC_HAS_DCACHE && ISA_ARCOMPACT default n endif #ARC_CACHE diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index bcb08cd52d38..070f58827a5c 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -17,6 +17,7 @@ #define ARC_REG_FP_BCR 0x6B /* ARCompact: Single-Precision FPU */ #define ARC_REG_DPFP_BCR 0x6C /* ARCompact: Dbl Precision FPU */ #define ARC_REG_FP_V2_BCR 0xc8 /* ARCv2 FPU */ +#define ARC_REG_SLC_BCR 0xce #define ARC_REG_DCCM_BCR 0x74 /* DCCM Present + SZ */ #define ARC_REG_TIMERS_BCR 0x75 #define ARC_REG_AP_BCR 0x76 @@ -331,7 +332,7 @@ struct cpuinfo_arc_mmu { }; struct cpuinfo_arc_cache { - unsigned int sz_k:8, line_len:8, assoc:4, ver:4, alias:1, vipt:1, pad:6; + unsigned int sz_k:14, line_len:8, assoc:4, ver:4, alias:1, vipt:1; }; struct cpuinfo_arc_bpu { @@ -343,7 +344,7 @@ struct cpuinfo_arc_ccm { }; struct cpuinfo_arc { - struct cpuinfo_arc_cache icache, dcache; + struct cpuinfo_arc_cache icache, dcache, slc; struct cpuinfo_arc_mmu mmu; struct cpuinfo_arc_bpu bpu; struct bcr_identity core; diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index 7861255da32d..e54977a7d006 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -82,4 +82,7 @@ extern void read_decode_cache_bcr(void); #define DC_CTRL_INV_MODE_FLUSH 0x40 #define DC_CTRL_FLUSH_STATUS 0x100 +/*System-level cache (L2 cache) related Auxiliary registers */ +#define ARC_REG_SLC_CFG 0x901 + #endif /* _ASM_CACHE_H */ diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 6fa5f0f7f549..7a898f57d84b 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -24,6 +24,7 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len) { int n = 0; + struct cpuinfo_arc_cache *p; #define PR_CACHE(p, cfg, str) \ if (!(p)->ver) \ @@ -39,6 +40,11 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len) PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache"); PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache"); + p = &cpuinfo_arc700[c].slc; + if (p->ver) + n += scnprintf(buf + n, len - n, + "SLC\t\t: %uK, %uB Line\n", p->sz_k, p->line_len); + return buf; } @@ -49,7 +55,7 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len) */ void read_decode_cache_bcr(void) { - struct cpuinfo_arc_cache *p_ic, *p_dc; + struct cpuinfo_arc_cache *p_ic, *p_dc, *p_slc; unsigned int cpu = smp_processor_id(); struct bcr_cache { #ifdef CONFIG_CPU_BIG_ENDIAN @@ -59,14 +65,29 @@ void read_decode_cache_bcr(void) #endif } ibcr, dbcr; + struct bcr_generic sbcr; + + struct bcr_slc_cfg { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:24, way:2, lsz:2, sz:4; +#else + unsigned int sz:4, lsz:2, way:2, pad:24; +#endif + } slc_cfg; + p_ic = &cpuinfo_arc700[cpu].icache; READ_BCR(ARC_REG_IC_BCR, ibcr); if (!ibcr.ver) goto dc_chk; - BUG_ON(ibcr.config != 3); - p_ic->assoc = 2; /* Fixed to 2w set assoc */ + if (ibcr.ver <= 3) { + BUG_ON(ibcr.config != 3); + p_ic->assoc = 2; 
/* Fixed to 2w set assoc */ + } else if (ibcr.ver >= 4) { + p_ic->assoc = 1 << ibcr.config; /* 1,2,4,8 */ + } + p_ic->line_len = 8 << ibcr.line_len; p_ic->sz_k = 1 << (ibcr.sz - 1); p_ic->ver = ibcr.ver; @@ -78,15 +99,32 @@ dc_chk: READ_BCR(ARC_REG_DC_BCR, dbcr); if (!dbcr.ver) - return; + goto slc_chk; + + if (dbcr.ver <= 3) { + BUG_ON(dbcr.config != 2); + p_dc->assoc = 4; /* Fixed to 4w set assoc */ + p_dc->vipt = 1; + p_dc->alias = p_dc->sz_k/p_dc->assoc/TO_KB(PAGE_SIZE) > 1; + } else if (dbcr.ver >= 4) { + p_dc->assoc = 1 << dbcr.config; /* 1,2,4,8 */ + p_dc->vipt = 0; + p_dc->alias = 0; /* PIPT so can't VIPT alias */ + } - BUG_ON(dbcr.config != 2); - p_dc->assoc = 4; /* Fixed to 4w set assoc */ p_dc->line_len = 16 << dbcr.line_len; p_dc->sz_k = 1 << (dbcr.sz - 1); p_dc->ver = dbcr.ver; - p_dc->vipt = 1; - p_dc->alias = p_dc->sz_k/p_dc->assoc/TO_KB(PAGE_SIZE) > 1; + +slc_chk: + p_slc = &cpuinfo_arc700[cpu].slc; + READ_BCR(ARC_REG_SLC_BCR, sbcr); + if (sbcr.ver) { + READ_BCR(ARC_REG_SLC_CFG, slc_cfg); + p_slc->ver = sbcr.ver; + p_slc->sz_k = 128 << slc_cfg.sz; + p_slc->line_len = (slc_cfg.lsz == 0) ? 128 : 64; + } } /* @@ -225,10 +263,53 @@ void __cache_line_loop_v3(unsigned long paddr, unsigned long vaddr, } } +/* + * In HS38x (MMU v4), although icache is VIPT, only paddr is needed for cache + * maintenance ops (in IVIL reg), as long as icache doesn't alias. + * + * For Aliasing icache, vaddr is also needed (in IVIL), while paddr is + * specified in PTAG (similar to MMU v3) + */ +static inline +void __cache_line_loop_v4(unsigned long paddr, unsigned long vaddr, + unsigned long sz, const int cacheop) +{ + unsigned int aux_cmd; + int num_lines; + const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE; + + if (cacheop == OP_INV_IC) { + aux_cmd = ARC_REG_IC_IVIL; + } else { + /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */ + aux_cmd = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; + } + + /* Ensure we properly floor/ceil the non-line aligned/sized requests + * and have @paddr - aligned to cache line and integral @num_lines. + * This however can be avoided for page sized since: + * -@paddr will be cache-line aligned already (being page aligned) + * -@sz will be integral multiple of line size (being page sized). 
+ */ + if (!full_page_op) { + sz += paddr & ~CACHE_LINE_MASK; + paddr &= CACHE_LINE_MASK; + } + + num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES); + + while (num_lines-- > 0) { + write_aux_reg(aux_cmd, paddr); + paddr += L1_CACHE_BYTES; + } +} + #if (CONFIG_ARC_MMU_VER < 3) #define __cache_line_loop __cache_line_loop_v2 #elif (CONFIG_ARC_MMU_VER == 3) #define __cache_line_loop __cache_line_loop_v3 +#elif (CONFIG_ARC_MMU_VER > 3) +#define __cache_line_loop __cache_line_loop_v4 #endif #ifdef CONFIG_ARC_HAS_DCACHE @@ -669,7 +750,6 @@ void arc_cache_init(void) if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) { struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache; - int handled; if (!dc->ver) panic("cache support enabled but non-existent cache\n"); @@ -678,12 +758,14 @@ void arc_cache_init(void) panic("DCache line [%d] != kernel Config [%d]", dc->line_len, L1_CACHE_BYTES); - /* check for D-Cache aliasing */ - handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); + /* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */ + if (is_isa_arcompact()) { + int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); - if (dc->alias && !handled) - panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); - else if (!dc->alias && handled) - panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + if (dc->alias && !handled) + panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + else if (!dc->alias && handled) + panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + } } } -- cgit v1.2.3-58-ga151 From bcc4d65abec2adb74157b34519e80331eb4427eb Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 4 Jun 2015 14:39:15 +0530 Subject: ARCv2: MMUv4: support aliasing icache config This is also default for AXS103 release Signed-off-by: Vineet Gupta --- arch/arc/include/asm/cache.h | 4 +--- arch/arc/mm/cache.c | 14 +++++++++++++- 2 files changed, 14 insertions(+), 4 deletions(-) (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index e54977a7d006..d21c76d6b054 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -60,7 +60,7 @@ extern void read_decode_cache_bcr(void); #define ARC_REG_IC_IVIC 0x10 #define ARC_REG_IC_CTRL 0x11 #define ARC_REG_IC_IVIL 0x19 -#if defined(CONFIG_ARC_MMU_V3) +#if defined(CONFIG_ARC_MMU_V3) || defined(CONFIG_ARC_MMU_V4) #define ARC_REG_IC_PTAG 0x1E #endif @@ -74,9 +74,7 @@ extern void read_decode_cache_bcr(void); #define ARC_REG_DC_IVDL 0x4A #define ARC_REG_DC_FLSH 0x4B #define ARC_REG_DC_FLDL 0x4C -#if defined(CONFIG_ARC_MMU_V3) #define ARC_REG_DC_PTAG 0x5C -#endif /* Bit val in DC_CTRL */ #define DC_CTRL_INV_MODE_FLUSH 0x40 diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 7a898f57d84b..0eaaee60fd0b 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -21,6 +21,9 @@ #include #include +void (*_cache_line_loop_ic_fn)(unsigned long paddr, unsigned long vaddr, + unsigned long sz, const int cacheop); + char *arc_cache_mumbojumbo(int c, char *buf, int len) { int n = 0; @@ -414,7 +417,7 @@ __ic_line_inv_vaddr_local(unsigned long paddr, unsigned long vaddr, unsigned long flags; local_irq_save(flags); - __cache_line_loop(paddr, vaddr, sz, OP_INV_IC); + (*_cache_line_loop_ic_fn)(paddr, vaddr, sz, OP_INV_IC); local_irq_restore(flags); } @@ -746,6 +749,15 @@ void arc_cache_init(void) if (ic->ver != CONFIG_ARC_MMU_VER) panic("Cache ver [%d] doesn't match MMU ver [%d]\n", ic->ver, CONFIG_ARC_MMU_VER); + + /* + * In MMU v4 (HS38x) the alising icache config uses IVIL/PTAG + * pair to provide vaddr/paddr respectively, just 
as in MMU v3 + */ + if (is_isa_arcv2() && ic->alias) + _cache_line_loop_ic_fn = __cache_line_loop_v3; + else + _cache_line_loop_ic_fn = __cache_line_loop; } if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) { -- cgit v1.2.3-58-ga151 From 8922bc3058abbe5deaf887147e26531750ce7513 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 7 Oct 2013 18:10:08 +0530 Subject: ARCv2: Adhere to Zero Delay loop restriction Branch insn can't be scheduled as last insn of Zero Overhead loop Signed-off-by: Vineet Gupta --- arch/arc/include/asm/delay.h | 9 ++++----- arch/arc/include/asm/uaccess.h | 17 ++++++++--------- arch/arc/lib/memcmp.S | 30 +++++++++++++++++++++++++++++- 3 files changed, 41 insertions(+), 15 deletions(-) (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/delay.h b/arch/arc/include/asm/delay.h index 43de30256981..08e7e2a16ac1 100644 --- a/arch/arc/include/asm/delay.h +++ b/arch/arc/include/asm/delay.h @@ -22,11 +22,10 @@ static inline void __delay(unsigned long loops) { __asm__ __volatile__( - "1: sub.f %0, %0, 1 \n" - " jpnz 1b \n" - : "+r"(loops) - : - : "cc"); + " lp 1f \n" + " nop \n" + "1: \n" + : "+l"(loops)); } extern void __bad_udelay(void); diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h index 30c9baffa96f..d1da6032b715 100644 --- a/arch/arc/include/asm/uaccess.h +++ b/arch/arc/include/asm/uaccess.h @@ -659,31 +659,30 @@ static inline unsigned long __arc_clear_user(void __user *to, unsigned long n) static inline long __arc_strncpy_from_user(char *dst, const char __user *src, long count) { - long res = count; + long res = 0; char val; - unsigned int hw_count; if (count == 0) return 0; __asm__ __volatile__( - " lp 2f \n" + " lp 3f \n" "1: ldb.ab %3, [%2, 1] \n" - " breq.d %3, 0, 2f \n" + " breq.d %3, 0, 3f \n" " stb.ab %3, [%1, 1] \n" - "2: sub %0, %6, %4 \n" - "3: ;nop \n" + " add %0, %0, 1 # Num of NON NULL bytes copied \n" + "3: \n" " .section .fixup, \"ax\" \n" " .align 4 \n" - "4: mov %0, %5 \n" + "4: mov %0, %4 # sets @res as -EFAULT \n" " j 3b \n" " .previous \n" " .section __ex_table, \"a\" \n" " .align 4 \n" " .word 1b, 4b \n" " .previous \n" - : "=r"(res), "+r"(dst), "+r"(src), "=&r"(val), "=l"(hw_count) - : "g"(-EFAULT), "ir"(count), "4"(count) /* this "4" seeds lp_count */ + : "+r"(res), "+r"(dst), "+r"(src), "=r"(val) + : "g"(-EFAULT), "l"(count) : "memory"); return res; diff --git a/arch/arc/lib/memcmp.S b/arch/arc/lib/memcmp.S index 978bf8314dfb..a4015e7d9ab7 100644 --- a/arch/arc/lib/memcmp.S +++ b/arch/arc/lib/memcmp.S @@ -24,14 +24,32 @@ ENTRY(memcmp) ld r4,[r0,0] ld r5,[r1,0] lsr.f lp_count,r3,3 +#ifdef CONFIG_ISA_ARCV2 + /* In ARCv2 a branch can't be the last instruction in a zero overhead + * loop. + * So we move the branch to the start of the loop, duplicate it + * after the end, and set up r12 so that the branch isn't taken + * initially. 
+ */ + mov_s r12,WORD2 + lpne .Loop_end + brne WORD2,r12,.Lodd + ld WORD2,[r0,4] +#else lpne .Loop_end ld_s WORD2,[r0,4] +#endif ld_s r12,[r1,4] brne r4,r5,.Leven ld.a r4,[r0,8] ld.a r5,[r1,8] +#ifdef CONFIG_ISA_ARCV2 +.Loop_end: + brne WORD2,r12,.Lodd +#else brne WORD2,r12,.Lodd .Loop_end: +#endif asl_s SHIFT,SHIFT,3 bhs_s .Last_cmp brne r4,r5,.Leven @@ -89,7 +107,6 @@ ENTRY(memcmp) bset.cs r0,r0,31 .Lodd: cmp_s WORD2,r12 - mov_s r0,1 j_s.d [blink] bset.cs r0,r0,31 @@ -100,14 +117,25 @@ ENTRY(memcmp) ldb r4,[r0,0] ldb r5,[r1,0] lsr.f lp_count,r3 +#ifdef CONFIG_ISA_ARCV2 + mov r12,r3 lpne .Lbyte_end + brne r3,r12,.Lbyte_odd +#else + lpne .Lbyte_end +#endif ldb_s r3,[r0,1] ldb r12,[r1,1] brne r4,r5,.Lbyte_even ldb.a r4,[r0,2] ldb.a r5,[r1,2] +#ifdef CONFIG_ISA_ARCV2 +.Lbyte_end: + brne r3,r12,.Lbyte_odd +#else brne r3,r12,.Lbyte_odd .Lbyte_end: +#endif bcc .Lbyte_even brne r4,r5,.Lbyte_even ldb_s r3,[r0,1] -- cgit v1.2.3-58-ga151 From 82fea5a1bbbe8c3b56d5f3efbf8880c7b25b1758 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 10 Sep 2014 19:05:38 +0530 Subject: ARCv2: SMP: Support ARConnect (MCIP) for Inter-Core-Interrupts et al Cc: Jason Cooper Cc: Thomas Gleixner Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 21 +++++--- arch/arc/include/asm/irq.h | 1 + arch/arc/include/asm/mcip.h | 91 +++++++++++++++++++++++++++++++++ arch/arc/kernel/Makefile | 1 + arch/arc/kernel/intc-arcv2.c | 2 +- arch/arc/kernel/mcip.c | 117 +++++++++++++++++++++++++++++++++++++++++++ arch/arc/plat-sim/platform.c | 5 ++ 7 files changed, 230 insertions(+), 8 deletions(-) create mode 100644 arch/arc/include/asm/mcip.h create mode 100644 arch/arc/kernel/mcip.c (limited to 'arch/arc/include') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index f09e03a0d604..301525020af7 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -160,12 +160,12 @@ config CPU_BIG_ENDIAN Build kernel for Big Endian Mode of ARC CPU config SMP - bool "Symmetric Multi-Processing (Incomplete)" + bool "Symmetric Multi-Processing" default n + select ARC_HAS_COH_CACHES if ISA_ARCV2 + select ARC_MCIP if ISA_ARCV2 help - This enables support for systems with more than one CPU. If you have - a system with only one CPU, say N. If you have a system with more - than one CPU, say Y. + This enables support for systems with more than one CPU. if SMP @@ -175,13 +175,20 @@ config ARC_HAS_COH_CACHES config ARC_HAS_REENTRANT_IRQ_LV2 def_bool n -endif #SMP +config ARC_MCIP + bool "ARConnect Multicore IP (MCIP) Support " + depends on ISA_ARCV2 + help + This IP block enables SMP in ARC-HS38 cores. + It provides for cross-core interrupts, multi-core debug + hardware semaphores, shared memory,.... config NR_CPUS int "Maximum number of CPUs (2-4096)" range 2 4096 - depends on SMP - default "2" + default "4" + +endif #SMP menuconfig ARC_CACHE bool "Enable Cache Support" diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h index 49014f0ef36d..bc5103637326 100644 --- a/arch/arc/include/asm/irq.h +++ b/arch/arc/include/asm/irq.h @@ -19,6 +19,7 @@ #else #define TIMER0_IRQ 16 #define TIMER1_IRQ 17 +#define IPI_IRQ 19 #endif #include diff --git a/arch/arc/include/asm/mcip.h b/arch/arc/include/asm/mcip.h new file mode 100644 index 000000000000..31f9bac77a27 --- /dev/null +++ b/arch/arc/include/asm/mcip.h @@ -0,0 +1,91 @@ +/* + * ARConnect IP Support (Multi core enabler: Cross core IPI, RTC ...) + * + * Copyright (C) 2014-15 Synopsys, Inc. 
(www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_MCIP_H +#define __ASM_MCIP_H + +#ifdef CONFIG_ISA_ARCV2 + +#include + +#define ARC_REG_MCIP_BCR 0x0d0 +#define ARC_REG_MCIP_CMD 0x600 +#define ARC_REG_MCIP_WDATA 0x601 +#define ARC_REG_MCIP_READBACK 0x602 + +struct mcip_cmd { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:8, param:16, cmd:8; +#else + unsigned int cmd:8, param:16, pad:8; +#endif + +#define CMD_INTRPT_GENERATE_IRQ 0x01 +#define CMD_INTRPT_GENERATE_ACK 0x02 +#define CMD_INTRPT_READ_STATUS 0x03 +#define CMD_INTRPT_CHECK_SOURCE 0x04 + +/* Semaphore Commands */ +#define CMD_SEMA_CLAIM_AND_READ 0x11 +#define CMD_SEMA_RELEASE 0x12 + +#define CMD_DEBUG_SET_MASK 0x34 +#define CMD_DEBUG_SET_SELECT 0x36 + +#define CMD_IDU_ENABLE 0x71 +#define CMD_IDU_DISABLE 0x72 +#define CMD_IDU_SET_MODE 0x74 +#define CMD_IDU_SET_DEST 0x76 +#define CMD_IDU_SET_MASK 0x7C + +#define IDU_M_TRIG_LEVEL 0x0 +#define IDU_M_TRIG_EDGE 0x1 + +#define IDU_M_DISTRI_RR 0x0 +#define IDU_M_DISTRI_DEST 0x2 +}; + +/* + * MCIP programming model + * + * - Simple commands write {cmd:8,param:16} to MCIP_CMD aux reg + * (param could be irq, common_irq, core_id ...) + * - More involved commands setup MCIP_WDATA with cmd specific data + * before invoking the simple command + */ +static inline void __mcip_cmd(unsigned int cmd, unsigned int param) +{ + struct mcip_cmd buf; + + buf.pad = 0; + buf.cmd = cmd; + buf.param = param; + + WRITE_AUX(ARC_REG_MCIP_CMD, buf); +} + +/* + * Setup additional data for a cmd + * Callers need to lock to ensure atomicity + */ +static inline void __mcip_cmd_data(unsigned int cmd, unsigned int param, + unsigned int data) +{ + write_aux_reg(ARC_REG_MCIP_WDATA, data); + + __mcip_cmd(cmd, param); +} + +extern void mcip_init_early_smp(void); +extern void mcip_init_smp(unsigned int cpu); + +#endif + +#endif diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile index 0be7ba087260..e7f3625a19b5 100644 --- a/arch/arc/kernel/Makefile +++ b/arch/arc/kernel/Makefile @@ -15,6 +15,7 @@ obj-$(CONFIG_ISA_ARCV2) += entry-arcv2.o intc-arcv2.o obj-$(CONFIG_MODULES) += arcksyms.o module.o obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_ARC_MCIP) += mcip.o obj-$(CONFIG_ARC_DW2_UNWIND) += unwind.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_ARC_EMUL_UNALIGNED) += unaligned.o diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c index c5c64dc47655..6208c630abed 100644 --- a/arch/arc/kernel/intc-arcv2.c +++ b/arch/arc/kernel/intc-arcv2.c @@ -107,7 +107,7 @@ static struct irq_chip arcv2_irq_chip = { static int arcv2_irq_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) { - if (irq == TIMER0_IRQ) + if (irq == TIMER0_IRQ || irq == IPI_IRQ) irq_set_chip_and_handler(irq, &arcv2_irq_chip, handle_percpu_irq); else irq_set_chip_and_handler(irq, &arcv2_irq_chip, handle_level_irq); diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c new file mode 100644 index 000000000000..e6ad6e64440a --- /dev/null +++ b/arch/arc/kernel/mcip.c @@ -0,0 +1,117 @@ +/* + * ARC ARConnect (MultiCore IP) support (formerly known as MCIP) + * + * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include + +static char smp_cpuinfo_buf[128]; + +static DEFINE_RAW_SPINLOCK(mcip_lock); + + +/* + * Any SMP specific init any CPU does when it comes up. + * Here we setup the CPU to enable Inter-Processor-Interrupts + * Called for each CPU + * -Master : init_IRQ() + * -Other(s) : start_kernel_secondary() + */ +void mcip_init_smp(unsigned int cpu) +{ + smp_ipi_irq_setup(cpu, IPI_IRQ); +} + +static void mcip_ipi_send(int cpu) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&mcip_lock, flags); + __mcip_cmd(CMD_INTRPT_GENERATE_IRQ, cpu); + raw_spin_unlock_irqrestore(&mcip_lock, flags); +} + +static void mcip_ipi_clear(int irq) +{ + unsigned int cpu; + unsigned long flags; + + raw_spin_lock_irqsave(&mcip_lock, flags); + + /* Who sent the IPI */ + __mcip_cmd(CMD_INTRPT_CHECK_SOURCE, 0); + + cpu = read_aux_reg(ARC_REG_MCIP_READBACK); /* 1,2,4,8... */ + + __mcip_cmd(CMD_INTRPT_GENERATE_ACK, __ffs(cpu)); /* 0,1,2,3... */ + + raw_spin_unlock_irqrestore(&mcip_lock, flags); +} + +volatile int wake_flag; + +static void mcip_wakeup_cpu(int cpu, unsigned long pc) +{ + BUG_ON(cpu == 0); + wake_flag = cpu; +} + +void arc_platform_smp_wait_to_boot(int cpu) +{ + while (wake_flag != cpu) + ; + + wake_flag = 0; + __asm__ __volatile__("j @first_lines_of_secondary \n"); +} + +struct plat_smp_ops plat_smp_ops = { + .info = smp_cpuinfo_buf, + .cpu_kick = mcip_wakeup_cpu, + .ipi_send = mcip_ipi_send, + .ipi_clear = mcip_ipi_clear, +}; + +void mcip_init_early_smp(void) +{ +#define IS_AVAIL1(var, str) ((var) ? str : "") + + struct mcip_bcr { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad3:8, + idu:1, llm:1, num_cores:6, + iocoh:1, grtc:1, dbg:1, pad2:1, + msg:1, sem:1, ipi:1, pad:1, + ver:8; +#else + unsigned int ver:8, + pad:1, ipi:1, sem:1, msg:1, + pad2:1, dbg:1, grtc:1, iocoh:1, + num_cores:6, llm:1, idu:1, + pad3:8; +#endif + } mp; + + READ_BCR(ARC_REG_MCIP_BCR, mp); + + sprintf(smp_cpuinfo_buf, + "Extn [SMP]\t: ARConnect (v%d): %d cores with %s%s%s%s\n", + mp.ver, mp.num_cores, + IS_AVAIL1(mp.ipi, "IPI "), + IS_AVAIL1(mp.idu, "IDU "), + IS_AVAIL1(mp.dbg, "DEBUG "), + IS_AVAIL1(mp.grtc, "GRTC")); + + if (mp.dbg) { + __mcip_cmd_data(CMD_DEBUG_SET_SELECT, 0, 0xf); + __mcip_cmd_data(CMD_DEBUG_SET_MASK, 0xf, 0xf); + } +} diff --git a/arch/arc/plat-sim/platform.c b/arch/arc/plat-sim/platform.c index 114fdc30941c..8795ae2ef48a 100644 --- a/arch/arc/plat-sim/platform.c +++ b/arch/arc/plat-sim/platform.c @@ -10,6 +10,7 @@ #include #include +#include /*----------------------- Machine Descriptions ------------------------------ * @@ -27,4 +28,8 @@ static const char *simulation_compat[] __initconst = { MACHINE_START(SIMULATION, "simulation") .dt_compat = simulation_compat, +#ifdef CONFIG_ARC_MCIP + .init_early = mcip_init_early_smp, + .init_smp = mcip_init_smp, +#endif MACHINE_END -- cgit v1.2.3-58-ga151 From 72d72880612705143ad32cf4ede0d6ae27e8b975 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 24 Dec 2014 18:41:55 +0530 Subject: ARCv2: SMP: clocksource: Enable Global Real Time counter Cc: Daniel Lezcano Cc: Thomas Gleixner Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 5 +++++ arch/arc/include/asm/mcip.h | 3 +++ arch/arc/kernel/mcip.c | 3 +++ arch/arc/kernel/time.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 56 insertions(+) (limited to 'arch/arc/include') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index ef5ca5969eaf..1b684595e258 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -384,6 +384,11 @@ config ARC_HAS_RTC default n 
depends on !SMP +config ARC_HAS_GRTC + bool "SMP synchronized 64-bit cycle counter" + default y + depends on SMP + config ARC_NUMBER_OF_INTERRUPTS int "Number of interrupts" range 8 240 diff --git a/arch/arc/include/asm/mcip.h b/arch/arc/include/asm/mcip.h index 31f9bac77a27..52c11f0bb0e5 100644 --- a/arch/arc/include/asm/mcip.h +++ b/arch/arc/include/asm/mcip.h @@ -39,6 +39,9 @@ struct mcip_cmd { #define CMD_DEBUG_SET_MASK 0x34 #define CMD_DEBUG_SET_SELECT 0x36 +#define CMD_GRTC_READ_LO 0x42 +#define CMD_GRTC_READ_HI 0x43 + #define CMD_IDU_ENABLE 0x71 #define CMD_IDU_DISABLE 0x72 #define CMD_IDU_SET_MODE 0x74 diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index 35921c3ab394..ad7e90b97f6e 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -154,4 +154,7 @@ void mcip_init_early_smp(void) __mcip_cmd_data(CMD_DEBUG_SET_SELECT, 0, 0xf); __mcip_cmd_data(CMD_DEBUG_SET_MASK, 0xf, 0xf); } + + if (IS_ENABLED(CONFIG_ARC_HAS_GRTC) && !mp.grtc) + panic("kernel trying to use non-existent GRTC\n"); } diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c index da495478a40b..3364d2bbc515 100644 --- a/arch/arc/kernel/time.c +++ b/arch/arc/kernel/time.c @@ -45,6 +45,8 @@ #include #include +#include + /* Timer related Aux registers */ #define ARC_REG_TIMER0_LIMIT 0x23 /* timer 0 limit */ #define ARC_REG_TIMER0_CTRL 0x22 /* timer 0 control */ @@ -60,6 +62,48 @@ /********** Clock Source Device *********/ +#ifdef CONFIG_ARC_HAS_GRTC + +static int arc_counter_setup(void) +{ + return 1; +} + +static cycle_t arc_counter_read(struct clocksource *cs) +{ + unsigned long flags; + union { +#ifdef CONFIG_CPU_BIG_ENDIAN + struct { u32 h, l; }; +#else + struct { u32 l, h; }; +#endif + cycle_t full; + } stamp; + + local_irq_save(flags); + + __mcip_cmd(CMD_GRTC_READ_LO, 0); + stamp.l = read_aux_reg(ARC_REG_MCIP_READBACK); + + __mcip_cmd(CMD_GRTC_READ_HI, 0); + stamp.h = read_aux_reg(ARC_REG_MCIP_READBACK); + + local_irq_restore(flags); + + return stamp.full; +} + +static struct clocksource arc_counter = { + .name = "ARConnect GRTC", + .rating = 400, + .read = arc_counter_read, + .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +#else + #ifdef CONFIG_ARC_HAS_RTC #define AUX_RTC_CTRL 0x103 @@ -134,6 +178,7 @@ static struct clocksource arc_counter = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; +#endif #endif /********** Clock Event Device *********/ -- cgit v1.2.3-58-ga151 From d57f727264f1425a94689bafc7e99e502cb135b5 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 13 Nov 2014 15:54:01 +0530 Subject: ARC: add compiler barrier to LLSC based cmpxchg When auditing cmpxchg call sites, Chuck noted that gcc was optimizing away some of the desired LDs. 
| do { | new = old = *ipi_data_ptr; | new |= 1U << msg; | } while (cmpxchg(ipi_data_ptr, old, new) != old); was generating the code below | 8015cef8: ld r2,[r4,0] <-- First LD | 8015cefc: bset r1,r2,r1 | | 8015cf00: llock r3,[r4] <-- atomic op | 8015cf04: brne r3,r2,8015cf10 | 8015cf08: scond r1,[r4] | 8015cf0c: bnz 8015cf00 | | 8015cf10: brne r3,r2,8015cf00 <-- Branch doesn't go to orig LD Although this was fixed by adding an ACCESS_ONCE at this call site, it seems safer (for now at least) to add a compiler barrier to the LLSC based cmpxchg Reported-by: Chuck Jordan Cc: Acked-by: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta --- arch/arc/include/asm/cmpxchg.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h index 03cd6894855d..90de5c528da2 100644 --- a/arch/arc/include/asm/cmpxchg.h +++ b/arch/arc/include/asm/cmpxchg.h @@ -25,10 +25,11 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new) " scond %3, [%1] \n" " bnz 1b \n" "2: \n" - : "=&r"(prev) - : "r"(ptr), "ir"(expected), - "r"(new) /* can't be "ir". scond can't take limm for "b" */ - : "cc"); + : "=&r"(prev) /* Early clobber, to prevent reg reuse */ + : "r"(ptr), /* Not "m": llock only supports reg direct addr mode */ + "ir"(expected), + "r"(new) /* can't be "ir". scond can't take LIMM for "b" */ + : "cc", "memory"); /* so that gcc knows memory is being written here */ return prev; } -- cgit v1.2.3-58-ga151 From 2576c28e3f623ed401db7e6197241865328620ef Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 20 Nov 2014 15:42:09 +0530 Subject: ARC: add smp barriers around atomics per Documentation/atomic_ops.txt - arch_spin_lock/unlock were lacking the ACQUIRE/RELEASE barriers. Since ARCv2 only provides load/load, store/store and all/all, we need the full barrier - LLOCK/SCOND based atomics, bitops, cmpxchg, which return modified values, were lacking the explicit smp barriers. - Non LLOCK/SCOND variants don't need the explicit barriers since that is implicitly provided by the spin locks used to implement the critical section (the spin lock barriers in turn are also fixed in this commit as explained above) Cc: Paul E.
McKenney Cc: stable@vger.kernel.org Acked-by: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta --- arch/arc/include/asm/atomic.h | 21 +++++++++++++++++++++ arch/arc/include/asm/bitops.h | 19 +++++++++++++++++++ arch/arc/include/asm/cmpxchg.h | 17 +++++++++++++++++ arch/arc/include/asm/spinlock.h | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+) (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 9917a45fc430..20b7dc17979e 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -43,6 +43,12 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ { \ unsigned int temp; \ \ + /* \ + * Explicit full memory barrier needed before/after as \ + * LLOCK/SCOND themselves don't provide any such semantics \ + */ \ + smp_mb(); \ + \ __asm__ __volatile__( \ "1: llock %0, [%1] \n" \ " " #asm_op " %0, %0, %2 \n" \ @@ -52,6 +58,8 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ : "r"(&v->counter), "ir"(i) \ : "cc"); \ \ + smp_mb(); \ + \ return temp; \ } @@ -105,6 +113,9 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ unsigned long flags; \ unsigned long temp; \ \ + /* \ + * spin lock/unlock provides the needed smp_mb() before/after \ + */ \ atomic_ops_lock(flags); \ temp = v->counter; \ temp c_op i; \ @@ -142,9 +153,19 @@ ATOMIC_OP(and, &=, and) #define __atomic_add_unless(v, a, u) \ ({ \ int c, old; \ + \ + /* \ + * Explicit full memory barrier needed before/after as \ + * LLOCK/SCOND themselves don't provide any such semantics \ + */ \ + smp_mb(); \ + \ c = atomic_read(v); \ while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c)\ c = old; \ + \ + smp_mb(); \ + \ c; \ }) diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index 829a8a2e9704..dd03fd931bb7 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -117,6 +117,12 @@ static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m) if (__builtin_constant_p(nr)) nr &= 0x1f; + /* + * Explicit full memory barrier needed before/after as + * LLOCK/SCOND themselves don't provide any such semantics + */ + smp_mb(); + __asm__ __volatile__( "1: llock %0, [%2] \n" " bset %1, %0, %3 \n" @@ -126,6 +132,8 @@ static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m) : "r"(m), "ir"(nr) : "cc"); + smp_mb(); + return (old & (1 << nr)) != 0; } @@ -139,6 +147,8 @@ test_and_clear_bit(unsigned long nr, volatile unsigned long *m) if (__builtin_constant_p(nr)) nr &= 0x1f; + smp_mb(); + __asm__ __volatile__( "1: llock %0, [%2] \n" " bclr %1, %0, %3 \n" @@ -148,6 +158,8 @@ test_and_clear_bit(unsigned long nr, volatile unsigned long *m) : "r"(m), "ir"(nr) : "cc"); + smp_mb(); + return (old & (1 << nr)) != 0; } @@ -161,6 +173,8 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *m) if (__builtin_constant_p(nr)) nr &= 0x1f; + smp_mb(); + __asm__ __volatile__( "1: llock %0, [%2] \n" " bxor %1, %0, %3 \n" @@ -170,6 +184,8 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *m) : "r"(m), "ir"(nr) : "cc"); + smp_mb(); + return (old & (1 << nr)) != 0; } @@ -249,6 +265,9 @@ static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m) if (__builtin_constant_p(nr)) nr &= 0x1f; + /* + * spin lock/unlock provide the needed smp_mb() before/after + */ bitops_lock(flags); old = *m; diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h index 90de5c528da2..44fd531f4d7b ---
a/arch/arc/include/asm/cmpxchg.h +++ b/arch/arc/include/asm/cmpxchg.h @@ -10,6 +10,8 @@ #define __ASM_ARC_CMPXCHG_H #include + +#include #include #ifdef CONFIG_ARC_HAS_LLSC @@ -19,6 +21,12 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new) { unsigned long prev; + /* + * Explicit full memory barrier needed before/after as + * LLOCK/SCOND themselves don't provide any such semantics + */ + smp_mb(); + __asm__ __volatile__( "1: llock %0, [%1] \n" " brne %0, %2, 2f \n" @@ -31,6 +39,8 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new) "r"(new) /* can't be "ir". scond can't take LIMM for "b" */ : "cc", "memory"); /* so that gcc knows memory is being written here */ + smp_mb(); + return prev; } @@ -43,6 +53,9 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new) int prev; volatile unsigned long *p = ptr; + /* + * spin lock/unlock provide the needed smp_mb() before/after + */ atomic_ops_lock(flags); prev = *p; if (prev == expected) @@ -78,12 +91,16 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr, switch (size) { case 4: + smp_mb(); + __asm__ __volatile__( " ex %0, [%1] \n" : "+r"(val) : "r"(ptr) : "memory"); + smp_mb(); + return val; } return __xchg_bad_pointer(); diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index b6a8c2dfbe6e..e1651df6a93d 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -22,24 +22,46 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__; + /* + * This smp_mb() is technically superfluous, we only need the one + * after the lock for providing the ACQUIRE semantics. + * However doing the "right" thing was regressing hackbench + * so keeping this, pending further investigation + */ + smp_mb(); + __asm__ __volatile__( "1: ex %0, [%1] \n" " breq %0, %2, 1b \n" : "+&r" (tmp) : "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__) : "memory"); + + /* + * ACQUIRE barrier to ensure load/store after taking the lock + * don't "bleed-up" out of the critical section (leak-in is allowed) + * http://www.spinics.net/lists/kernel/msg2010409.html + * + * ARCv2 only has load-load, store-store and all-all barrier + * thus need the full all-all barrier + */ + smp_mb(); } static inline int arch_spin_trylock(arch_spinlock_t *lock) { unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__; + smp_mb(); + __asm__ __volatile__( "1: ex %0, [%1] \n" : "+r" (tmp) : "r"(&(lock->slock)) : "memory"); + smp_mb(); + return (tmp == __ARCH_SPIN_LOCK_UNLOCKED__); } @@ -47,12 +69,22 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) { unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__; + /* + * RELEASE barrier: given the instructions avail on ARCv2, full barrier + * is the only option + */ + smp_mb(); + __asm__ __volatile__( " ex %0, [%1] \n" : "+r" (tmp) : "r"(&(lock->slock)) : "memory"); + /* + * superfluous, but keeping for now - see pairing version in + * arch_spin_lock above + */ smp_mb(); } -- cgit v1.2.3-58-ga151 From b8a033023994c4e59697bb3b16b441b38f258390 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 11 Mar 2015 21:42:37 +0530 Subject: ARCv2: barriers ARCv2 based HS38 cores are weakly ordered and thus need explicit barriers in kernel proper.
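To see what the weak ordering can break, consider the classic message-passing pattern. This is an illustrative sketch only (the payload and flag names are made up, not taken from any of these patches); on ARCv2, per the barrier.h added below, smp_wmb() and smp_rmb() compile down to "dmb 2" and "dmb 1" respectively:

    static int payload;     /* made-up shared data */
    static int ready;       /* made-up flag guarding it */

    /* CPU 0: publish the payload, then raise the flag */
    static void producer(void)
    {
            payload = 42;
            smp_wmb();              /* order payload store before flag store */
            ACCESS_ONCE(ready) = 1;
    }

    /* CPU 1: observe the flag, then read the payload */
    static int consumer(void)
    {
            while (!ACCESS_ONCE(ready))
                    cpu_relax();
            smp_rmb();              /* order flag load before payload load */
            return payload;         /* guaranteed to see 42 */
    }

Without the two barriers, a weakly ordered core may make the stores (or the loads) visible in the opposite order, so the consumer could see ready == 1 and still read a stale payload.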
The SMP barrier is provided by the DMB instruction, which also guarantees a local barrier, hence it is used as the backend of the smp_*mb() as well as the *mb() APIs. Barriers are also hooked into the MMIO accessors to avoid ordering issues in IO. Cc: Peter Zijlstra (Intel) Reviewed-by: Will Deacon Signed-off-by: Vineet Gupta --- arch/arc/include/asm/Kbuild | 1 - arch/arc/include/asm/barrier.h | 48 ++++++++++++++++++++++++++++++++++++++++++ arch/arc/include/asm/io.h | 42 +++++++++++++++++++++++++++++++++--- 3 files changed, 87 insertions(+), 4 deletions(-) create mode 100644 arch/arc/include/asm/barrier.h (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index be0c39e76f7c..59e2dd1d434f 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -1,5 +1,4 @@ generic-y += auxvec.h -generic-y += barrier.h generic-y += bitsperlong.h generic-y += bugs.h generic-y += clkdev.h diff --git a/arch/arc/include/asm/barrier.h b/arch/arc/include/asm/barrier.h new file mode 100644 index 000000000000..a7209983ee64 --- /dev/null +++ b/arch/arc/include/asm/barrier.h @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_BARRIER_H +#define __ASM_BARRIER_H + +#ifdef CONFIG_ISA_ARCV2 + +/* + * ARCv2 based HS38 cores are in-order issue, but still weakly ordered + * due to micro-arch buffering/queuing of load/store, cache hit vs. miss ... + * + * Explicit barrier provided by DMB instruction + * - Operand supports fine grained load/store/load+store semantics + * - Ensures that selected memory operation issued before it will complete + * before any subsequent memory operation of same type + * - DMB guarantees SMP as well as local barrier semantics + * (asm-generic/barrier.h ensures sane smp_*mb if not defined here, i.e. + * UP: barrier(), SMP: smp_*mb == *mb) + * - DSYNC provides DMB+completion_of_cache_bpu_maintenance_ops hence not needed + * in the general case. Plus it only provides a full barrier. + */ + +#define mb() asm volatile("dmb 3\n" : : : "memory") +#define rmb() asm volatile("dmb 1\n" : : : "memory") +#define wmb() asm volatile("dmb 2\n" : : : "memory") + +#endif + +#ifdef CONFIG_ISA_ARCOMPACT + +/* + * ARCompact based cores (ARC700) only have the SYNC instruction which is super + * heavy weight as it flushes the pipeline as well. + * There are no real SMP implementations of such cores.
+ */ + +#define mb() asm volatile("sync\n" : : : "memory") +#endif + +#include + +#endif diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h index cabd518cb253..00f97a2f5fa6 100644 --- a/arch/arc/include/asm/io.h +++ b/arch/arc/include/asm/io.h @@ -98,9 +98,45 @@ static inline void __raw_writel(u32 w, volatile void __iomem *addr) } -#define readb_relaxed readb -#define readw_relaxed readw -#define readl_relaxed readl +#ifdef CONFIG_ISA_ARCV2 +#include +#define __iormb() rmb() +#define __iowmb() wmb() +#else +#define __iormb() do { } while (0) +#define __iowmb() do { } while (0) +#endif + +/* + * MMIO can also get buffered/optimized in micro-arch, so barriers needed + * Based on ARM model for the typical use case + * + * + * + * or: + * + * + * + * http://lkml.kernel.org/r/20150622133656.GG1583@arm.com + */ +#define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(); __v; }) +#define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(); __v; }) +#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(); __v; }) + +#define writeb(v,c) ({ __iowmb(); writeb_relaxed(v,c); }) +#define writew(v,c) ({ __iowmb(); writew_relaxed(v,c); }) +#define writel(v,c) ({ __iowmb(); writel_relaxed(v,c); }) + +/* + * Relaxed API for drivers which can handle any ordering themselves + */ +#define readb_relaxed(c) __raw_readb(c) +#define readw_relaxed(c) __raw_readw(c) +#define readl_relaxed(c) __raw_readl(c) + +#define writeb_relaxed(v,c) __raw_writeb(v,c) +#define writew_relaxed(v,c) __raw_writew(v,c) +#define writel_relaxed(v,c) __raw_writel(v,c) #include -- cgit v1.2.3-58-ga151 From 04e2eee4b02edcafce96c9c37b31b1a3318291a4 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 31 Mar 2015 22:38:21 +0530 Subject: ARC: Reduce bitops lines of code using macros No semantic changes! Acked-by: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta --- arch/arc/include/asm/bitops.h | 477 +++++++++++++----------------------------- 1 file changed, 144 insertions(+), 333 deletions(-) (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index dd03fd931bb7..99fe118d3730 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -18,83 +18,50 @@ #include #include #include +#ifndef CONFIG_ARC_HAS_LLSC +#include +#endif -/* - * Hardware assisted read-modify-write using ARC700 LLOCK/SCOND insns. - * The Kconfig glue ensures that in SMP, this is only set if the container - * SoC/platform has cross-core coherent LLOCK/SCOND - */ #if defined(CONFIG_ARC_HAS_LLSC) -static inline void set_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned int temp; - - m += nr >> 5; - - /* - * ARC ISA micro-optimization: - * - * Instructions dealing with bitpos only consider lower 5 bits (0-31) - * e.g (x << 33) is handled like (x << 1) by ASL instruction - * (mem pointer still needs adjustment to point to next word) - * - * Hence the masking to clamp @nr arg can be elided in general. - * - * However if @nr is a constant (above assumed it in a register), - * and greater than 31, gcc can optimize away (x << 33) to 0, - * as overflow, given the 32-bit ISA. Thus masking needs to be done - * for constant @nr, but no code is generated due to const prop.
- */ - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - __asm__ __volatile__( - "1: llock %0, [%1] \n" - " bset %0, %0, %2 \n" - " scond %0, [%1] \n" - " bnz 1b \n" - : "=&r"(temp) - : "r"(m), "ir"(nr) - : "cc"); -} - -static inline void clear_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned int temp; - - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - __asm__ __volatile__( - "1: llock %0, [%1] \n" - " bclr %0, %0, %2 \n" - " scond %0, [%1] \n" - " bnz 1b \n" - : "=&r"(temp) - : "r"(m), "ir"(nr) - : "cc"); -} - -static inline void change_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned int temp; - - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; +/* + * Hardware assisted Atomic-R-M-W + */ - __asm__ __volatile__( - "1: llock %0, [%1] \n" - " bxor %0, %0, %2 \n" - " scond %0, [%1] \n" - " bnz 1b \n" - : "=&r"(temp) - : "r"(m), "ir"(nr) - : "cc"); +#define BIT_OP(op, c_op, asm_op) \ +static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\ +{ \ + unsigned int temp; \ + \ + m += nr >> 5; \ + \ + /* \ + * ARC ISA micro-optimization: \ + * \ + * Instructions dealing with bitpos only consider lower 5 bits \ + * e.g (x << 33) is handled like (x << 1) by ASL instruction \ + * (mem pointer still needs adjustment to point to next word) \ + * \ + * Hence the masking to clamp @nr arg can be elided in general. \ + * \ + * However if @nr is a constant (above assumed in a register), \ + * and greater than 31, gcc can optimize away (x << 33) to 0, \ + * as overflow, given the 32-bit ISA. Thus masking needs to be \ + * done for const @nr, but no code is generated due to gcc \ + * const prop. \ + */ \ + if (__builtin_constant_p(nr)) \ + nr &= 0x1f; \ + \ + __asm__ __volatile__( \ + "1: llock %0, [%1] \n" \ + " " #asm_op " %0, %0, %2 \n" \ + " scond %0, [%1] \n" \ + " bnz 1b \n" \ + : "=&r"(temp) /* Early clobber, to prevent reg reuse */ \ + : "r"(m), /* Not "m": llock only supports reg direct addr mode */ \ + "ir"(nr) \ + : "cc"); \ } /* @@ -108,91 +75,38 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *m) * Since ARC lacks an equivalent h/w primitive, the bit is set unconditionally * and the old value of bit is returned */ -static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long old, temp; - - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - /* - * Explicit full memory barrier needed before/after as - * LLOCK/SCOND themselves don't provide any such semantics - */ - smp_mb(); - - __asm__ __volatile__( - "1: llock %0, [%2] \n" - " bset %1, %0, %3 \n" - " scond %1, [%2] \n" - " bnz 1b \n" - : "=&r"(old), "=&r"(temp) - : "r"(m), "ir"(nr) - : "cc"); - - smp_mb(); - - return (old & (1 << nr)) != 0; -} - -static inline int -test_and_clear_bit(unsigned long nr, volatile unsigned long *m) -{ ... -static inline int -test_and_change_bit(unsigned long nr, volatile unsigned long *m) -{ ... - __asm__ __volatile__( - "1: llock %0, [%2] \n" - " bxor %1, %0, %3 \n" - " scond %1, [%2] \n" - " bnz 1b \n" - : "=&r"(old), "=&r"(temp) - :
"r"(m), "ir"(nr) - : "cc"); - - smp_mb(); - - return (old & (1 << nr)) != 0; +#define TEST_N_BIT_OP(op, c_op, asm_op) \ +static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\ +{ \ + unsigned long old, temp; \ + \ + m += nr >> 5; \ + \ + if (__builtin_constant_p(nr)) \ + nr &= 0x1f; \ + \ + /* \ + * Explicit full memory barrier needed before/after as \ + * LLOCK/SCOND themselves don't provide any such smenatic \ + */ \ + smp_mb(); \ + \ + __asm__ __volatile__( \ + "1: llock %0, [%2] \n" \ + " " #asm_op " %1, %0, %3 \n" \ + " scond %1, [%2] \n" \ + " bnz 1b \n" \ + : "=&r"(old), "=&r"(temp) \ + : "r"(m), "ir"(nr) \ + : "cc"); \ + \ + smp_mb(); \ + \ + return (old & (1 << nr)) != 0; \ } #else /* !CONFIG_ARC_HAS_LLSC */ -#include - /* * Non hardware assisted Atomic-R-M-W * Locking would change to irq-disabling only (UP) and spinlocks (SMP) @@ -209,111 +123,43 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *m) * at compile time) */ -static inline void set_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long temp, flags; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - bitops_lock(flags); - - temp = *m; - *m = temp | (1UL << nr); - - bitops_unlock(flags); -} - -static inline void clear_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long temp, flags; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - bitops_lock(flags); - - temp = *m; - *m = temp & ~(1UL << nr); - - bitops_unlock(flags); -} - -static inline void change_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long temp, flags; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - bitops_lock(flags); - - temp = *m; - *m = temp ^ (1UL << nr); - - bitops_unlock(flags); +#define BIT_OP(op, c_op, asm_op) \ +static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\ +{ \ + unsigned long temp, flags; \ + m += nr >> 5; \ + \ + if (__builtin_constant_p(nr)) \ + nr &= 0x1f; \ + \ + /* \ + * spin lock/unlock provide the needed smp_mb() before/after \ + */ \ + bitops_lock(flags); \ + \ + temp = *m; \ + *m = temp c_op (1UL << nr); \ + \ + bitops_unlock(flags); \ } -static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long old, flags; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - /* - * spin lock/unlock provide the needed smp_mb() before/after - */ - bitops_lock(flags); - - old = *m; - *m = old | (1 << nr); - - bitops_unlock(flags); - - return (old & (1 << nr)) != 0; -} - -static inline int -test_and_clear_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long old, flags; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - bitops_lock(flags); - - old = *m; - *m = old & ~(1 << nr); - - bitops_unlock(flags); - - return (old & (1 << nr)) != 0; -} - -static inline int -test_and_change_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long old, flags; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - bitops_lock(flags); - - old = *m; - *m = old ^ (1 << nr); - - bitops_unlock(flags); - - return (old & (1 << nr)) != 0; +#define TEST_N_BIT_OP(op, c_op, asm_op) \ +static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\ +{ \ + unsigned long old, flags; \ + m += nr >> 5; \ + \ + if (__builtin_constant_p(nr)) \ + nr &= 0x1f; \ + \ + bitops_lock(flags); \ + \ + old = *m; \ + *m = old c_op (1 << nr); \ + \ + bitops_unlock(flags); \ + \ + return (old & (1 << nr)) 
!= 0; \ } #endif /* CONFIG_ARC_HAS_LLSC */ @@ -322,86 +168,51 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *m) * Non atomic variants **************************************/ -static inline void __set_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long temp; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - temp = *m; - *m = temp | (1UL << nr); -} - -static inline void __clear_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long temp; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - temp = *m; - *m = temp & ~(1UL << nr); -} - -static inline void __change_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long temp; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - temp = *m; - *m = temp ^ (1UL << nr); -} - -static inline int -__test_and_set_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long old; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - old = *m; - *m = old | (1 << nr); - - return (old & (1 << nr)) != 0; +#define __BIT_OP(op, c_op, asm_op) \ +static inline void __##op##_bit(unsigned long nr, volatile unsigned long *m) \ +{ \ + unsigned long temp; \ + m += nr >> 5; \ + \ + if (__builtin_constant_p(nr)) \ + nr &= 0x1f; \ + \ + temp = *m; \ + *m = temp c_op (1UL << nr); \ } -static inline int -__test_and_clear_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long old; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - old = *m; - *m = old & ~(1 << nr); - - return (old & (1 << nr)) != 0; +#define __TEST_N_BIT_OP(op, c_op, asm_op) \ +static inline int __test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\ +{ \ + unsigned long old; \ + m += nr >> 5; \ + \ + if (__builtin_constant_p(nr)) \ + nr &= 0x1f; \ + \ + old = *m; \ + *m = old c_op (1 << nr); \ + \ + return (old & (1 << nr)) != 0; \ } -static inline int -__test_and_change_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long old; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - old = *m; - *m = old ^ (1 << nr); - - return (old & (1 << nr)) != 0; -} +#define BIT_OPS(op, c_op, asm_op) \ + \ + /* set_bit(), clear_bit(), change_bit() */ \ + BIT_OP(op, c_op, asm_op) \ + \ + /* test_and_set_bit(), test_and_clear_bit(), test_and_change_bit() */\ + TEST_N_BIT_OP(op, c_op, asm_op) \ + \ + /* __set_bit(), __clear_bit(), __change_bit() */ \ + __BIT_OP(op, c_op, asm_op) \ + \ + /* __test_and_set_bit(), __test_and_clear_bit(), __test_and_change_bit() */\ + __TEST_N_BIT_OP(op, c_op, asm_op) + +BIT_OPS(set, |, bset) +BIT_OPS(clear, & ~, bclr) +BIT_OPS(change, ^, bxor) /* * This routine doesn't need to be atomic. -- cgit v1.2.3-58-ga151 From a5c8b52abe677977883655166796f167ef1e0084 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 11 Dec 2014 16:05:16 +0530 Subject: ARCv2: STAR 9000837815 workaround hardware exclusive transactions livelock A quad core SMP build could get into hardware livelock with concurrent LLOCK/SCOND. Work around that by adding a PREFETCHW, which is serialized by the SCU (System Coherency Unit). It brings the cache line into the Exclusive state and makes the others invalidate their lines. This gives the winner enough time to complete the LLOCK/SCOND before the others can get the line back. The prefetchw in the ll/sc loop is not nice, but it is the only software workaround for the current version of the RTL.
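To make the effect of the workaround concrete, this is roughly what the ATOMIC_OP(add, +=, add) instance from the diff below expands to on an ARCv2 SMP build (a sketch of the macro expansion, not literal patch code):

    static inline void atomic_add(int i, atomic_t *v)
    {
            unsigned int temp;

            __asm__ __volatile__(
            "1:                             \n"
            "       prefetchw [%1]          \n"     /* pull line into Exclusive state; serialized by SCU */
            "       llock   %0, [%1]        \n"     /* load-locked the counter */
            "       add     %0, %0, %2      \n"
            "       scond   %0, [%1]        \n"     /* store-conditional; fails if exclusivity was lost */
            "       bnz     1b              \n"     /* retry until scond succeeds */
            : "=&r"(temp)
            : "r"(&v->counter), "ir"(i)
            : "cc");
    }

(No smp_mb() here: per the earlier barrier commit, only the value-returning atomic_*_return() variants carry the explicit barriers.)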
Cc: Peter Zijlstra (Intel) Cc: Will Deacon Signed-off-by: Vineet Gupta --- arch/arc/include/asm/atomic.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 20b7dc17979e..03484cb4d16d 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -23,13 +23,21 @@ #define atomic_set(v, i) (((v)->counter) = (i)) +#ifdef CONFIG_ISA_ARCV2 +#define PREFETCHW " prefetchw [%1] \n" +#else +#define PREFETCHW +#endif + #define ATOMIC_OP(op, c_op, asm_op) \ static inline void atomic_##op(int i, atomic_t *v) \ { \ unsigned int temp; \ \ __asm__ __volatile__( \ - "1: llock %0, [%1] \n" \ + "1: \n" \ + PREFETCHW \ + " llock %0, [%1] \n" \ " " #asm_op " %0, %0, %2 \n" \ " scond %0, [%1] \n" \ " bnz 1b \n" \ @@ -50,7 +58,9 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ smp_mb(); \ \ __asm__ __volatile__( \ - "1: llock %0, [%1] \n" \ + "1: \n" \ + PREFETCHW \ + " llock %0, [%1] \n" \ " " #asm_op " %0, %0, %2 \n" \ " scond %0, [%1] \n" \ " bnz 1b \n" \ -- cgit v1.2.3-58-ga151 From 795f4558562fd5318260d5d8144a2f8612aeda7b Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 3 Apr 2015 12:37:07 +0300 Subject: ARCv2: SLC: Handle explicit flush for DMA ops (w/o IO-coherency) L2 cache on ARCHS processors is called SLC (System Level Cache). For working DMA (in the absence of hardware assisted IO Coherency) we need to manage SLC explicitly when buffers transition between the CPU and controllers. Signed-off-by: Vineet Gupta --- arch/arc/include/asm/cache.h | 11 ++++++++ arch/arc/mm/cache.c | 64 ++++++++++++++++++++++++++++++++++++++++++-- arch/arc/mm/dma.c | 12 +++++++++ 3 files changed, 85 insertions(+), 2 deletions(-) (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index d21c76d6b054..d67345d3e2d4 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -82,5 +82,16 @@ extern void read_decode_cache_bcr(void); /*System-level cache (L2 cache) related Auxiliary registers */ #define ARC_REG_SLC_CFG 0x901 +#define ARC_REG_SLC_CTRL 0x903 +#define ARC_REG_SLC_FLUSH 0x904 +#define ARC_REG_SLC_INVALIDATE 0x905 +#define ARC_REG_SLC_RGN_START 0x914 +#define ARC_REG_SLC_RGN_END 0x916 + +/* Bit val in SLC_CONTROL */ +#define SLC_CTRL_IM 0x040 +#define SLC_CTRL_DISABLE 0x001 +#define SLC_CTRL_BUSY 0x100 +#define SLC_CTRL_RGN_OP_INV 0x200 #endif /* _ASM_CACHE_H */ diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 0eaaee60fd0b..b29d62ed4f7e 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -21,6 +21,8 @@ #include #include +static int l2_line_sz; + void (*_cache_line_loop_ic_fn)(unsigned long paddr, unsigned long vaddr, unsigned long sz, const int cacheop); @@ -120,13 +122,16 @@ dc_chk: p_dc->ver = dbcr.ver; slc_chk: + if (!is_isa_arcv2()) + return; + p_slc = &cpuinfo_arc700[cpu].slc; READ_BCR(ARC_REG_SLC_BCR, sbcr); if (sbcr.ver) { READ_BCR(ARC_REG_SLC_CFG, slc_cfg); p_slc->ver = sbcr.ver; p_slc->sz_k = 128 << slc_cfg.sz; - p_slc->line_len = (slc_cfg.lsz == 0) ? 128 : 64; + l2_line_sz = p_slc->line_len = (slc_cfg.lsz == 0) ?
128 : 64; } } @@ -460,6 +465,53 @@ static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr, #endif /* CONFIG_ARC_HAS_ICACHE */ +noinline void slc_op(unsigned long paddr, unsigned long sz, const int op) +{ +#ifdef CONFIG_ISA_ARCV2 + unsigned long flags; + unsigned int ctrl; + + local_irq_save(flags); + + /* + * The Region Flush operation is specified by CTRL.RGN_OP[11..9] + * - b'000 (default) is Flush, + * - b'001 is Invalidate if CTRL.IM == 0 + * - b'001 is Flush-n-Invalidate if CTRL.IM == 1 + */ + ctrl = read_aux_reg(ARC_REG_SLC_CTRL); + + /* Don't rely on default value of IM bit */ + if (!(op & OP_FLUSH)) /* i.e. OP_INV */ + ctrl &= ~SLC_CTRL_IM; /* clear IM: Disable flush before Inv */ + else + ctrl |= SLC_CTRL_IM; + + if (op & OP_INV) + ctrl |= SLC_CTRL_RGN_OP_INV; /* Inv or flush-n-inv */ + else + ctrl &= ~SLC_CTRL_RGN_OP_INV; + + write_aux_reg(ARC_REG_SLC_CTRL, ctrl); + + /* + * Lower bits are ignored, no need to clip + * END needs to be set up before START (the latter triggers the operation) + * END can't be the same as START, so add (l2_line_sz - 1) to sz + */ + write_aux_reg(ARC_REG_SLC_RGN_END, (paddr + sz + l2_line_sz - 1)); + write_aux_reg(ARC_REG_SLC_RGN_START, paddr); + + while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY); + + local_irq_restore(flags); +#endif +} + +static inline int need_slc_flush(void) +{ + return is_isa_arcv2() && l2_line_sz; +} /*********************************************************** * Exported APIs @@ -509,22 +561,30 @@ void flush_dcache_page(struct page *page) } EXPORT_SYMBOL(flush_dcache_page); - void dma_cache_wback_inv(unsigned long start, unsigned long sz) { __dc_line_op_k(start, sz, OP_FLUSH_N_INV); + + if (need_slc_flush()) + slc_op(start, sz, OP_FLUSH_N_INV); } EXPORT_SYMBOL(dma_cache_wback_inv); void dma_cache_inv(unsigned long start, unsigned long sz) { __dc_line_op_k(start, sz, OP_INV); + + if (need_slc_flush()) + slc_op(start, sz, OP_INV); } EXPORT_SYMBOL(dma_cache_inv); void dma_cache_wback(unsigned long start, unsigned long sz) { __dc_line_op_k(start, sz, OP_FLUSH); + + if (need_slc_flush()) + slc_op(start, sz, OP_FLUSH); } EXPORT_SYMBOL(dma_cache_wback); diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 2cfe81dca92a..74a637a1cfc4 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -66,6 +66,18 @@ void *dma_alloc_coherent(struct device *dev, size_t size, /* This is bus address, platform dependent */ *dma_handle = (dma_addr_t)paddr; + /* + * Evict any existing L1 and/or L2 lines for the backing page + * in case it was used earlier as a normal "cached" page. + * Yeah this bit us - STAR 9000898266 + * + * Although the core does call flush_cache_vmap(), it gets kvaddr hence + * can't be used to efficiently flush L1 and/or L2 which need paddr. + * Currently flush_cache_vmap nukes the L1 cache completely which + * will be optimized as a separate commit + */ + dma_cache_wback_inv((unsigned long)paddr, size); + return kvaddr; } EXPORT_SYMBOL(dma_alloc_coherent); -- cgit v1.2.3-58-ga151
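For context on how these newly exported hooks get consumed, here is a hedged driver-side sketch of streaming DMA on a system without hardware IO coherency. dma_cache_wback() and dma_cache_inv() are the functions extended above; kick_device() and device_done() are made-up stand-ins for the device's MMIO programming:

    /* TX: CPU-written buffer handed to the device */
    static void send_to_device(void *buf, size_t len)
    {
            /* data may still sit in L1 D$ and/or SLC; push it to memory */
            dma_cache_wback((unsigned long)buf, len);

            kick_device(virt_to_phys(buf), len);    /* made-up MMIO helper */
    }

    /* RX: device writes memory that the CPU will then read */
    static void recv_from_device(void *buf, size_t len)
    {
            /* drop any stale L1/SLC lines covering the buffer first */
            dma_cache_inv((unsigned long)buf, len);

            kick_device(virt_to_phys(buf), len);    /* made-up MMIO helper */
            while (!device_done())                  /* made-up completion poll */
                    cpu_relax();
            /* CPU reads of buf now fetch the DMA'd data from memory */
    }

With an SLC present, each of these calls now performs the L1 line op followed by the matching SLC region op, which is exactly what the dma_cache_* changes above add.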