diff options
Diffstat (limited to 'arch/powerpc/kernel')
73 files changed, 5461 insertions, 2216 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index f960a7944553..445cb6e39d5b 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -55,9 +55,10 @@ obj-$(CONFIG_PPC_RTAS) += rtas.o rtas-rtc.o $(rtaspci-y-y) obj-$(CONFIG_PPC_RTAS_DAEMON) += rtasd.o obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o obj-$(CONFIG_RTAS_PROC) += rtas-proc.o -obj-$(CONFIG_LPARCFG) += lparcfg.o obj-$(CONFIG_IBMVIO) += vio.o obj-$(CONFIG_IBMEBUS) += ibmebus.o +obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \ + eeh_driver.o eeh_event.o eeh_sysfs.o obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_FA_DUMP) += fadump.o @@ -100,7 +101,7 @@ obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o -pci64-$(CONFIG_PPC64) += pci_dn.o isa-bridge.o +pci64-$(CONFIG_PPC64) += pci_dn.o pci-hotplug.o isa-bridge.o obj-$(CONFIG_PCI) += pci_$(CONFIG_WORD_SIZE).o $(pci64-y) \ pci-common.o pci_of_scan.o obj-$(CONFIG_PCI_MSI) += msi.o @@ -115,9 +116,7 @@ obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o -obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o - -ifneq ($(CONFIG_PPC_INDIRECT_IO),y) +ifneq ($(CONFIG_PPC_INDIRECT_PIO),y) obj-y += iomap.o endif diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index ee5b690a0bed..a27ccd5dc6b9 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -651,6 +651,10 @@ static int emulate_vsx(unsigned char __user *addr, unsigned int reg, int sw = 0; int i, j; + /* userland only */ + if (unlikely(!user_mode(regs))) + return 0; + flush_vsx_to_thread(current); if (reg < 32) @@ -764,6 +768,16 @@ int fix_alignment(struct pt_regs *regs) nb = aligninfo[instr].len; flags = aligninfo[instr].flags; + /* ldbrx/stdbrx overlap lfs/stfs in the DSISR unfortunately */ + if (IS_XFORM(instruction) && ((instruction >> 1) & 0x3ff) == 532) { + nb = 8; + flags = LD+SW; + } else if (IS_XFORM(instruction) && + ((instruction >> 1) & 0x3ff) == 660) { + nb = 8; + flags = ST+SW; + } + /* Byteswap little endian loads and stores */ swiz = 0; if (regs->msr & MSR_LE) { diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 6f16ffafa6f0..502c7a4e73f7 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -80,10 +80,11 @@ int main(void) DEFINE(TASKTHREADPPR, offsetof(struct task_struct, thread.ppr)); #else DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); + DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16)); + DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit)); #endif /* CONFIG_PPC64 */ DEFINE(KSP, offsetof(struct thread_struct, ksp)); - DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit)); DEFINE(PT_REGS, offsetof(struct thread_struct, regs)); #ifdef CONFIG_BOOKE DEFINE(THREAD_NORMSAVES, offsetof(struct thread_struct, normsave[0])); @@ -105,9 +106,6 @@ int main(void) DEFINE(KSP_VSID, offsetof(struct thread_struct, ksp_vsid)); #else /* CONFIG_PPC64 */ DEFINE(PGDIR, offsetof(struct thread_struct, pgdir)); -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) - DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, dbcr0)); -#endif #ifdef CONFIG_SPE DEFINE(THREAD_EVR0, offsetof(struct thread_struct, evr[0])); DEFINE(THREAD_ACC, offsetof(struct thread_struct, acc)); @@ -115,6 +113,9 @@ int main(void) DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe)); #endif /* CONFIG_SPE */ #endif /* CONFIG_PPC64 */ +#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) + DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, dbcr0)); +#endif #ifdef CONFIG_KVM_BOOK3S_32_HANDLER DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu)); #endif @@ -132,13 +133,15 @@ int main(void) DEFINE(THREAD_SIER, offsetof(struct thread_struct, sier)); DEFINE(THREAD_MMCR0, offsetof(struct thread_struct, mmcr0)); DEFINE(THREAD_MMCR2, offsetof(struct thread_struct, mmcr2)); - DEFINE(THREAD_MMCRA, offsetof(struct thread_struct, mmcra)); #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM DEFINE(PACATMSCRATCH, offsetof(struct paca_struct, tm_scratch)); DEFINE(THREAD_TM_TFHAR, offsetof(struct thread_struct, tm_tfhar)); DEFINE(THREAD_TM_TEXASR, offsetof(struct thread_struct, tm_texasr)); DEFINE(THREAD_TM_TFIAR, offsetof(struct thread_struct, tm_tfiar)); + DEFINE(THREAD_TM_TAR, offsetof(struct thread_struct, tm_tar)); + DEFINE(THREAD_TM_PPR, offsetof(struct thread_struct, tm_ppr)); + DEFINE(THREAD_TM_DSCR, offsetof(struct thread_struct, tm_dscr)); DEFINE(PT_CKPT_REGS, offsetof(struct thread_struct, ckpt_regs)); DEFINE(THREAD_TRANSACT_VR0, offsetof(struct thread_struct, transact_vr[0])); @@ -452,6 +455,7 @@ int main(void) DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2)); DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3)); #endif + DEFINE(VCPU_SHARED_SPRG3, offsetof(struct kvm_vcpu_arch_shared, sprg3)); DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4)); DEFINE(VCPU_SHARED_SPRG5, offsetof(struct kvm_vcpu_arch_shared, sprg5)); DEFINE(VCPU_SHARED_SPRG6, offsetof(struct kvm_vcpu_arch_shared, sprg6)); diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index ac8f52732fde..41c011cb6070 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -25,11 +25,6 @@ static void scrollscreen(void); #endif -static void draw_byte(unsigned char c, long locX, long locY); -static void draw_byte_32(unsigned char *bits, unsigned int *base, int rb); -static void draw_byte_16(unsigned char *bits, unsigned int *base, int rb); -static void draw_byte_8(unsigned char *bits, unsigned int *base, int rb); - #define __force_data __attribute__((__section__(".data"))) static int g_loc_X __force_data; @@ -52,6 +47,26 @@ static unsigned char vga_font[cmapsz]; int boot_text_mapped __force_data = 0; int force_printk_to_btext = 0; +extern void rmci_on(void); +extern void rmci_off(void); + +static inline void rmci_maybe_on(void) +{ +#if defined(CONFIG_PPC_EARLY_DEBUG_BOOTX) && defined(CONFIG_PPC64) + if (!(mfmsr() & MSR_DR)) + rmci_on(); +#endif +} + +static inline void rmci_maybe_off(void) +{ +#if defined(CONFIG_PPC_EARLY_DEBUG_BOOTX) && defined(CONFIG_PPC64) + if (!(mfmsr() & MSR_DR)) + rmci_off(); +#endif +} + + #ifdef CONFIG_PPC32 /* Calc BAT values for mapping the display and store them * in disp_BAT. Those values are then used from head.S to map @@ -134,7 +149,7 @@ void __init btext_unmap(void) * changes. */ -static void map_boot_text(void) +void btext_map(void) { unsigned long base, offset, size; unsigned char *vbase; @@ -209,7 +224,7 @@ int btext_initialize(struct device_node *np) dispDeviceRect[2] = width; dispDeviceRect[3] = height; - map_boot_text(); + btext_map(); return 0; } @@ -283,7 +298,7 @@ void btext_update_display(unsigned long phys, int width, int height, iounmap(logicalDisplayBase); boot_text_mapped = 0; } - map_boot_text(); + btext_map(); g_loc_X = 0; g_loc_Y = 0; g_max_loc_X = width / 8; @@ -298,6 +313,7 @@ void btext_clearscreen(void) (dispDeviceDepth >> 3)) >> 2; int i,j; + rmci_maybe_on(); for (i=0; i<(dispDeviceRect[3] - dispDeviceRect[1]); i++) { unsigned int *ptr = base; @@ -305,6 +321,7 @@ void btext_clearscreen(void) *(ptr++) = 0; base += (dispDeviceRowBytes >> 2); } + rmci_maybe_off(); } void btext_flushscreen(void) @@ -355,6 +372,8 @@ static void scrollscreen(void) (dispDeviceDepth >> 3)) >> 2; int i,j; + rmci_maybe_on(); + for (i=0; i<(dispDeviceRect[3] - dispDeviceRect[1] - 16); i++) { unsigned int *src_ptr = src; @@ -371,9 +390,116 @@ static void scrollscreen(void) *(dst_ptr++) = 0; dst += (dispDeviceRowBytes >> 2); } + + rmci_maybe_off(); } #endif /* ndef NO_SCROLL */ +static unsigned int expand_bits_8[16] = { + 0x00000000, + 0x000000ff, + 0x0000ff00, + 0x0000ffff, + 0x00ff0000, + 0x00ff00ff, + 0x00ffff00, + 0x00ffffff, + 0xff000000, + 0xff0000ff, + 0xff00ff00, + 0xff00ffff, + 0xffff0000, + 0xffff00ff, + 0xffffff00, + 0xffffffff +}; + +static unsigned int expand_bits_16[4] = { + 0x00000000, + 0x0000ffff, + 0xffff0000, + 0xffffffff +}; + + +static void draw_byte_32(unsigned char *font, unsigned int *base, int rb) +{ + int l, bits; + int fg = 0xFFFFFFFFUL; + int bg = 0x00000000UL; + + for (l = 0; l < 16; ++l) + { + bits = *font++; + base[0] = (-(bits >> 7) & fg) ^ bg; + base[1] = (-((bits >> 6) & 1) & fg) ^ bg; + base[2] = (-((bits >> 5) & 1) & fg) ^ bg; + base[3] = (-((bits >> 4) & 1) & fg) ^ bg; + base[4] = (-((bits >> 3) & 1) & fg) ^ bg; + base[5] = (-((bits >> 2) & 1) & fg) ^ bg; + base[6] = (-((bits >> 1) & 1) & fg) ^ bg; + base[7] = (-(bits & 1) & fg) ^ bg; + base = (unsigned int *) ((char *)base + rb); + } +} + +static inline void draw_byte_16(unsigned char *font, unsigned int *base, int rb) +{ + int l, bits; + int fg = 0xFFFFFFFFUL; + int bg = 0x00000000UL; + unsigned int *eb = (int *)expand_bits_16; + + for (l = 0; l < 16; ++l) + { + bits = *font++; + base[0] = (eb[bits >> 6] & fg) ^ bg; + base[1] = (eb[(bits >> 4) & 3] & fg) ^ bg; + base[2] = (eb[(bits >> 2) & 3] & fg) ^ bg; + base[3] = (eb[bits & 3] & fg) ^ bg; + base = (unsigned int *) ((char *)base + rb); + } +} + +static inline void draw_byte_8(unsigned char *font, unsigned int *base, int rb) +{ + int l, bits; + int fg = 0x0F0F0F0FUL; + int bg = 0x00000000UL; + unsigned int *eb = (int *)expand_bits_8; + + for (l = 0; l < 16; ++l) + { + bits = *font++; + base[0] = (eb[bits >> 4] & fg) ^ bg; + base[1] = (eb[bits & 0xf] & fg) ^ bg; + base = (unsigned int *) ((char *)base + rb); + } +} + +static noinline void draw_byte(unsigned char c, long locX, long locY) +{ + unsigned char *base = calc_base(locX << 3, locY << 4); + unsigned char *font = &vga_font[((unsigned int)c) * 16]; + int rb = dispDeviceRowBytes; + + rmci_maybe_on(); + switch(dispDeviceDepth) { + case 24: + case 32: + draw_byte_32(font, (unsigned int *)base, rb); + break; + case 15: + case 16: + draw_byte_16(font, (unsigned int *)base, rb); + break; + case 8: + draw_byte_8(font, (unsigned int *)base, rb); + break; + } + rmci_maybe_off(); +} + void btext_drawchar(char c) { int cline = 0; @@ -465,107 +591,12 @@ void btext_drawhex(unsigned long v) btext_drawchar(' '); } -static void draw_byte(unsigned char c, long locX, long locY) -{ - unsigned char *base = calc_base(locX << 3, locY << 4); - unsigned char *font = &vga_font[((unsigned int)c) * 16]; - int rb = dispDeviceRowBytes; - - switch(dispDeviceDepth) { - case 24: - case 32: - draw_byte_32(font, (unsigned int *)base, rb); - break; - case 15: - case 16: - draw_byte_16(font, (unsigned int *)base, rb); - break; - case 8: - draw_byte_8(font, (unsigned int *)base, rb); - break; - } -} - -static unsigned int expand_bits_8[16] = { - 0x00000000, - 0x000000ff, - 0x0000ff00, - 0x0000ffff, - 0x00ff0000, - 0x00ff00ff, - 0x00ffff00, - 0x00ffffff, - 0xff000000, - 0xff0000ff, - 0xff00ff00, - 0xff00ffff, - 0xffff0000, - 0xffff00ff, - 0xffffff00, - 0xffffffff -}; - -static unsigned int expand_bits_16[4] = { - 0x00000000, - 0x0000ffff, - 0xffff0000, - 0xffffffff -}; - - -static void draw_byte_32(unsigned char *font, unsigned int *base, int rb) -{ - int l, bits; - int fg = 0xFFFFFFFFUL; - int bg = 0x00000000UL; - - for (l = 0; l < 16; ++l) - { - bits = *font++; - base[0] = (-(bits >> 7) & fg) ^ bg; - base[1] = (-((bits >> 6) & 1) & fg) ^ bg; - base[2] = (-((bits >> 5) & 1) & fg) ^ bg; - base[3] = (-((bits >> 4) & 1) & fg) ^ bg; - base[4] = (-((bits >> 3) & 1) & fg) ^ bg; - base[5] = (-((bits >> 2) & 1) & fg) ^ bg; - base[6] = (-((bits >> 1) & 1) & fg) ^ bg; - base[7] = (-(bits & 1) & fg) ^ bg; - base = (unsigned int *) ((char *)base + rb); - } -} - -static void draw_byte_16(unsigned char *font, unsigned int *base, int rb) -{ - int l, bits; - int fg = 0xFFFFFFFFUL; - int bg = 0x00000000UL; - unsigned int *eb = (int *)expand_bits_16; - - for (l = 0; l < 16; ++l) - { - bits = *font++; - base[0] = (eb[bits >> 6] & fg) ^ bg; - base[1] = (eb[(bits >> 4) & 3] & fg) ^ bg; - base[2] = (eb[(bits >> 2) & 3] & fg) ^ bg; - base[3] = (eb[bits & 3] & fg) ^ bg; - base = (unsigned int *) ((char *)base + rb); - } -} - -static void draw_byte_8(unsigned char *font, unsigned int *base, int rb) +void __init udbg_init_btext(void) { - int l, bits; - int fg = 0x0F0F0F0FUL; - int bg = 0x00000000UL; - unsigned int *eb = (int *)expand_bits_8; - - for (l = 0; l < 16; ++l) - { - bits = *font++; - base[0] = (eb[bits >> 4] & fg) ^ bg; - base[1] = (eb[bits & 0xf] & fg) ^ bg; - base = (unsigned int *) ((char *)base + rb); - } + /* If btext is enabled, we might have a BAT setup for early display, + * thus we do enable some very basic udbg output + */ + udbg_putc = btext_drawchar; } static unsigned char vga_font[cmapsz] = { @@ -913,10 +944,3 @@ static unsigned char vga_font[cmapsz] = { 0x00, 0x00, 0x00, 0x00, }; -void __init udbg_init_btext(void) -{ - /* If btext is enabled, we might have a BAT setup for early display, - * thus we do enable some very basic udbg output - */ - udbg_putc = btext_drawchar; -} diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 92c6b008dd2b..654932727873 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -131,7 +131,8 @@ static const char *cache_type_string(const struct cache *cache) return cache_type_info[cache->type].name; } -static void __cpuinit cache_init(struct cache *cache, int type, int level, struct device_node *ofnode) +static void cache_init(struct cache *cache, int type, int level, + struct device_node *ofnode) { cache->type = type; cache->level = level; @@ -140,7 +141,7 @@ static void __cpuinit cache_init(struct cache *cache, int type, int level, struc list_add(&cache->list, &cache_list); } -static struct cache *__cpuinit new_cache(int type, int level, struct device_node *ofnode) +static struct cache *new_cache(int type, int level, struct device_node *ofnode) { struct cache *cache; @@ -195,7 +196,7 @@ static void cache_cpu_set(struct cache *cache, int cpu) static int cache_size(const struct cache *cache, unsigned int *ret) { const char *propname; - const u32 *cache_size; + const __be32 *cache_size; propname = cache_type_info[cache->type].size_prop; @@ -203,7 +204,7 @@ static int cache_size(const struct cache *cache, unsigned int *ret) if (!cache_size) return -ENODEV; - *ret = *cache_size; + *ret = of_read_number(cache_size, 1); return 0; } @@ -221,7 +222,7 @@ static int cache_size_kb(const struct cache *cache, unsigned int *ret) /* not cache_line_size() because that's a macro in include/linux/cache.h */ static int cache_get_line_size(const struct cache *cache, unsigned int *ret) { - const u32 *line_size; + const __be32 *line_size; int i, lim; lim = ARRAY_SIZE(cache_type_info[cache->type].line_size_props); @@ -238,14 +239,14 @@ static int cache_get_line_size(const struct cache *cache, unsigned int *ret) if (!line_size) return -ENODEV; - *ret = *line_size; + *ret = of_read_number(line_size, 1); return 0; } static int cache_nr_sets(const struct cache *cache, unsigned int *ret) { const char *propname; - const u32 *nr_sets; + const __be32 *nr_sets; propname = cache_type_info[cache->type].nr_sets_prop; @@ -253,7 +254,7 @@ static int cache_nr_sets(const struct cache *cache, unsigned int *ret) if (!nr_sets) return -ENODEV; - *ret = *nr_sets; + *ret = of_read_number(nr_sets, 1); return 0; } @@ -324,7 +325,8 @@ static bool cache_node_is_unified(const struct device_node *np) return of_get_property(np, "cache-unified", NULL); } -static struct cache *__cpuinit cache_do_one_devnode_unified(struct device_node *node, int level) +static struct cache *cache_do_one_devnode_unified(struct device_node *node, + int level) { struct cache *cache; @@ -335,7 +337,8 @@ static struct cache *__cpuinit cache_do_one_devnode_unified(struct device_node * return cache; } -static struct cache *__cpuinit cache_do_one_devnode_split(struct device_node *node, int level) +static struct cache *cache_do_one_devnode_split(struct device_node *node, + int level) { struct cache *dcache, *icache; @@ -357,7 +360,7 @@ err: return NULL; } -static struct cache *__cpuinit cache_do_one_devnode(struct device_node *node, int level) +static struct cache *cache_do_one_devnode(struct device_node *node, int level) { struct cache *cache; @@ -369,7 +372,8 @@ static struct cache *__cpuinit cache_do_one_devnode(struct device_node *node, in return cache; } -static struct cache *__cpuinit cache_lookup_or_instantiate(struct device_node *node, int level) +static struct cache *cache_lookup_or_instantiate(struct device_node *node, + int level) { struct cache *cache; @@ -385,7 +389,7 @@ static struct cache *__cpuinit cache_lookup_or_instantiate(struct device_node *n return cache; } -static void __cpuinit link_cache_lists(struct cache *smaller, struct cache *bigger) +static void link_cache_lists(struct cache *smaller, struct cache *bigger) { while (smaller->next_local) { if (smaller->next_local == bigger) @@ -396,13 +400,13 @@ static void __cpuinit link_cache_lists(struct cache *smaller, struct cache *bigg smaller->next_local = bigger; } -static void __cpuinit do_subsidiary_caches_debugcheck(struct cache *cache) +static void do_subsidiary_caches_debugcheck(struct cache *cache) { WARN_ON_ONCE(cache->level != 1); WARN_ON_ONCE(strcmp(cache->ofnode->type, "cpu")); } -static void __cpuinit do_subsidiary_caches(struct cache *cache) +static void do_subsidiary_caches(struct cache *cache) { struct device_node *subcache_node; int level = cache->level; @@ -423,7 +427,7 @@ static void __cpuinit do_subsidiary_caches(struct cache *cache) } } -static struct cache *__cpuinit cache_chain_instantiate(unsigned int cpu_id) +static struct cache *cache_chain_instantiate(unsigned int cpu_id) { struct device_node *cpu_node; struct cache *cpu_cache = NULL; @@ -448,7 +452,7 @@ out: return cpu_cache; } -static struct cache_dir *__cpuinit cacheinfo_create_cache_dir(unsigned int cpu_id) +static struct cache_dir *cacheinfo_create_cache_dir(unsigned int cpu_id) { struct cache_dir *cache_dir; struct device *dev; @@ -653,7 +657,7 @@ static struct kobj_type cache_index_type = { .default_attrs = cache_index_default_attrs, }; -static void __cpuinit cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir) +static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir) { const char *cache_name; const char *cache_type; @@ -696,7 +700,8 @@ static void __cpuinit cacheinfo_create_index_opt_attrs(struct cache_index_dir *d kfree(buf); } -static void __cpuinit cacheinfo_create_index_dir(struct cache *cache, int index, struct cache_dir *cache_dir) +static void cacheinfo_create_index_dir(struct cache *cache, int index, + struct cache_dir *cache_dir) { struct cache_index_dir *index_dir; int rc; @@ -722,7 +727,8 @@ err: kfree(index_dir); } -static void __cpuinit cacheinfo_sysfs_populate(unsigned int cpu_id, struct cache *cache_list) +static void cacheinfo_sysfs_populate(unsigned int cpu_id, + struct cache *cache_list) { struct cache_dir *cache_dir; struct cache *cache; @@ -740,7 +746,7 @@ static void __cpuinit cacheinfo_sysfs_populate(unsigned int cpu_id, struct cache } } -void __cpuinit cacheinfo_cpu_online(unsigned int cpu_id) +void cacheinfo_cpu_online(unsigned int cpu_id) { struct cache *cache; diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 0b9af015bedc..bfb18c7290b7 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -75,7 +75,7 @@ _GLOBAL(__setup_cpu_e500v2) bl __e500_icache_setup bl __e500_dcache_setup bl __setup_e500_ivors -#ifdef CONFIG_FSL_RIO +#if defined(CONFIG_FSL_RIO) || defined(CONFIG_FSL_PCI) /* Ensure that RFXE is set */ mfspr r3,SPRN_HID1 oris r3,r3,HID1_RFXE@h diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 1f0937d7d4b5..597d954e5860 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -452,8 +452,8 @@ static struct cpu_spec __initdata cpu_specs[] = { .mmu_features = MMU_FTRS_POWER8, .icache_bsize = 128, .dcache_bsize = 128, - .oprofile_type = PPC_OPROFILE_POWER4, - .oprofile_cpu_type = 0, + .oprofile_type = PPC_OPROFILE_INVALID, + .oprofile_cpu_type = "ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, .platform = "power8", @@ -494,9 +494,27 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_restore = __restore_cpu_power7, .platform = "power7+", }, - { /* Power8 */ + { /* Power8E */ .pvr_mask = 0xffff0000, .pvr_value = 0x004b0000, + .cpu_name = "POWER8E (raw)", + .cpu_features = CPU_FTRS_POWER8, + .cpu_user_features = COMMON_USER_POWER8, + .cpu_user_features2 = COMMON_USER2_POWER8, + .mmu_features = MMU_FTRS_POWER8, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power8", + .oprofile_type = PPC_OPROFILE_INVALID, + .cpu_setup = __setup_cpu_power8, + .cpu_restore = __restore_cpu_power8, + .platform = "power8", + }, + { /* Power8 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004d0000, .cpu_name = "POWER8 (raw)", .cpu_features = CPU_FTRS_POWER8, .cpu_user_features = COMMON_USER_POWER8, @@ -506,8 +524,8 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = 0, - .oprofile_type = PPC_OPROFILE_POWER4, + .oprofile_cpu_type = "ppc64/power8", + .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, .platform = "power8", @@ -2087,7 +2105,7 @@ static struct cpu_spec __initdata cpu_specs[] = { MMU_FTR_USE_TLBILX, .icache_bsize = 64, .dcache_bsize = 64, - .num_pmcs = 4, + .num_pmcs = 6, .oprofile_cpu_type = "ppc/e6500", .oprofile_type = PPC_OPROFILE_FSL_EMB, .cpu_setup = __setup_cpu_e6500, diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 9ec3fe174cba..779a78c26435 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -69,16 +69,6 @@ void __init setup_kdump_trampoline(void) } #endif /* CONFIG_NONSTATIC_KERNEL */ -static int __init parse_savemaxmem(char *p) -{ - if (p) - saved_max_pfn = (memparse(p, &p) >> PAGE_SHIFT) - 1; - - return 1; -} -__setup("savemaxmem=", parse_savemaxmem); - - static size_t copy_oldmem_vaddr(void *vaddr, char *buf, size_t csize, unsigned long offset, int userbuf) { diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c new file mode 100644 index 000000000000..55593ee2d5aa --- /dev/null +++ b/arch/powerpc/kernel/eeh.c @@ -0,0 +1,1068 @@ +/* + * Copyright IBM Corporation 2001, 2005, 2006 + * Copyright Dave Engebretsen & Todd Inglett 2001 + * Copyright Linas Vepstas 2005, 2006 + * Copyright 2001-2012 IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com> + */ + +#include <linux/delay.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/pci.h> +#include <linux/proc_fs.h> +#include <linux/rbtree.h> +#include <linux/seq_file.h> +#include <linux/spinlock.h> +#include <linux/export.h> +#include <linux/of.h> + +#include <linux/atomic.h> +#include <asm/eeh.h> +#include <asm/eeh_event.h> +#include <asm/io.h> +#include <asm/machdep.h> +#include <asm/ppc-pci.h> +#include <asm/rtas.h> + + +/** Overview: + * EEH, or "Extended Error Handling" is a PCI bridge technology for + * dealing with PCI bus errors that can't be dealt with within the + * usual PCI framework, except by check-stopping the CPU. Systems + * that are designed for high-availability/reliability cannot afford + * to crash due to a "mere" PCI error, thus the need for EEH. + * An EEH-capable bridge operates by converting a detected error + * into a "slot freeze", taking the PCI adapter off-line, making + * the slot behave, from the OS'es point of view, as if the slot + * were "empty": all reads return 0xff's and all writes are silently + * ignored. EEH slot isolation events can be triggered by parity + * errors on the address or data busses (e.g. during posted writes), + * which in turn might be caused by low voltage on the bus, dust, + * vibration, humidity, radioactivity or plain-old failed hardware. + * + * Note, however, that one of the leading causes of EEH slot + * freeze events are buggy device drivers, buggy device microcode, + * or buggy device hardware. This is because any attempt by the + * device to bus-master data to a memory address that is not + * assigned to the device will trigger a slot freeze. (The idea + * is to prevent devices-gone-wild from corrupting system memory). + * Buggy hardware/drivers will have a miserable time co-existing + * with EEH. + * + * Ideally, a PCI device driver, when suspecting that an isolation + * event has occurred (e.g. by reading 0xff's), will then ask EEH + * whether this is the case, and then take appropriate steps to + * reset the PCI slot, the PCI device, and then resume operations. + * However, until that day, the checking is done here, with the + * eeh_check_failure() routine embedded in the MMIO macros. If + * the slot is found to be isolated, an "EEH Event" is synthesized + * and sent out for processing. + */ + +/* If a device driver keeps reading an MMIO register in an interrupt + * handler after a slot isolation event, it might be broken. + * This sets the threshold for how many read attempts we allow + * before printing an error message. + */ +#define EEH_MAX_FAILS 2100000 + +/* Time to wait for a PCI slot to report status, in milliseconds */ +#define PCI_BUS_RESET_WAIT_MSEC (60*1000) + +/* Platform dependent EEH operations */ +struct eeh_ops *eeh_ops = NULL; + +int eeh_subsystem_enabled; +EXPORT_SYMBOL(eeh_subsystem_enabled); + +/* + * EEH probe mode support. The intention is to support multiple + * platforms for EEH. Some platforms like pSeries do PCI emunation + * based on device tree. However, other platforms like powernv probe + * PCI devices from hardware. The flag is used to distinguish that. + * In addition, struct eeh_ops::probe would be invoked for particular + * OF node or PCI device so that the corresponding PE would be created + * there. + */ +int eeh_probe_mode; + +/* Lock to avoid races due to multiple reports of an error */ +DEFINE_RAW_SPINLOCK(confirm_error_lock); + +/* Buffer for reporting pci register dumps. Its here in BSS, and + * not dynamically alloced, so that it ends up in RMO where RTAS + * can access it. + */ +#define EEH_PCI_REGS_LOG_LEN 4096 +static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN]; + +/* + * The struct is used to maintain the EEH global statistic + * information. Besides, the EEH global statistics will be + * exported to user space through procfs + */ +struct eeh_stats { + u64 no_device; /* PCI device not found */ + u64 no_dn; /* OF node not found */ + u64 no_cfg_addr; /* Config address not found */ + u64 ignored_check; /* EEH check skipped */ + u64 total_mmio_ffs; /* Total EEH checks */ + u64 false_positives; /* Unnecessary EEH checks */ + u64 slot_resets; /* PE reset */ +}; + +static struct eeh_stats eeh_stats; + +#define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE) + +/** + * eeh_gather_pci_data - Copy assorted PCI config space registers to buff + * @edev: device to report data for + * @buf: point to buffer in which to log + * @len: amount of room in buffer + * + * This routine captures assorted PCI configuration space data, + * and puts them into a buffer for RTAS error logging. + */ +static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) +{ + struct device_node *dn = eeh_dev_to_of_node(edev); + struct pci_dev *dev = eeh_dev_to_pci_dev(edev); + u32 cfg; + int cap, i; + int n = 0; + + n += scnprintf(buf+n, len-n, "%s\n", dn->full_name); + printk(KERN_WARNING "EEH: of node=%s\n", dn->full_name); + + eeh_ops->read_config(dn, PCI_VENDOR_ID, 4, &cfg); + n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg); + printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg); + + eeh_ops->read_config(dn, PCI_COMMAND, 4, &cfg); + n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg); + printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg); + + if (!dev) { + printk(KERN_WARNING "EEH: no PCI device for this of node\n"); + return n; + } + + /* Gather bridge-specific registers */ + if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) { + eeh_ops->read_config(dn, PCI_SEC_STATUS, 2, &cfg); + n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg); + printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg); + + eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &cfg); + n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg); + printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg); + } + + /* Dump out the PCI-X command and status regs */ + cap = pci_find_capability(dev, PCI_CAP_ID_PCIX); + if (cap) { + eeh_ops->read_config(dn, cap, 4, &cfg); + n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg); + printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg); + + eeh_ops->read_config(dn, cap+4, 4, &cfg); + n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg); + printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg); + } + + /* If PCI-E capable, dump PCI-E cap 10, and the AER */ + cap = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (cap) { + n += scnprintf(buf+n, len-n, "pci-e cap10:\n"); + printk(KERN_WARNING + "EEH: PCI-E capabilities and status follow:\n"); + + for (i=0; i<=8; i++) { + eeh_ops->read_config(dn, cap+4*i, 4, &cfg); + n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); + printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg); + } + + cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); + if (cap) { + n += scnprintf(buf+n, len-n, "pci-e AER:\n"); + printk(KERN_WARNING + "EEH: PCI-E AER capability register set follows:\n"); + + for (i=0; i<14; i++) { + eeh_ops->read_config(dn, cap+4*i, 4, &cfg); + n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); + printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg); + } + } + } + + return n; +} + +/** + * eeh_slot_error_detail - Generate combined log including driver log and error log + * @pe: EEH PE + * @severity: temporary or permanent error log + * + * This routine should be called to generate the combined log, which + * is comprised of driver log and error log. The driver log is figured + * out from the config space of the corresponding PCI device, while + * the error log is fetched through platform dependent function call. + */ +void eeh_slot_error_detail(struct eeh_pe *pe, int severity) +{ + size_t loglen = 0; + struct eeh_dev *edev, *tmp; + bool valid_cfg_log = true; + + /* + * When the PHB is fenced or dead, it's pointless to collect + * the data from PCI config space because it should return + * 0xFF's. For ER, we still retrieve the data from the PCI + * config space. + */ + if (eeh_probe_mode_dev() && + (pe->type & EEH_PE_PHB) && + (pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD))) + valid_cfg_log = false; + + if (valid_cfg_log) { + eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); + eeh_ops->configure_bridge(pe); + eeh_pe_restore_bars(pe); + + pci_regs_buf[0] = 0; + eeh_pe_for_each_dev(pe, edev, tmp) { + loglen += eeh_gather_pci_data(edev, pci_regs_buf + loglen, + EEH_PCI_REGS_LOG_LEN - loglen); + } + } + + eeh_ops->get_log(pe, severity, pci_regs_buf, loglen); +} + +/** + * eeh_token_to_phys - Convert EEH address token to phys address + * @token: I/O token, should be address in the form 0xA.... + * + * This routine should be called to convert virtual I/O address + * to physical one. + */ +static inline unsigned long eeh_token_to_phys(unsigned long token) +{ + pte_t *ptep; + unsigned long pa; + int hugepage_shift; + + /* + * We won't find hugepages here, iomem + */ + ptep = find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift); + if (!ptep) + return token; + WARN_ON(hugepage_shift); + pa = pte_pfn(*ptep) << PAGE_SHIFT; + + return pa | (token & (PAGE_SIZE-1)); +} + +/* + * On PowerNV platform, we might already have fenced PHB there. + * For that case, it's meaningless to recover frozen PE. Intead, + * We have to handle fenced PHB firstly. + */ +static int eeh_phb_check_failure(struct eeh_pe *pe) +{ + struct eeh_pe *phb_pe; + unsigned long flags; + int ret; + + if (!eeh_probe_mode_dev()) + return -EPERM; + + /* Find the PHB PE */ + phb_pe = eeh_phb_pe_get(pe->phb); + if (!phb_pe) { + pr_warning("%s Can't find PE for PHB#%d\n", + __func__, pe->phb->global_number); + return -EEXIST; + } + + /* If the PHB has been in problematic state */ + eeh_serialize_lock(&flags); + if (phb_pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD)) { + ret = 0; + goto out; + } + + /* Check PHB state */ + ret = eeh_ops->get_state(phb_pe, NULL); + if ((ret < 0) || + (ret == EEH_STATE_NOT_SUPPORT) || + (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == + (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) { + ret = 0; + goto out; + } + + /* Isolate the PHB and send event */ + eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); + eeh_serialize_unlock(flags); + eeh_send_failure_event(phb_pe); + + pr_err("EEH: PHB#%x failure detected\n", + phb_pe->phb->global_number); + dump_stack(); + + return 1; +out: + eeh_serialize_unlock(flags); + return ret; +} + +/** + * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze + * @edev: eeh device + * + * Check for an EEH failure for the given device node. Call this + * routine if the result of a read was all 0xff's and you want to + * find out if this is due to an EEH slot freeze. This routine + * will query firmware for the EEH status. + * + * Returns 0 if there has not been an EEH error; otherwise returns + * a non-zero value and queues up a slot isolation event notification. + * + * It is safe to call this routine in an interrupt context. + */ +int eeh_dev_check_failure(struct eeh_dev *edev) +{ + int ret; + unsigned long flags; + struct device_node *dn; + struct pci_dev *dev; + struct eeh_pe *pe; + int rc = 0; + const char *location; + + eeh_stats.total_mmio_ffs++; + + if (!eeh_subsystem_enabled) + return 0; + + if (!edev) { + eeh_stats.no_dn++; + return 0; + } + dn = eeh_dev_to_of_node(edev); + dev = eeh_dev_to_pci_dev(edev); + pe = edev->pe; + + /* Access to IO BARs might get this far and still not want checking. */ + if (!pe) { + eeh_stats.ignored_check++; + pr_debug("EEH: Ignored check for %s %s\n", + eeh_pci_name(dev), dn->full_name); + return 0; + } + + if (!pe->addr && !pe->config_addr) { + eeh_stats.no_cfg_addr++; + return 0; + } + + /* + * On PowerNV platform, we might already have fenced PHB + * there and we need take care of that firstly. + */ + ret = eeh_phb_check_failure(pe); + if (ret > 0) + return ret; + + /* If we already have a pending isolation event for this + * slot, we know it's bad already, we don't need to check. + * Do this checking under a lock; as multiple PCI devices + * in one slot might report errors simultaneously, and we + * only want one error recovery routine running. + */ + eeh_serialize_lock(&flags); + rc = 1; + if (pe->state & EEH_PE_ISOLATED) { + pe->check_count++; + if (pe->check_count % EEH_MAX_FAILS == 0) { + location = of_get_property(dn, "ibm,loc-code", NULL); + printk(KERN_ERR "EEH: %d reads ignored for recovering device at " + "location=%s driver=%s pci addr=%s\n", + pe->check_count, location, + eeh_driver_name(dev), eeh_pci_name(dev)); + printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n", + eeh_driver_name(dev)); + dump_stack(); + } + goto dn_unlock; + } + + /* + * Now test for an EEH failure. This is VERY expensive. + * Note that the eeh_config_addr may be a parent device + * in the case of a device behind a bridge, or it may be + * function zero of a multi-function device. + * In any case they must share a common PHB. + */ + ret = eeh_ops->get_state(pe, NULL); + + /* Note that config-io to empty slots may fail; + * they are empty when they don't have children. + * We will punt with the following conditions: Failure to get + * PE's state, EEH not support and Permanently unavailable + * state, PE is in good state. + */ + if ((ret < 0) || + (ret == EEH_STATE_NOT_SUPPORT) || + (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == + (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) { + eeh_stats.false_positives++; + pe->false_positives++; + rc = 0; + goto dn_unlock; + } + + eeh_stats.slot_resets++; + + /* Avoid repeated reports of this failure, including problems + * with other functions on this device, and functions under + * bridges. + */ + eeh_pe_state_mark(pe, EEH_PE_ISOLATED); + eeh_serialize_unlock(flags); + + eeh_send_failure_event(pe); + + /* Most EEH events are due to device driver bugs. Having + * a stack trace will help the device-driver authors figure + * out what happened. So print that out. + */ + pr_err("EEH: Frozen PE#%x detected on PHB#%x\n", + pe->addr, pe->phb->global_number); + dump_stack(); + + return 1; + +dn_unlock: + eeh_serialize_unlock(flags); + return rc; +} + +EXPORT_SYMBOL_GPL(eeh_dev_check_failure); + +/** + * eeh_check_failure - Check if all 1's data is due to EEH slot freeze + * @token: I/O token, should be address in the form 0xA.... + * @val: value, should be all 1's (XXX why do we need this arg??) + * + * Check for an EEH failure at the given token address. Call this + * routine if the result of a read was all 0xff's and you want to + * find out if this is due to an EEH slot freeze event. This routine + * will query firmware for the EEH status. + * + * Note this routine is safe to call in an interrupt context. + */ +unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val) +{ + unsigned long addr; + struct eeh_dev *edev; + + /* Finding the phys addr + pci device; this is pretty quick. */ + addr = eeh_token_to_phys((unsigned long __force) token); + edev = eeh_addr_cache_get_dev(addr); + if (!edev) { + eeh_stats.no_device++; + return val; + } + + eeh_dev_check_failure(edev); + return val; +} + +EXPORT_SYMBOL(eeh_check_failure); + + +/** + * eeh_pci_enable - Enable MMIO or DMA transfers for this slot + * @pe: EEH PE + * + * This routine should be called to reenable frozen MMIO or DMA + * so that it would work correctly again. It's useful while doing + * recovery or log collection on the indicated device. + */ +int eeh_pci_enable(struct eeh_pe *pe, int function) +{ + int rc; + + rc = eeh_ops->set_option(pe, function); + if (rc) + pr_warning("%s: Unexpected state change %d on PHB#%d-PE#%x, err=%d\n", + __func__, function, pe->phb->global_number, pe->addr, rc); + + rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); + if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) && + (function == EEH_OPT_THAW_MMIO)) + return 0; + + return rc; +} + +/** + * pcibios_set_pcie_slot_reset - Set PCI-E reset state + * @dev: pci device struct + * @state: reset state to enter + * + * Return value: + * 0 if success + */ +int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) +{ + struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); + struct eeh_pe *pe = edev->pe; + + if (!pe) { + pr_err("%s: No PE found on PCI device %s\n", + __func__, pci_name(dev)); + return -EINVAL; + } + + switch (state) { + case pcie_deassert_reset: + eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); + break; + case pcie_hot_reset: + eeh_ops->reset(pe, EEH_RESET_HOT); + break; + case pcie_warm_reset: + eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); + break; + default: + return -EINVAL; + }; + + return 0; +} + +/** + * eeh_set_pe_freset - Check the required reset for the indicated device + * @data: EEH device + * @flag: return value + * + * Each device might have its preferred reset type: fundamental or + * hot reset. The routine is used to collected the information for + * the indicated device and its children so that the bunch of the + * devices could be reset properly. + */ +static void *eeh_set_dev_freset(void *data, void *flag) +{ + struct pci_dev *dev; + unsigned int *freset = (unsigned int *)flag; + struct eeh_dev *edev = (struct eeh_dev *)data; + + dev = eeh_dev_to_pci_dev(edev); + if (dev) + *freset |= dev->needs_freset; + + return NULL; +} + +/** + * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second + * @pe: EEH PE + * + * Assert the PCI #RST line for 1/4 second. + */ +static void eeh_reset_pe_once(struct eeh_pe *pe) +{ + unsigned int freset = 0; + + /* Determine type of EEH reset required for + * Partitionable Endpoint, a hot-reset (1) + * or a fundamental reset (3). + * A fundamental reset required by any device under + * Partitionable Endpoint trumps hot-reset. + */ + eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset); + + if (freset) + eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); + else + eeh_ops->reset(pe, EEH_RESET_HOT); + + /* The PCI bus requires that the reset be held high for at least + * a 100 milliseconds. We wait a bit longer 'just in case'. + */ +#define PCI_BUS_RST_HOLD_TIME_MSEC 250 + msleep(PCI_BUS_RST_HOLD_TIME_MSEC); + + /* We might get hit with another EEH freeze as soon as the + * pci slot reset line is dropped. Make sure we don't miss + * these, and clear the flag now. + */ + eeh_pe_state_clear(pe, EEH_PE_ISOLATED); + + eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); + + /* After a PCI slot has been reset, the PCI Express spec requires + * a 1.5 second idle time for the bus to stabilize, before starting + * up traffic. + */ +#define PCI_BUS_SETTLE_TIME_MSEC 1800 + msleep(PCI_BUS_SETTLE_TIME_MSEC); +} + +/** + * eeh_reset_pe - Reset the indicated PE + * @pe: EEH PE + * + * This routine should be called to reset indicated device, including + * PE. A PE might include multiple PCI devices and sometimes PCI bridges + * might be involved as well. + */ +int eeh_reset_pe(struct eeh_pe *pe) +{ + int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); + int i, rc; + + /* Take three shots at resetting the bus */ + for (i=0; i<3; i++) { + eeh_reset_pe_once(pe); + + rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); + if ((rc & flags) == flags) + return 0; + + if (rc < 0) { + pr_err("%s: Unrecoverable slot failure on PHB#%d-PE#%x", + __func__, pe->phb->global_number, pe->addr); + return -1; + } + pr_err("EEH: bus reset %d failed on PHB#%d-PE#%x, rc=%d\n", + i+1, pe->phb->global_number, pe->addr, rc); + } + + return -1; +} + +/** + * eeh_save_bars - Save device bars + * @edev: PCI device associated EEH device + * + * Save the values of the device bars. Unlike the restore + * routine, this routine is *not* recursive. This is because + * PCI devices are added individually; but, for the restore, + * an entire slot is reset at a time. + */ +void eeh_save_bars(struct eeh_dev *edev) +{ + int i; + struct device_node *dn; + + if (!edev) + return; + dn = eeh_dev_to_of_node(edev); + + for (i = 0; i < 16; i++) + eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]); +} + +/** + * eeh_ops_register - Register platform dependent EEH operations + * @ops: platform dependent EEH operations + * + * Register the platform dependent EEH operation callback + * functions. The platform should call this function before + * any other EEH operations. + */ +int __init eeh_ops_register(struct eeh_ops *ops) +{ + if (!ops->name) { + pr_warning("%s: Invalid EEH ops name for %p\n", + __func__, ops); + return -EINVAL; + } + + if (eeh_ops && eeh_ops != ops) { + pr_warning("%s: EEH ops of platform %s already existing (%s)\n", + __func__, eeh_ops->name, ops->name); + return -EEXIST; + } + + eeh_ops = ops; + + return 0; +} + +/** + * eeh_ops_unregister - Unreigster platform dependent EEH operations + * @name: name of EEH platform operations + * + * Unregister the platform dependent EEH operation callback + * functions. + */ +int __exit eeh_ops_unregister(const char *name) +{ + if (!name || !strlen(name)) { + pr_warning("%s: Invalid EEH ops name\n", + __func__); + return -EINVAL; + } + + if (eeh_ops && !strcmp(eeh_ops->name, name)) { + eeh_ops = NULL; + return 0; + } + + return -EEXIST; +} + +/** + * eeh_init - EEH initialization + * + * Initialize EEH by trying to enable it for all of the adapters in the system. + * As a side effect we can determine here if eeh is supported at all. + * Note that we leave EEH on so failed config cycles won't cause a machine + * check. If a user turns off EEH for a particular adapter they are really + * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't + * grant access to a slot if EEH isn't enabled, and so we always enable + * EEH for all slots/all devices. + * + * The eeh-force-off option disables EEH checking globally, for all slots. + * Even if force-off is set, the EEH hardware is still enabled, so that + * newer systems can boot. + */ +int eeh_init(void) +{ + struct pci_controller *hose, *tmp; + struct device_node *phb; + static int cnt = 0; + int ret = 0; + + /* + * We have to delay the initialization on PowerNV after + * the PCI hierarchy tree has been built because the PEs + * are figured out based on PCI devices instead of device + * tree nodes + */ + if (machine_is(powernv) && cnt++ <= 0) + return ret; + + /* call platform initialization function */ + if (!eeh_ops) { + pr_warning("%s: Platform EEH operation not found\n", + __func__); + return -EEXIST; + } else if ((ret = eeh_ops->init())) { + pr_warning("%s: Failed to call platform init function (%d)\n", + __func__, ret); + return ret; + } + + /* Initialize EEH event */ + ret = eeh_event_init(); + if (ret) + return ret; + + /* Enable EEH for all adapters */ + if (eeh_probe_mode_devtree()) { + list_for_each_entry_safe(hose, tmp, + &hose_list, list_node) { + phb = hose->dn; + traverse_pci_devices(phb, eeh_ops->of_probe, NULL); + } + } else if (eeh_probe_mode_dev()) { + list_for_each_entry_safe(hose, tmp, + &hose_list, list_node) + pci_walk_bus(hose->bus, eeh_ops->dev_probe, NULL); + } else { + pr_warning("%s: Invalid probe mode %d\n", + __func__, eeh_probe_mode); + return -EINVAL; + } + + /* + * Call platform post-initialization. Actually, It's good chance + * to inform platform that EEH is ready to supply service if the + * I/O cache stuff has been built up. + */ + if (eeh_ops->post_init) { + ret = eeh_ops->post_init(); + if (ret) + return ret; + } + + if (eeh_subsystem_enabled) + pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n"); + else + pr_warning("EEH: No capable adapters found\n"); + + return ret; +} + +core_initcall_sync(eeh_init); + +/** + * eeh_add_device_early - Enable EEH for the indicated device_node + * @dn: device node for which to set up EEH + * + * This routine must be used to perform EEH initialization for PCI + * devices that were added after system boot (e.g. hotplug, dlpar). + * This routine must be called before any i/o is performed to the + * adapter (inluding any config-space i/o). + * Whether this actually enables EEH or not for this device depends + * on the CEC architecture, type of the device, on earlier boot + * command-line arguments & etc. + */ +void eeh_add_device_early(struct device_node *dn) +{ + struct pci_controller *phb; + + /* + * If we're doing EEH probe based on PCI device, we + * would delay the probe until late stage because + * the PCI device isn't available this moment. + */ + if (!eeh_probe_mode_devtree()) + return; + + if (!of_node_to_eeh_dev(dn)) + return; + phb = of_node_to_eeh_dev(dn)->phb; + + /* USB Bus children of PCI devices will not have BUID's */ + if (NULL == phb || 0 == phb->buid) + return; + + eeh_ops->of_probe(dn, NULL); +} + +/** + * eeh_add_device_tree_early - Enable EEH for the indicated device + * @dn: device node + * + * This routine must be used to perform EEH initialization for the + * indicated PCI device that was added after system boot (e.g. + * hotplug, dlpar). + */ +void eeh_add_device_tree_early(struct device_node *dn) +{ + struct device_node *sib; + + for_each_child_of_node(dn, sib) + eeh_add_device_tree_early(sib); + eeh_add_device_early(dn); +} +EXPORT_SYMBOL_GPL(eeh_add_device_tree_early); + +/** + * eeh_add_device_late - Perform EEH initialization for the indicated pci device + * @dev: pci device for which to set up EEH + * + * This routine must be used to complete EEH initialization for PCI + * devices that were added after system boot (e.g. hotplug, dlpar). + */ +void eeh_add_device_late(struct pci_dev *dev) +{ + struct device_node *dn; + struct eeh_dev *edev; + + if (!dev || !eeh_subsystem_enabled) + return; + + pr_debug("EEH: Adding device %s\n", pci_name(dev)); + + dn = pci_device_to_OF_node(dev); + edev = of_node_to_eeh_dev(dn); + if (edev->pdev == dev) { + pr_debug("EEH: Already referenced !\n"); + return; + } + + /* + * The EEH cache might not be removed correctly because of + * unbalanced kref to the device during unplug time, which + * relies on pcibios_release_device(). So we have to remove + * that here explicitly. + */ + if (edev->pdev) { + eeh_rmv_from_parent_pe(edev); + eeh_addr_cache_rmv_dev(edev->pdev); + eeh_sysfs_remove_device(edev->pdev); + edev->mode &= ~EEH_DEV_SYSFS; + + edev->pdev = NULL; + dev->dev.archdata.edev = NULL; + } + + edev->pdev = dev; + dev->dev.archdata.edev = edev; + + /* + * We have to do the EEH probe here because the PCI device + * hasn't been created yet in the early stage. + */ + if (eeh_probe_mode_dev()) + eeh_ops->dev_probe(dev, NULL); + + eeh_addr_cache_insert_dev(dev); +} + +/** + * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus + * @bus: PCI bus + * + * This routine must be used to perform EEH initialization for PCI + * devices which are attached to the indicated PCI bus. The PCI bus + * is added after system boot through hotplug or dlpar. + */ +void eeh_add_device_tree_late(struct pci_bus *bus) +{ + struct pci_dev *dev; + + list_for_each_entry(dev, &bus->devices, bus_list) { + eeh_add_device_late(dev); + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { + struct pci_bus *subbus = dev->subordinate; + if (subbus) + eeh_add_device_tree_late(subbus); + } + } +} +EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); + +/** + * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus + * @bus: PCI bus + * + * This routine must be used to add EEH sysfs files for PCI + * devices which are attached to the indicated PCI bus. The PCI bus + * is added after system boot through hotplug or dlpar. + */ +void eeh_add_sysfs_files(struct pci_bus *bus) +{ + struct pci_dev *dev; + + list_for_each_entry(dev, &bus->devices, bus_list) { + eeh_sysfs_add_device(dev); + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { + struct pci_bus *subbus = dev->subordinate; + if (subbus) + eeh_add_sysfs_files(subbus); + } + } +} +EXPORT_SYMBOL_GPL(eeh_add_sysfs_files); + +/** + * eeh_remove_device - Undo EEH setup for the indicated pci device + * @dev: pci device to be removed + * + * This routine should be called when a device is removed from + * a running system (e.g. by hotplug or dlpar). It unregisters + * the PCI device from the EEH subsystem. I/O errors affecting + * this device will no longer be detected after this call; thus, + * i/o errors affecting this slot may leave this device unusable. + */ +void eeh_remove_device(struct pci_dev *dev) +{ + struct eeh_dev *edev; + + if (!dev || !eeh_subsystem_enabled) + return; + edev = pci_dev_to_eeh_dev(dev); + + /* Unregister the device with the EEH/PCI address search system */ + pr_debug("EEH: Removing device %s\n", pci_name(dev)); + + if (!edev || !edev->pdev || !edev->pe) { + pr_debug("EEH: Not referenced !\n"); + return; + } + + /* + * During the hotplug for EEH error recovery, we need the EEH + * device attached to the parent PE in order for BAR restore + * a bit later. So we keep it for BAR restore and remove it + * from the parent PE during the BAR resotre. + */ + edev->pdev = NULL; + dev->dev.archdata.edev = NULL; + if (!(edev->pe->state & EEH_PE_KEEP)) + eeh_rmv_from_parent_pe(edev); + else + edev->mode |= EEH_DEV_DISCONNECTED; + + eeh_addr_cache_rmv_dev(dev); + eeh_sysfs_remove_device(dev); + edev->mode &= ~EEH_DEV_SYSFS; +} + +static int proc_eeh_show(struct seq_file *m, void *v) +{ + if (0 == eeh_subsystem_enabled) { + seq_printf(m, "EEH Subsystem is globally disabled\n"); + seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs); + } else { + seq_printf(m, "EEH Subsystem is enabled\n"); + seq_printf(m, + "no device=%llu\n" + "no device node=%llu\n" + "no config address=%llu\n" + "check not wanted=%llu\n" + "eeh_total_mmio_ffs=%llu\n" + "eeh_false_positives=%llu\n" + "eeh_slot_resets=%llu\n", + eeh_stats.no_device, + eeh_stats.no_dn, + eeh_stats.no_cfg_addr, + eeh_stats.ignored_check, + eeh_stats.total_mmio_ffs, + eeh_stats.false_positives, + eeh_stats.slot_resets); + } + + return 0; +} + +static int proc_eeh_open(struct inode *inode, struct file *file) +{ + return single_open(file, proc_eeh_show, NULL); +} + +static const struct file_operations proc_eeh_operations = { + .open = proc_eeh_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init eeh_init_proc(void) +{ + if (machine_is(pseries) || machine_is(powernv)) + proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations); + return 0; +} +__initcall(eeh_init_proc); diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c new file mode 100644 index 000000000000..e8c9fd546a5c --- /dev/null +++ b/arch/powerpc/kernel/eeh_cache.c @@ -0,0 +1,310 @@ +/* + * PCI address cache; allows the lookup of PCI devices based on I/O address + * + * Copyright IBM Corporation 2004 + * Copyright Linas Vepstas <linas@austin.ibm.com> 2004 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/list.h> +#include <linux/pci.h> +#include <linux/rbtree.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/atomic.h> +#include <asm/pci-bridge.h> +#include <asm/ppc-pci.h> + + +/** + * The pci address cache subsystem. This subsystem places + * PCI device address resources into a red-black tree, sorted + * according to the address range, so that given only an i/o + * address, the corresponding PCI device can be **quickly** + * found. It is safe to perform an address lookup in an interrupt + * context; this ability is an important feature. + * + * Currently, the only customer of this code is the EEH subsystem; + * thus, this code has been somewhat tailored to suit EEH better. + * In particular, the cache does *not* hold the addresses of devices + * for which EEH is not enabled. + * + * (Implementation Note: The RB tree seems to be better/faster + * than any hash algo I could think of for this problem, even + * with the penalty of slow pointer chases for d-cache misses). + */ +struct pci_io_addr_range { + struct rb_node rb_node; + unsigned long addr_lo; + unsigned long addr_hi; + struct eeh_dev *edev; + struct pci_dev *pcidev; + unsigned int flags; +}; + +static struct pci_io_addr_cache { + struct rb_root rb_root; + spinlock_t piar_lock; +} pci_io_addr_cache_root; + +static inline struct eeh_dev *__eeh_addr_cache_get_device(unsigned long addr) +{ + struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node; + + while (n) { + struct pci_io_addr_range *piar; + piar = rb_entry(n, struct pci_io_addr_range, rb_node); + + if (addr < piar->addr_lo) + n = n->rb_left; + else if (addr > piar->addr_hi) + n = n->rb_right; + else + return piar->edev; + } + + return NULL; +} + +/** + * eeh_addr_cache_get_dev - Get device, given only address + * @addr: mmio (PIO) phys address or i/o port number + * + * Given an mmio phys address, or a port number, find a pci device + * that implements this address. Be sure to pci_dev_put the device + * when finished. I/O port numbers are assumed to be offset + * from zero (that is, they do *not* have pci_io_addr added in). + * It is safe to call this function within an interrupt. + */ +struct eeh_dev *eeh_addr_cache_get_dev(unsigned long addr) +{ + struct eeh_dev *edev; + unsigned long flags; + + spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); + edev = __eeh_addr_cache_get_device(addr); + spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); + return edev; +} + +#ifdef DEBUG +/* + * Handy-dandy debug print routine, does nothing more + * than print out the contents of our addr cache. + */ +static void eeh_addr_cache_print(struct pci_io_addr_cache *cache) +{ + struct rb_node *n; + int cnt = 0; + + n = rb_first(&cache->rb_root); + while (n) { + struct pci_io_addr_range *piar; + piar = rb_entry(n, struct pci_io_addr_range, rb_node); + pr_debug("PCI: %s addr range %d [%lx-%lx]: %s\n", + (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt, + piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev)); + cnt++; + n = rb_next(n); + } +} +#endif + +/* Insert address range into the rb tree. */ +static struct pci_io_addr_range * +eeh_addr_cache_insert(struct pci_dev *dev, unsigned long alo, + unsigned long ahi, unsigned int flags) +{ + struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node; + struct rb_node *parent = NULL; + struct pci_io_addr_range *piar; + + /* Walk tree, find a place to insert into tree */ + while (*p) { + parent = *p; + piar = rb_entry(parent, struct pci_io_addr_range, rb_node); + if (ahi < piar->addr_lo) { + p = &parent->rb_left; + } else if (alo > piar->addr_hi) { + p = &parent->rb_right; + } else { + if (dev != piar->pcidev || + alo != piar->addr_lo || ahi != piar->addr_hi) { + pr_warning("PIAR: overlapping address range\n"); + } + return piar; + } + } + piar = kzalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC); + if (!piar) + return NULL; + + piar->addr_lo = alo; + piar->addr_hi = ahi; + piar->edev = pci_dev_to_eeh_dev(dev); + piar->pcidev = dev; + piar->flags = flags; + +#ifdef DEBUG + pr_debug("PIAR: insert range=[%lx:%lx] dev=%s\n", + alo, ahi, pci_name(dev)); +#endif + + rb_link_node(&piar->rb_node, parent, p); + rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root); + + return piar; +} + +static void __eeh_addr_cache_insert_dev(struct pci_dev *dev) +{ + struct device_node *dn; + struct eeh_dev *edev; + int i; + + dn = pci_device_to_OF_node(dev); + if (!dn) { + pr_warning("PCI: no pci dn found for dev=%s\n", pci_name(dev)); + return; + } + + edev = of_node_to_eeh_dev(dn); + if (!edev) { + pr_warning("PCI: no EEH dev found for dn=%s\n", + dn->full_name); + return; + } + + /* Skip any devices for which EEH is not enabled. */ + if (!eeh_probe_mode_dev() && !edev->pe) { +#ifdef DEBUG + pr_info("PCI: skip building address cache for=%s - %s\n", + pci_name(dev), dn->full_name); +#endif + return; + } + + /* Walk resources on this device, poke them into the tree */ + for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { + unsigned long start = pci_resource_start(dev,i); + unsigned long end = pci_resource_end(dev,i); + unsigned int flags = pci_resource_flags(dev,i); + + /* We are interested only bus addresses, not dma or other stuff */ + if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM))) + continue; + if (start == 0 || ~start == 0 || end == 0 || ~end == 0) + continue; + eeh_addr_cache_insert(dev, start, end, flags); + } +} + +/** + * eeh_addr_cache_insert_dev - Add a device to the address cache + * @dev: PCI device whose I/O addresses we are interested in. + * + * In order to support the fast lookup of devices based on addresses, + * we maintain a cache of devices that can be quickly searched. + * This routine adds a device to that cache. + */ +void eeh_addr_cache_insert_dev(struct pci_dev *dev) +{ + unsigned long flags; + + /* Ignore PCI bridges */ + if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) + return; + + spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); + __eeh_addr_cache_insert_dev(dev); + spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); +} + +static inline void __eeh_addr_cache_rmv_dev(struct pci_dev *dev) +{ + struct rb_node *n; + +restart: + n = rb_first(&pci_io_addr_cache_root.rb_root); + while (n) { + struct pci_io_addr_range *piar; + piar = rb_entry(n, struct pci_io_addr_range, rb_node); + + if (piar->pcidev == dev) { + rb_erase(n, &pci_io_addr_cache_root.rb_root); + kfree(piar); + goto restart; + } + n = rb_next(n); + } +} + +/** + * eeh_addr_cache_rmv_dev - remove pci device from addr cache + * @dev: device to remove + * + * Remove a device from the addr-cache tree. + * This is potentially expensive, since it will walk + * the tree multiple times (once per resource). + * But so what; device removal doesn't need to be that fast. + */ +void eeh_addr_cache_rmv_dev(struct pci_dev *dev) +{ + unsigned long flags; + + spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); + __eeh_addr_cache_rmv_dev(dev); + spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); +} + +/** + * eeh_addr_cache_build - Build a cache of I/O addresses + * + * Build a cache of pci i/o addresses. This cache will be used to + * find the pci device that corresponds to a given address. + * This routine scans all pci busses to build the cache. + * Must be run late in boot process, after the pci controllers + * have been scanned for devices (after all device resources are known). + */ +void eeh_addr_cache_build(void) +{ + struct device_node *dn; + struct eeh_dev *edev; + struct pci_dev *dev = NULL; + + spin_lock_init(&pci_io_addr_cache_root.piar_lock); + + for_each_pci_dev(dev) { + dn = pci_device_to_OF_node(dev); + if (!dn) + continue; + + edev = of_node_to_eeh_dev(dn); + if (!edev) + continue; + + dev->dev.archdata.edev = edev; + edev->pdev = dev; + + eeh_addr_cache_insert_dev(dev); + eeh_sysfs_add_device(dev); + } + +#ifdef DEBUG + /* Verify tree built up above, echo back the list of addrs. */ + eeh_addr_cache_print(&pci_io_addr_cache_root); +#endif +} diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c new file mode 100644 index 000000000000..1efa28f5fc54 --- /dev/null +++ b/arch/powerpc/kernel/eeh_dev.c @@ -0,0 +1,112 @@ +/* + * The file intends to implement dynamic creation of EEH device, which will + * be bound with OF node and PCI device simutaneously. The EEH devices would + * be foundamental information for EEH core components to work proerly. Besides, + * We have to support multiple situations where dynamic creation of EEH device + * is required: + * + * 1) Before PCI emunation starts, we need create EEH devices according to the + * PCI sensitive OF nodes. + * 2) When PCI emunation is done, we need do the binding between PCI device and + * the associated EEH device. + * 3) DR (Dynamic Reconfiguration) would create PCI sensitive OF node. EEH device + * will be created while PCI sensitive OF node is detected from DR. + * 4) PCI hotplug needs redoing the binding between PCI device and EEH device. If + * PHB is newly inserted, we also need create EEH devices accordingly. + * + * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/export.h> +#include <linux/gfp.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/string.h> + +#include <asm/pci-bridge.h> +#include <asm/ppc-pci.h> + +/** + * eeh_dev_init - Create EEH device according to OF node + * @dn: device node + * @data: PHB + * + * It will create EEH device according to the given OF node. The function + * might be called by PCI emunation, DR, PHB hotplug. + */ +void *eeh_dev_init(struct device_node *dn, void *data) +{ + struct pci_controller *phb = data; + struct eeh_dev *edev; + + /* Allocate EEH device */ + edev = kzalloc(sizeof(*edev), GFP_KERNEL); + if (!edev) { + pr_warning("%s: out of memory\n", __func__); + return NULL; + } + + /* Associate EEH device with OF node */ + PCI_DN(dn)->edev = edev; + edev->dn = dn; + edev->phb = phb; + INIT_LIST_HEAD(&edev->list); + + return NULL; +} + +/** + * eeh_dev_phb_init_dynamic - Create EEH devices for devices included in PHB + * @phb: PHB + * + * Scan the PHB OF node and its child association, then create the + * EEH devices accordingly + */ +void eeh_dev_phb_init_dynamic(struct pci_controller *phb) +{ + struct device_node *dn = phb->dn; + + /* EEH PE for PHB */ + eeh_phb_pe_create(phb); + + /* EEH device for PHB */ + eeh_dev_init(dn, phb); + + /* EEH devices for children OF nodes */ + traverse_pci_devices(dn, eeh_dev_init, phb); +} + +/** + * eeh_dev_phb_init - Create EEH devices for devices included in existing PHBs + * + * Scan all the existing PHBs and create EEH devices for their OF + * nodes and their children OF nodes + */ +static int __init eeh_dev_phb_init(void) +{ + struct pci_controller *phb, *tmp; + + list_for_each_entry_safe(phb, tmp, &hose_list, list_node) + eeh_dev_phb_init_dynamic(phb); + + pr_info("EEH: devices created\n"); + + return 0; +} + +core_initcall(eeh_dev_phb_init); diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c new file mode 100644 index 000000000000..36bed5a12750 --- /dev/null +++ b/arch/powerpc/kernel/eeh_driver.c @@ -0,0 +1,732 @@ +/* + * PCI Error Recovery Driver for RPA-compliant PPC64 platform. + * Copyright IBM Corp. 2004 2005 + * Copyright Linas Vepstas <linas@linas.org> 2004, 2005 + * + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com> + */ +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <asm/eeh.h> +#include <asm/eeh_event.h> +#include <asm/ppc-pci.h> +#include <asm/pci-bridge.h> +#include <asm/prom.h> +#include <asm/rtas.h> + +/** + * eeh_pcid_name - Retrieve name of PCI device driver + * @pdev: PCI device + * + * This routine is used to retrieve the name of PCI device driver + * if that's valid. + */ +static inline const char *eeh_pcid_name(struct pci_dev *pdev) +{ + if (pdev && pdev->dev.driver) + return pdev->dev.driver->name; + return ""; +} + +/** + * eeh_pcid_get - Get the PCI device driver + * @pdev: PCI device + * + * The function is used to retrieve the PCI device driver for + * the indicated PCI device. Besides, we will increase the reference + * of the PCI device driver to prevent that being unloaded on + * the fly. Otherwise, kernel crash would be seen. + */ +static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev) +{ + if (!pdev || !pdev->driver) + return NULL; + + if (!try_module_get(pdev->driver->driver.owner)) + return NULL; + + return pdev->driver; +} + +/** + * eeh_pcid_put - Dereference on the PCI device driver + * @pdev: PCI device + * + * The function is called to do dereference on the PCI device + * driver of the indicated PCI device. + */ +static inline void eeh_pcid_put(struct pci_dev *pdev) +{ + if (!pdev || !pdev->driver) + return; + + module_put(pdev->driver->driver.owner); +} + +#if 0 +static void print_device_node_tree(struct pci_dn *pdn, int dent) +{ + int i; + struct device_node *pc; + + if (!pdn) + return; + for (i = 0; i < dent; i++) + printk(" "); + printk("dn=%s mode=%x \tcfg_addr=%x pe_addr=%x \tfull=%s\n", + pdn->node->name, pdn->eeh_mode, pdn->eeh_config_addr, + pdn->eeh_pe_config_addr, pdn->node->full_name); + dent += 3; + pc = pdn->node->child; + while (pc) { + print_device_node_tree(PCI_DN(pc), dent); + pc = pc->sibling; + } +} +#endif + +/** + * eeh_disable_irq - Disable interrupt for the recovering device + * @dev: PCI device + * + * This routine must be called when reporting temporary or permanent + * error to the particular PCI device to disable interrupt of that + * device. If the device has enabled MSI or MSI-X interrupt, we needn't + * do real work because EEH should freeze DMA transfers for those PCI + * devices encountering EEH errors, which includes MSI or MSI-X. + */ +static void eeh_disable_irq(struct pci_dev *dev) +{ + struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); + + /* Don't disable MSI and MSI-X interrupts. They are + * effectively disabled by the DMA Stopped state + * when an EEH error occurs. + */ + if (dev->msi_enabled || dev->msix_enabled) + return; + + if (!irq_has_action(dev->irq)) + return; + + edev->mode |= EEH_DEV_IRQ_DISABLED; + disable_irq_nosync(dev->irq); +} + +/** + * eeh_enable_irq - Enable interrupt for the recovering device + * @dev: PCI device + * + * This routine must be called to enable interrupt while failed + * device could be resumed. + */ +static void eeh_enable_irq(struct pci_dev *dev) +{ + struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); + struct irq_desc *desc; + + if ((edev->mode) & EEH_DEV_IRQ_DISABLED) { + edev->mode &= ~EEH_DEV_IRQ_DISABLED; + + desc = irq_to_desc(dev->irq); + if (desc && desc->depth > 0) + enable_irq(dev->irq); + } +} + +/** + * eeh_report_error - Report pci error to each device driver + * @data: eeh device + * @userdata: return value + * + * Report an EEH error to each device driver, collect up and + * merge the device driver responses. Cumulative response + * passed back in "userdata". + */ +static void *eeh_report_error(void *data, void *userdata) +{ + struct eeh_dev *edev = (struct eeh_dev *)data; + struct pci_dev *dev = eeh_dev_to_pci_dev(edev); + enum pci_ers_result rc, *res = userdata; + struct pci_driver *driver; + + /* We might not have the associated PCI device, + * then we should continue for next one. + */ + if (!dev) return NULL; + dev->error_state = pci_channel_io_frozen; + + driver = eeh_pcid_get(dev); + if (!driver) return NULL; + + eeh_disable_irq(dev); + + if (!driver->err_handler || + !driver->err_handler->error_detected) { + eeh_pcid_put(dev); + return NULL; + } + + rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen); + + /* A driver that needs a reset trumps all others */ + if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; + if (*res == PCI_ERS_RESULT_NONE) *res = rc; + + eeh_pcid_put(dev); + return NULL; +} + +/** + * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled + * @data: eeh device + * @userdata: return value + * + * Tells each device driver that IO ports, MMIO and config space I/O + * are now enabled. Collects up and merges the device driver responses. + * Cumulative response passed back in "userdata". + */ +static void *eeh_report_mmio_enabled(void *data, void *userdata) +{ + struct eeh_dev *edev = (struct eeh_dev *)data; + struct pci_dev *dev = eeh_dev_to_pci_dev(edev); + enum pci_ers_result rc, *res = userdata; + struct pci_driver *driver; + + driver = eeh_pcid_get(dev); + if (!driver) return NULL; + + if (!driver->err_handler || + !driver->err_handler->mmio_enabled) { + eeh_pcid_put(dev); + return NULL; + } + + rc = driver->err_handler->mmio_enabled(dev); + + /* A driver that needs a reset trumps all others */ + if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; + if (*res == PCI_ERS_RESULT_NONE) *res = rc; + + eeh_pcid_put(dev); + return NULL; +} + +/** + * eeh_report_reset - Tell device that slot has been reset + * @data: eeh device + * @userdata: return value + * + * This routine must be called while EEH tries to reset particular + * PCI device so that the associated PCI device driver could take + * some actions, usually to save data the driver needs so that the + * driver can work again while the device is recovered. + */ +static void *eeh_report_reset(void *data, void *userdata) +{ + struct eeh_dev *edev = (struct eeh_dev *)data; + struct pci_dev *dev = eeh_dev_to_pci_dev(edev); + enum pci_ers_result rc, *res = userdata; + struct pci_driver *driver; + + if (!dev) return NULL; + dev->error_state = pci_channel_io_normal; + + driver = eeh_pcid_get(dev); + if (!driver) return NULL; + + eeh_enable_irq(dev); + + if (!driver->err_handler || + !driver->err_handler->slot_reset) { + eeh_pcid_put(dev); + return NULL; + } + + rc = driver->err_handler->slot_reset(dev); + if ((*res == PCI_ERS_RESULT_NONE) || + (*res == PCI_ERS_RESULT_RECOVERED)) *res = rc; + if (*res == PCI_ERS_RESULT_DISCONNECT && + rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; + + eeh_pcid_put(dev); + return NULL; +} + +/** + * eeh_report_resume - Tell device to resume normal operations + * @data: eeh device + * @userdata: return value + * + * This routine must be called to notify the device driver that it + * could resume so that the device driver can do some initialization + * to make the recovered device work again. + */ +static void *eeh_report_resume(void *data, void *userdata) +{ + struct eeh_dev *edev = (struct eeh_dev *)data; + struct pci_dev *dev = eeh_dev_to_pci_dev(edev); + struct pci_driver *driver; + + if (!dev) return NULL; + dev->error_state = pci_channel_io_normal; + + driver = eeh_pcid_get(dev); + if (!driver) return NULL; + + eeh_enable_irq(dev); + + if (!driver->err_handler || + !driver->err_handler->resume) { + eeh_pcid_put(dev); + return NULL; + } + + driver->err_handler->resume(dev); + + eeh_pcid_put(dev); + return NULL; +} + +/** + * eeh_report_failure - Tell device driver that device is dead. + * @data: eeh device + * @userdata: return value + * + * This informs the device driver that the device is permanently + * dead, and that no further recovery attempts will be made on it. + */ +static void *eeh_report_failure(void *data, void *userdata) +{ + struct eeh_dev *edev = (struct eeh_dev *)data; + struct pci_dev *dev = eeh_dev_to_pci_dev(edev); + struct pci_driver *driver; + + if (!dev) return NULL; + dev->error_state = pci_channel_io_perm_failure; + + driver = eeh_pcid_get(dev); + if (!driver) return NULL; + + eeh_disable_irq(dev); + + if (!driver->err_handler || + !driver->err_handler->error_detected) { + eeh_pcid_put(dev); + return NULL; + } + + driver->err_handler->error_detected(dev, pci_channel_io_perm_failure); + + eeh_pcid_put(dev); + return NULL; +} + +static void *eeh_rmv_device(void *data, void *userdata) +{ + struct pci_driver *driver; + struct eeh_dev *edev = (struct eeh_dev *)data; + struct pci_dev *dev = eeh_dev_to_pci_dev(edev); + int *removed = (int *)userdata; + + /* + * Actually, we should remove the PCI bridges as well. + * However, that's lots of complexity to do that, + * particularly some of devices under the bridge might + * support EEH. So we just care about PCI devices for + * simplicity here. + */ + if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE)) + return NULL; + driver = eeh_pcid_get(dev); + if (driver && driver->err_handler) + return NULL; + + /* Remove it from PCI subsystem */ + pr_debug("EEH: Removing %s without EEH sensitive driver\n", + pci_name(dev)); + edev->bus = dev->bus; + edev->mode |= EEH_DEV_DISCONNECTED; + (*removed)++; + + pci_stop_and_remove_bus_device(dev); + + return NULL; +} + +static void *eeh_pe_detach_dev(void *data, void *userdata) +{ + struct eeh_pe *pe = (struct eeh_pe *)data; + struct eeh_dev *edev, *tmp; + + eeh_pe_for_each_dev(pe, edev, tmp) { + if (!(edev->mode & EEH_DEV_DISCONNECTED)) + continue; + + edev->mode &= ~(EEH_DEV_DISCONNECTED | EEH_DEV_IRQ_DISABLED); + eeh_rmv_from_parent_pe(edev); + } + + return NULL; +} + +/** + * eeh_reset_device - Perform actual reset of a pci slot + * @pe: EEH PE + * @bus: PCI bus corresponding to the isolcated slot + * + * This routine must be called to do reset on the indicated PE. + * During the reset, udev might be invoked because those affected + * PCI devices will be removed and then added. + */ +static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) +{ + struct pci_bus *frozen_bus = eeh_pe_bus_get(pe); + struct timeval tstamp; + int cnt, rc, removed = 0; + + /* pcibios will clear the counter; save the value */ + cnt = pe->freeze_count; + tstamp = pe->tstamp; + + /* + * We don't remove the corresponding PE instances because + * we need the information afterwords. The attached EEH + * devices are expected to be attached soon when calling + * into pcibios_add_pci_devices(). + */ + eeh_pe_state_mark(pe, EEH_PE_KEEP); + if (bus) + pcibios_remove_pci_devices(bus); + else if (frozen_bus) + eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed); + + /* Reset the pci controller. (Asserts RST#; resets config space). + * Reconfigure bridges and devices. Don't try to bring the system + * up if the reset failed for some reason. + */ + rc = eeh_reset_pe(pe); + if (rc) + return rc; + + /* Restore PE */ + eeh_ops->configure_bridge(pe); + eeh_pe_restore_bars(pe); + + /* Give the system 5 seconds to finish running the user-space + * hotplug shutdown scripts, e.g. ifdown for ethernet. Yes, + * this is a hack, but if we don't do this, and try to bring + * the device up before the scripts have taken it down, + * potentially weird things happen. + */ + if (bus) { + pr_info("EEH: Sleep 5s ahead of complete hotplug\n"); + ssleep(5); + + /* + * The EEH device is still connected with its parent + * PE. We should disconnect it so the binding can be + * rebuilt when adding PCI devices. + */ + eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); + pcibios_add_pci_devices(bus); + } else if (frozen_bus && removed) { + pr_info("EEH: Sleep 5s ahead of partial hotplug\n"); + ssleep(5); + + eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); + pcibios_add_pci_devices(frozen_bus); + } + eeh_pe_state_clear(pe, EEH_PE_KEEP); + + pe->tstamp = tstamp; + pe->freeze_count = cnt; + + return 0; +} + +/* The longest amount of time to wait for a pci device + * to come back on line, in seconds. + */ +#define MAX_WAIT_FOR_RECOVERY 150 + +static void eeh_handle_normal_event(struct eeh_pe *pe) +{ + struct pci_bus *frozen_bus; + int rc = 0; + enum pci_ers_result result = PCI_ERS_RESULT_NONE; + + frozen_bus = eeh_pe_bus_get(pe); + if (!frozen_bus) { + pr_err("%s: Cannot find PCI bus for PHB#%d-PE#%x\n", + __func__, pe->phb->global_number, pe->addr); + return; + } + + eeh_pe_update_time_stamp(pe); + pe->freeze_count++; + if (pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) + goto excess_failures; + pr_warning("EEH: This PCI device has failed %d times in the last hour\n", + pe->freeze_count); + + /* Walk the various device drivers attached to this slot through + * a reset sequence, giving each an opportunity to do what it needs + * to accomplish the reset. Each child gets a report of the + * status ... if any child can't handle the reset, then the entire + * slot is dlpar removed and added. + */ + pr_info("EEH: Notify device drivers to shutdown\n"); + eeh_pe_dev_traverse(pe, eeh_report_error, &result); + + /* Get the current PCI slot state. This can take a long time, + * sometimes over 3 seconds for certain systems. + */ + rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000); + if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { + pr_warning("EEH: Permanent failure\n"); + goto hard_fail; + } + + /* Since rtas may enable MMIO when posting the error log, + * don't post the error log until after all dev drivers + * have been informed. + */ + pr_info("EEH: Collect temporary log\n"); + eeh_slot_error_detail(pe, EEH_LOG_TEMP); + + /* If all device drivers were EEH-unaware, then shut + * down all of the device drivers, and hope they + * go down willingly, without panicing the system. + */ + if (result == PCI_ERS_RESULT_NONE) { + pr_info("EEH: Reset with hotplug activity\n"); + rc = eeh_reset_device(pe, frozen_bus); + if (rc) { + pr_warning("%s: Unable to reset, err=%d\n", + __func__, rc); + goto hard_fail; + } + } + + /* If all devices reported they can proceed, then re-enable MMIO */ + if (result == PCI_ERS_RESULT_CAN_RECOVER) { + pr_info("EEH: Enable I/O for affected devices\n"); + rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); + + if (rc < 0) + goto hard_fail; + if (rc) { + result = PCI_ERS_RESULT_NEED_RESET; + } else { + pr_info("EEH: Notify device drivers to resume I/O\n"); + result = PCI_ERS_RESULT_NONE; + eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result); + } + } + + /* If all devices reported they can proceed, then re-enable DMA */ + if (result == PCI_ERS_RESULT_CAN_RECOVER) { + pr_info("EEH: Enabled DMA for affected devices\n"); + rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); + + if (rc < 0) + goto hard_fail; + if (rc) + result = PCI_ERS_RESULT_NEED_RESET; + else + result = PCI_ERS_RESULT_RECOVERED; + } + + /* If any device has a hard failure, then shut off everything. */ + if (result == PCI_ERS_RESULT_DISCONNECT) { + pr_warning("EEH: Device driver gave up\n"); + goto hard_fail; + } + + /* If any device called out for a reset, then reset the slot */ + if (result == PCI_ERS_RESULT_NEED_RESET) { + pr_info("EEH: Reset without hotplug activity\n"); + rc = eeh_reset_device(pe, NULL); + if (rc) { + pr_warning("%s: Cannot reset, err=%d\n", + __func__, rc); + goto hard_fail; + } + + pr_info("EEH: Notify device drivers " + "the completion of reset\n"); + result = PCI_ERS_RESULT_NONE; + eeh_pe_dev_traverse(pe, eeh_report_reset, &result); + } + + /* All devices should claim they have recovered by now. */ + if ((result != PCI_ERS_RESULT_RECOVERED) && + (result != PCI_ERS_RESULT_NONE)) { + pr_warning("EEH: Not recovered\n"); + goto hard_fail; + } + + /* Tell all device drivers that they can resume operations */ + pr_info("EEH: Notify device driver to resume\n"); + eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); + + return; + +excess_failures: + /* + * About 90% of all real-life EEH failures in the field + * are due to poorly seated PCI cards. Only 10% or so are + * due to actual, failed cards. + */ + pr_err("EEH: PHB#%d-PE#%x has failed %d times in the\n" + "last hour and has been permanently disabled.\n" + "Please try reseating or replacing it.\n", + pe->phb->global_number, pe->addr, + pe->freeze_count); + goto perm_error; + +hard_fail: + pr_err("EEH: Unable to recover from failure from PHB#%d-PE#%x.\n" + "Please try reseating or replacing it\n", + pe->phb->global_number, pe->addr); + +perm_error: + eeh_slot_error_detail(pe, EEH_LOG_PERM); + + /* Notify all devices that they're about to go down. */ + eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); + + /* Shut down the device drivers for good. */ + if (frozen_bus) + pcibios_remove_pci_devices(frozen_bus); +} + +static void eeh_handle_special_event(void) +{ + struct eeh_pe *pe, *phb_pe; + struct pci_bus *bus; + struct pci_controller *hose, *tmp; + unsigned long flags; + int rc = 0; + + /* + * The return value from next_error() has been classified as follows. + * It might be good to enumerate them. However, next_error() is only + * supported by PowerNV platform for now. So it would be fine to use + * integer directly: + * + * 4 - Dead IOC 3 - Dead PHB + * 2 - Fenced PHB 1 - Frozen PE + * 0 - No error found + * + */ + rc = eeh_ops->next_error(&pe); + if (rc <= 0) + return; + + switch (rc) { + case 4: + /* Mark all PHBs in dead state */ + eeh_serialize_lock(&flags); + list_for_each_entry_safe(hose, tmp, + &hose_list, list_node) { + phb_pe = eeh_phb_pe_get(hose); + if (!phb_pe) continue; + + eeh_pe_state_mark(phb_pe, + EEH_PE_ISOLATED | EEH_PE_PHB_DEAD); + } + eeh_serialize_unlock(flags); + + /* Purge all events */ + eeh_remove_event(NULL); + break; + case 3: + case 2: + case 1: + /* Mark the PE in fenced state */ + eeh_serialize_lock(&flags); + if (rc == 3) + eeh_pe_state_mark(pe, + EEH_PE_ISOLATED | EEH_PE_PHB_DEAD); + else + eeh_pe_state_mark(pe, + EEH_PE_ISOLATED | EEH_PE_RECOVERING); + eeh_serialize_unlock(flags); + + /* Purge all events of the PHB */ + eeh_remove_event(pe); + break; + default: + pr_err("%s: Invalid value %d from next_error()\n", + __func__, rc); + return; + } + + /* + * For fenced PHB and frozen PE, it's handled as normal + * event. We have to remove the affected PHBs for dead + * PHB and IOC + */ + if (rc == 2 || rc == 1) + eeh_handle_normal_event(pe); + else { + list_for_each_entry_safe(hose, tmp, + &hose_list, list_node) { + phb_pe = eeh_phb_pe_get(hose); + if (!phb_pe || !(phb_pe->state & EEH_PE_PHB_DEAD)) + continue; + + bus = eeh_pe_bus_get(phb_pe); + /* Notify all devices that they're about to go down. */ + eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); + pcibios_remove_pci_devices(bus); + } + } +} + +/** + * eeh_handle_event - Reset a PCI device after hard lockup. + * @pe: EEH PE + * + * While PHB detects address or data parity errors on particular PCI + * slot, the associated PE will be frozen. Besides, DMA's occurring + * to wild addresses (which usually happen due to bugs in device + * drivers or in PCI adapter firmware) can cause EEH error. #SERR, + * #PERR or other misc PCI-related errors also can trigger EEH errors. + * + * Recovery process consists of unplugging the device driver (which + * generated hotplug events to userspace), then issuing a PCI #RST to + * the device, then reconfiguring the PCI config space for all bridges + * & devices under this slot, and then finally restarting the device + * drivers (which cause a second set of hotplug events to go out to + * userspace). + */ +void eeh_handle_event(struct eeh_pe *pe) +{ + if (pe) + eeh_handle_normal_event(pe); + else + eeh_handle_special_event(); +} diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c new file mode 100644 index 000000000000..d27c5afc90ae --- /dev/null +++ b/arch/powerpc/kernel/eeh_event.c @@ -0,0 +1,182 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Copyright (c) 2005 Linas Vepstas <linas@linas.org> + */ + +#include <linux/delay.h> +#include <linux/list.h> +#include <linux/sched.h> +#include <linux/semaphore.h> +#include <linux/pci.h> +#include <linux/slab.h> +#include <linux/kthread.h> +#include <asm/eeh_event.h> +#include <asm/ppc-pci.h> + +/** Overview: + * EEH error states may be detected within exception handlers; + * however, the recovery processing needs to occur asynchronously + * in a normal kernel context and not an interrupt context. + * This pair of routines creates an event and queues it onto a + * work-queue, where a worker thread can drive recovery. + */ + +static DEFINE_SPINLOCK(eeh_eventlist_lock); +static struct semaphore eeh_eventlist_sem; +LIST_HEAD(eeh_eventlist); + +/** + * eeh_event_handler - Dispatch EEH events. + * @dummy - unused + * + * The detection of a frozen slot can occur inside an interrupt, + * where it can be hard to do anything about it. The goal of this + * routine is to pull these detection events out of the context + * of the interrupt handler, and re-dispatch them for processing + * at a later time in a normal context. + */ +static int eeh_event_handler(void * dummy) +{ + unsigned long flags; + struct eeh_event *event; + struct eeh_pe *pe; + + while (!kthread_should_stop()) { + if (down_interruptible(&eeh_eventlist_sem)) + break; + + /* Fetch EEH event from the queue */ + spin_lock_irqsave(&eeh_eventlist_lock, flags); + event = NULL; + if (!list_empty(&eeh_eventlist)) { + event = list_entry(eeh_eventlist.next, + struct eeh_event, list); + list_del(&event->list); + } + spin_unlock_irqrestore(&eeh_eventlist_lock, flags); + if (!event) + continue; + + /* We might have event without binding PE */ + pe = event->pe; + if (pe) { + eeh_pe_state_mark(pe, EEH_PE_RECOVERING); + pr_info("EEH: Detected PCI bus error on PHB#%d-PE#%x\n", + pe->phb->global_number, pe->addr); + eeh_handle_event(pe); + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); + } else { + eeh_handle_event(NULL); + } + + kfree(event); + } + + return 0; +} + +/** + * eeh_event_init - Start kernel thread to handle EEH events + * + * This routine is called to start the kernel thread for processing + * EEH event. + */ +int eeh_event_init(void) +{ + struct task_struct *t; + int ret = 0; + + /* Initialize semaphore */ + sema_init(&eeh_eventlist_sem, 0); + + t = kthread_run(eeh_event_handler, NULL, "eehd"); + if (IS_ERR(t)) { + ret = PTR_ERR(t); + pr_err("%s: Failed to start EEH daemon (%d)\n", + __func__, ret); + return ret; + } + + return 0; +} + +/** + * eeh_send_failure_event - Generate a PCI error event + * @pe: EEH PE + * + * This routine can be called within an interrupt context; + * the actual event will be delivered in a normal context + * (from a workqueue). + */ +int eeh_send_failure_event(struct eeh_pe *pe) +{ + unsigned long flags; + struct eeh_event *event; + + event = kzalloc(sizeof(*event), GFP_ATOMIC); + if (!event) { + pr_err("EEH: out of memory, event not handled\n"); + return -ENOMEM; + } + event->pe = pe; + + /* We may or may not be called in an interrupt context */ + spin_lock_irqsave(&eeh_eventlist_lock, flags); + list_add(&event->list, &eeh_eventlist); + spin_unlock_irqrestore(&eeh_eventlist_lock, flags); + + /* For EEH deamon to knick in */ + up(&eeh_eventlist_sem); + + return 0; +} + +/** + * eeh_remove_event - Remove EEH event from the queue + * @pe: Event binding to the PE + * + * On PowerNV platform, we might have subsequent coming events + * is part of the former one. For that case, those subsequent + * coming events are totally duplicated and unnecessary, thus + * they should be removed. + */ +void eeh_remove_event(struct eeh_pe *pe) +{ + unsigned long flags; + struct eeh_event *event, *tmp; + + spin_lock_irqsave(&eeh_eventlist_lock, flags); + list_for_each_entry_safe(event, tmp, &eeh_eventlist, list) { + /* + * If we don't have valid PE passed in, that means + * we already have event corresponding to dead IOC + * and all events should be purged. + */ + if (!pe) { + list_del(&event->list); + kfree(event); + } else if (pe->type & EEH_PE_PHB) { + if (event->pe && event->pe->phb == pe->phb) { + list_del(&event->list); + kfree(event); + } + } else if (event->pe == pe) { + list_del(&event->list); + kfree(event); + } + } + spin_unlock_irqrestore(&eeh_eventlist_lock, flags); +} diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c new file mode 100644 index 000000000000..f9450537e335 --- /dev/null +++ b/arch/powerpc/kernel/eeh_pe.c @@ -0,0 +1,792 @@ +/* + * The file intends to implement PE based on the information from + * platforms. Basically, there have 3 types of PEs: PHB/Bus/Device. + * All the PEs should be organized as hierarchy tree. The first level + * of the tree will be associated to existing PHBs since the particular + * PE is only meaningful in one PHB domain. + * + * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/delay.h> +#include <linux/export.h> +#include <linux/gfp.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/string.h> + +#include <asm/pci-bridge.h> +#include <asm/ppc-pci.h> + +static LIST_HEAD(eeh_phb_pe); + +/** + * eeh_pe_alloc - Allocate PE + * @phb: PCI controller + * @type: PE type + * + * Allocate PE instance dynamically. + */ +static struct eeh_pe *eeh_pe_alloc(struct pci_controller *phb, int type) +{ + struct eeh_pe *pe; + + /* Allocate PHB PE */ + pe = kzalloc(sizeof(struct eeh_pe), GFP_KERNEL); + if (!pe) return NULL; + + /* Initialize PHB PE */ + pe->type = type; + pe->phb = phb; + INIT_LIST_HEAD(&pe->child_list); + INIT_LIST_HEAD(&pe->child); + INIT_LIST_HEAD(&pe->edevs); + + return pe; +} + +/** + * eeh_phb_pe_create - Create PHB PE + * @phb: PCI controller + * + * The function should be called while the PHB is detected during + * system boot or PCI hotplug in order to create PHB PE. + */ +int eeh_phb_pe_create(struct pci_controller *phb) +{ + struct eeh_pe *pe; + + /* Allocate PHB PE */ + pe = eeh_pe_alloc(phb, EEH_PE_PHB); + if (!pe) { + pr_err("%s: out of memory!\n", __func__); + return -ENOMEM; + } + + /* Put it into the list */ + list_add_tail(&pe->child, &eeh_phb_pe); + + pr_debug("EEH: Add PE for PHB#%d\n", phb->global_number); + + return 0; +} + +/** + * eeh_phb_pe_get - Retrieve PHB PE based on the given PHB + * @phb: PCI controller + * + * The overall PEs form hierarchy tree. The first layer of the + * hierarchy tree is composed of PHB PEs. The function is used + * to retrieve the corresponding PHB PE according to the given PHB. + */ +struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb) +{ + struct eeh_pe *pe; + + list_for_each_entry(pe, &eeh_phb_pe, child) { + /* + * Actually, we needn't check the type since + * the PE for PHB has been determined when that + * was created. + */ + if ((pe->type & EEH_PE_PHB) && pe->phb == phb) + return pe; + } + + return NULL; +} + +/** + * eeh_pe_next - Retrieve the next PE in the tree + * @pe: current PE + * @root: root PE + * + * The function is used to retrieve the next PE in the + * hierarchy PE tree. + */ +static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, + struct eeh_pe *root) +{ + struct list_head *next = pe->child_list.next; + + if (next == &pe->child_list) { + while (1) { + if (pe == root) + return NULL; + next = pe->child.next; + if (next != &pe->parent->child_list) + break; + pe = pe->parent; + } + } + + return list_entry(next, struct eeh_pe, child); +} + +/** + * eeh_pe_traverse - Traverse PEs in the specified PHB + * @root: root PE + * @fn: callback + * @flag: extra parameter to callback + * + * The function is used to traverse the specified PE and its + * child PEs. The traversing is to be terminated once the + * callback returns something other than NULL, or no more PEs + * to be traversed. + */ +void *eeh_pe_traverse(struct eeh_pe *root, + eeh_traverse_func fn, void *flag) +{ + struct eeh_pe *pe; + void *ret; + + for (pe = root; pe; pe = eeh_pe_next(pe, root)) { + ret = fn(pe, flag); + if (ret) return ret; + } + + return NULL; +} + +/** + * eeh_pe_dev_traverse - Traverse the devices from the PE + * @root: EEH PE + * @fn: function callback + * @flag: extra parameter to callback + * + * The function is used to traverse the devices of the specified + * PE and its child PEs. + */ +void *eeh_pe_dev_traverse(struct eeh_pe *root, + eeh_traverse_func fn, void *flag) +{ + struct eeh_pe *pe; + struct eeh_dev *edev, *tmp; + void *ret; + + if (!root) { + pr_warning("%s: Invalid PE %p\n", __func__, root); + return NULL; + } + + /* Traverse root PE */ + for (pe = root; pe; pe = eeh_pe_next(pe, root)) { + eeh_pe_for_each_dev(pe, edev, tmp) { + ret = fn(edev, flag); + if (ret) + return ret; + } + } + + return NULL; +} + +/** + * __eeh_pe_get - Check the PE address + * @data: EEH PE + * @flag: EEH device + * + * For one particular PE, it can be identified by PE address + * or tranditional BDF address. BDF address is composed of + * Bus/Device/Function number. The extra data referred by flag + * indicates which type of address should be used. + */ +static void *__eeh_pe_get(void *data, void *flag) +{ + struct eeh_pe *pe = (struct eeh_pe *)data; + struct eeh_dev *edev = (struct eeh_dev *)flag; + + /* Unexpected PHB PE */ + if (pe->type & EEH_PE_PHB) + return NULL; + + /* We prefer PE address */ + if (edev->pe_config_addr && + (edev->pe_config_addr == pe->addr)) + return pe; + + /* Try BDF address */ + if (edev->config_addr && + (edev->config_addr == pe->config_addr)) + return pe; + + return NULL; +} + +/** + * eeh_pe_get - Search PE based on the given address + * @edev: EEH device + * + * Search the corresponding PE based on the specified address which + * is included in the eeh device. The function is used to check if + * the associated PE has been created against the PE address. It's + * notable that the PE address has 2 format: traditional PE address + * which is composed of PCI bus/device/function number, or unified + * PE address. + */ +struct eeh_pe *eeh_pe_get(struct eeh_dev *edev) +{ + struct eeh_pe *root = eeh_phb_pe_get(edev->phb); + struct eeh_pe *pe; + + pe = eeh_pe_traverse(root, __eeh_pe_get, edev); + + return pe; +} + +/** + * eeh_pe_get_parent - Retrieve the parent PE + * @edev: EEH device + * + * The whole PEs existing in the system are organized as hierarchy + * tree. The function is used to retrieve the parent PE according + * to the parent EEH device. + */ +static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev) +{ + struct device_node *dn; + struct eeh_dev *parent; + + /* + * It might have the case for the indirect parent + * EEH device already having associated PE, but + * the direct parent EEH device doesn't have yet. + */ + dn = edev->dn->parent; + while (dn) { + /* We're poking out of PCI territory */ + if (!PCI_DN(dn)) return NULL; + + parent = of_node_to_eeh_dev(dn); + /* We're poking out of PCI territory */ + if (!parent) return NULL; + + if (parent->pe) + return parent->pe; + + dn = dn->parent; + } + + return NULL; +} + +/** + * eeh_add_to_parent_pe - Add EEH device to parent PE + * @edev: EEH device + * + * Add EEH device to the parent PE. If the parent PE already + * exists, the PE type will be changed to EEH_PE_BUS. Otherwise, + * we have to create new PE to hold the EEH device and the new + * PE will be linked to its parent PE as well. + */ +int eeh_add_to_parent_pe(struct eeh_dev *edev) +{ + struct eeh_pe *pe, *parent; + + /* + * Search the PE has been existing or not according + * to the PE address. If that has been existing, the + * PE should be composed of PCI bus and its subordinate + * components. + */ + pe = eeh_pe_get(edev); + if (pe && !(pe->type & EEH_PE_INVALID)) { + if (!edev->pe_config_addr) { + pr_err("%s: PE with addr 0x%x already exists\n", + __func__, edev->config_addr); + return -EEXIST; + } + + /* Mark the PE as type of PCI bus */ + pe->type = EEH_PE_BUS; + edev->pe = pe; + + /* Put the edev to PE */ + list_add_tail(&edev->list, &pe->edevs); + pr_debug("EEH: Add %s to Bus PE#%x\n", + edev->dn->full_name, pe->addr); + + return 0; + } else if (pe && (pe->type & EEH_PE_INVALID)) { + list_add_tail(&edev->list, &pe->edevs); + edev->pe = pe; + /* + * We're running to here because of PCI hotplug caused by + * EEH recovery. We need clear EEH_PE_INVALID until the top. + */ + parent = pe; + while (parent) { + if (!(parent->type & EEH_PE_INVALID)) + break; + parent->type &= ~(EEH_PE_INVALID | EEH_PE_KEEP); + parent = parent->parent; + } + pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n", + edev->dn->full_name, pe->addr, pe->parent->addr); + + return 0; + } + + /* Create a new EEH PE */ + pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE); + if (!pe) { + pr_err("%s: out of memory!\n", __func__); + return -ENOMEM; + } + pe->addr = edev->pe_config_addr; + pe->config_addr = edev->config_addr; + + /* + * While doing PE reset, we probably hot-reset the + * upstream bridge. However, the PCI devices including + * the associated EEH devices might be removed when EEH + * core is doing recovery. So that won't safe to retrieve + * the bridge through downstream EEH device. We have to + * trace the parent PCI bus, then the upstream bridge. + */ + if (eeh_probe_mode_dev()) + pe->bus = eeh_dev_to_pci_dev(edev)->bus; + + /* + * Put the new EEH PE into hierarchy tree. If the parent + * can't be found, the newly created PE will be attached + * to PHB directly. Otherwise, we have to associate the + * PE with its parent. + */ + parent = eeh_pe_get_parent(edev); + if (!parent) { + parent = eeh_phb_pe_get(edev->phb); + if (!parent) { + pr_err("%s: No PHB PE is found (PHB Domain=%d)\n", + __func__, edev->phb->global_number); + edev->pe = NULL; + kfree(pe); + return -EEXIST; + } + } + pe->parent = parent; + + /* + * Put the newly created PE into the child list and + * link the EEH device accordingly. + */ + list_add_tail(&pe->child, &parent->child_list); + list_add_tail(&edev->list, &pe->edevs); + edev->pe = pe; + pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n", + edev->dn->full_name, pe->addr, pe->parent->addr); + + return 0; +} + +/** + * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE + * @edev: EEH device + * + * The PE hierarchy tree might be changed when doing PCI hotplug. + * Also, the PCI devices or buses could be removed from the system + * during EEH recovery. So we have to call the function remove the + * corresponding PE accordingly if necessary. + */ +int eeh_rmv_from_parent_pe(struct eeh_dev *edev) +{ + struct eeh_pe *pe, *parent, *child; + int cnt; + + if (!edev->pe) { + pr_debug("%s: No PE found for EEH device %s\n", + __func__, edev->dn->full_name); + return -EEXIST; + } + + /* Remove the EEH device */ + pe = edev->pe; + edev->pe = NULL; + list_del(&edev->list); + + /* + * Check if the parent PE includes any EEH devices. + * If not, we should delete that. Also, we should + * delete the parent PE if it doesn't have associated + * child PEs and EEH devices. + */ + while (1) { + parent = pe->parent; + if (pe->type & EEH_PE_PHB) + break; + + if (!(pe->state & EEH_PE_KEEP)) { + if (list_empty(&pe->edevs) && + list_empty(&pe->child_list)) { + list_del(&pe->child); + kfree(pe); + } else { + break; + } + } else { + if (list_empty(&pe->edevs)) { + cnt = 0; + list_for_each_entry(child, &pe->child_list, child) { + if (!(child->type & EEH_PE_INVALID)) { + cnt++; + break; + } + } + + if (!cnt) + pe->type |= EEH_PE_INVALID; + else + break; + } + } + + pe = parent; + } + + return 0; +} + +/** + * eeh_pe_update_time_stamp - Update PE's frozen time stamp + * @pe: EEH PE + * + * We have time stamp for each PE to trace its time of getting + * frozen in last hour. The function should be called to update + * the time stamp on first error of the specific PE. On the other + * handle, we needn't account for errors happened in last hour. + */ +void eeh_pe_update_time_stamp(struct eeh_pe *pe) +{ + struct timeval tstamp; + + if (!pe) return; + + if (pe->freeze_count <= 0) { + pe->freeze_count = 0; + do_gettimeofday(&pe->tstamp); + } else { + do_gettimeofday(&tstamp); + if (tstamp.tv_sec - pe->tstamp.tv_sec > 3600) { + pe->tstamp = tstamp; + pe->freeze_count = 0; + } + } +} + +/** + * __eeh_pe_state_mark - Mark the state for the PE + * @data: EEH PE + * @flag: state + * + * The function is used to mark the indicated state for the given + * PE. Also, the associated PCI devices will be put into IO frozen + * state as well. + */ +static void *__eeh_pe_state_mark(void *data, void *flag) +{ + struct eeh_pe *pe = (struct eeh_pe *)data; + int state = *((int *)flag); + struct eeh_dev *edev, *tmp; + struct pci_dev *pdev; + + /* + * Mark the PE with the indicated state. Also, + * the associated PCI device will be put into + * I/O frozen state to avoid I/O accesses from + * the PCI device driver. + */ + pe->state |= state; + eeh_pe_for_each_dev(pe, edev, tmp) { + pdev = eeh_dev_to_pci_dev(edev); + if (pdev) + pdev->error_state = pci_channel_io_frozen; + } + + return NULL; +} + +/** + * eeh_pe_state_mark - Mark specified state for PE and its associated device + * @pe: EEH PE + * + * EEH error affects the current PE and its child PEs. The function + * is used to mark appropriate state for the affected PEs and the + * associated devices. + */ +void eeh_pe_state_mark(struct eeh_pe *pe, int state) +{ + eeh_pe_traverse(pe, __eeh_pe_state_mark, &state); +} + +/** + * __eeh_pe_state_clear - Clear state for the PE + * @data: EEH PE + * @flag: state + * + * The function is used to clear the indicated state from the + * given PE. Besides, we also clear the check count of the PE + * as well. + */ +static void *__eeh_pe_state_clear(void *data, void *flag) +{ + struct eeh_pe *pe = (struct eeh_pe *)data; + int state = *((int *)flag); + + pe->state &= ~state; + pe->check_count = 0; + + return NULL; +} + +/** + * eeh_pe_state_clear - Clear state for the PE and its children + * @pe: PE + * @state: state to be cleared + * + * When the PE and its children has been recovered from error, + * we need clear the error state for that. The function is used + * for the purpose. + */ +void eeh_pe_state_clear(struct eeh_pe *pe, int state) +{ + eeh_pe_traverse(pe, __eeh_pe_state_clear, &state); +} + +/* + * Some PCI bridges (e.g. PLX bridges) have primary/secondary + * buses assigned explicitly by firmware, and we probably have + * lost that after reset. So we have to delay the check until + * the PCI-CFG registers have been restored for the parent + * bridge. + * + * Don't use normal PCI-CFG accessors, which probably has been + * blocked on normal path during the stage. So we need utilize + * eeh operations, which is always permitted. + */ +static void eeh_bridge_check_link(struct eeh_dev *edev, + struct device_node *dn) +{ + int cap; + uint32_t val; + int timeout = 0; + + /* + * We only check root port and downstream ports of + * PCIe switches + */ + if (!(edev->mode & (EEH_DEV_ROOT_PORT | EEH_DEV_DS_PORT))) + return; + + pr_debug("%s: Check PCIe link for %04x:%02x:%02x.%01x ...\n", + __func__, edev->phb->global_number, + edev->config_addr >> 8, + PCI_SLOT(edev->config_addr & 0xFF), + PCI_FUNC(edev->config_addr & 0xFF)); + + /* Check slot status */ + cap = edev->pcie_cap; + eeh_ops->read_config(dn, cap + PCI_EXP_SLTSTA, 2, &val); + if (!(val & PCI_EXP_SLTSTA_PDS)) { + pr_debug(" No card in the slot (0x%04x) !\n", val); + return; + } + + /* Check power status if we have the capability */ + eeh_ops->read_config(dn, cap + PCI_EXP_SLTCAP, 2, &val); + if (val & PCI_EXP_SLTCAP_PCP) { + eeh_ops->read_config(dn, cap + PCI_EXP_SLTCTL, 2, &val); + if (val & PCI_EXP_SLTCTL_PCC) { + pr_debug(" In power-off state, power it on ...\n"); + val &= ~(PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PIC); + val |= (0x0100 & PCI_EXP_SLTCTL_PIC); + eeh_ops->write_config(dn, cap + PCI_EXP_SLTCTL, 2, val); + msleep(2 * 1000); + } + } + + /* Enable link */ + eeh_ops->read_config(dn, cap + PCI_EXP_LNKCTL, 2, &val); + val &= ~PCI_EXP_LNKCTL_LD; + eeh_ops->write_config(dn, cap + PCI_EXP_LNKCTL, 2, val); + + /* Check link */ + eeh_ops->read_config(dn, cap + PCI_EXP_LNKCAP, 4, &val); + if (!(val & PCI_EXP_LNKCAP_DLLLARC)) { + pr_debug(" No link reporting capability (0x%08x) \n", val); + msleep(1000); + return; + } + + /* Wait the link is up until timeout (5s) */ + timeout = 0; + while (timeout < 5000) { + msleep(20); + timeout += 20; + + eeh_ops->read_config(dn, cap + PCI_EXP_LNKSTA, 2, &val); + if (val & PCI_EXP_LNKSTA_DLLLA) + break; + } + + if (val & PCI_EXP_LNKSTA_DLLLA) + pr_debug(" Link up (%s)\n", + (val & PCI_EXP_LNKSTA_CLS_2_5GB) ? "2.5GB" : "5GB"); + else + pr_debug(" Link not ready (0x%04x)\n", val); +} + +#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF)) +#define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)]) + +static void eeh_restore_bridge_bars(struct eeh_dev *edev, + struct device_node *dn) +{ + int i; + + /* + * Device BARs: 0x10 - 0x18 + * Bus numbers and windows: 0x18 - 0x30 + */ + for (i = 4; i < 13; i++) + eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]); + /* Rom: 0x38 */ + eeh_ops->write_config(dn, 14*4, 4, edev->config_space[14]); + + /* Cache line & Latency timer: 0xC 0xD */ + eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1, + SAVED_BYTE(PCI_CACHE_LINE_SIZE)); + eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1, + SAVED_BYTE(PCI_LATENCY_TIMER)); + /* Max latency, min grant, interrupt ping and line: 0x3C */ + eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]); + + /* PCI Command: 0x4 */ + eeh_ops->write_config(dn, PCI_COMMAND, 4, edev->config_space[1]); + + /* Check the PCIe link is ready */ + eeh_bridge_check_link(edev, dn); +} + +static void eeh_restore_device_bars(struct eeh_dev *edev, + struct device_node *dn) +{ + int i; + u32 cmd; + + for (i = 4; i < 10; i++) + eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]); + /* 12 == Expansion ROM Address */ + eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]); + + eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1, + SAVED_BYTE(PCI_CACHE_LINE_SIZE)); + eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1, + SAVED_BYTE(PCI_LATENCY_TIMER)); + + /* max latency, min grant, interrupt pin and line */ + eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]); + + /* + * Restore PERR & SERR bits, some devices require it, + * don't touch the other command bits + */ + eeh_ops->read_config(dn, PCI_COMMAND, 4, &cmd); + if (edev->config_space[1] & PCI_COMMAND_PARITY) + cmd |= PCI_COMMAND_PARITY; + else + cmd &= ~PCI_COMMAND_PARITY; + if (edev->config_space[1] & PCI_COMMAND_SERR) + cmd |= PCI_COMMAND_SERR; + else + cmd &= ~PCI_COMMAND_SERR; + eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd); +} + +/** + * eeh_restore_one_device_bars - Restore the Base Address Registers for one device + * @data: EEH device + * @flag: Unused + * + * Loads the PCI configuration space base address registers, + * the expansion ROM base address, the latency timer, and etc. + * from the saved values in the device node. + */ +static void *eeh_restore_one_device_bars(void *data, void *flag) +{ + struct eeh_dev *edev = (struct eeh_dev *)data; + struct device_node *dn = eeh_dev_to_of_node(edev); + + /* Do special restore for bridges */ + if (edev->mode & EEH_DEV_BRIDGE) + eeh_restore_bridge_bars(edev, dn); + else + eeh_restore_device_bars(edev, dn); + + return NULL; +} + +/** + * eeh_pe_restore_bars - Restore the PCI config space info + * @pe: EEH PE + * + * This routine performs a recursive walk to the children + * of this device as well. + */ +void eeh_pe_restore_bars(struct eeh_pe *pe) +{ + /* + * We needn't take the EEH lock since eeh_pe_dev_traverse() + * will take that. + */ + eeh_pe_dev_traverse(pe, eeh_restore_one_device_bars, NULL); +} + +/** + * eeh_pe_bus_get - Retrieve PCI bus according to the given PE + * @pe: EEH PE + * + * Retrieve the PCI bus according to the given PE. Basically, + * there're 3 types of PEs: PHB/Bus/Device. For PHB PE, the + * primary PCI bus will be retrieved. The parent bus will be + * returned for BUS PE. However, we don't have associated PCI + * bus for DEVICE PE. + */ +struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) +{ + struct pci_bus *bus = NULL; + struct eeh_dev *edev; + struct pci_dev *pdev; + + if (pe->type & EEH_PE_PHB) { + bus = pe->phb->bus; + } else if (pe->type & EEH_PE_BUS || + pe->type & EEH_PE_DEVICE) { + if (pe->bus) { + bus = pe->bus; + goto out; + } + + edev = list_first_entry(&pe->edevs, struct eeh_dev, list); + pdev = eeh_dev_to_pci_dev(edev); + if (pdev) + bus = pdev->bus; + } + +out: + return bus; +} diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c new file mode 100644 index 000000000000..5d753d4f2c75 --- /dev/null +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -0,0 +1,95 @@ +/* + * Sysfs entries for PCI Error Recovery for PAPR-compliant platform. + * Copyright IBM Corporation 2007 + * Copyright Linas Vepstas <linas@austin.ibm.com> 2007 + * + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com> + */ +#include <linux/pci.h> +#include <linux/stat.h> +#include <asm/ppc-pci.h> +#include <asm/pci-bridge.h> + +/** + * EEH_SHOW_ATTR -- Create sysfs entry for eeh statistic + * @_name: name of file in sysfs directory + * @_memb: name of member in struct pci_dn to access + * @_format: printf format for display + * + * All of the attributes look very similar, so just + * auto-gen a cut-n-paste routine to display them. + */ +#define EEH_SHOW_ATTR(_name,_memb,_format) \ +static ssize_t eeh_show_##_name(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct pci_dev *pdev = to_pci_dev(dev); \ + struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); \ + \ + if (!edev) \ + return 0; \ + \ + return sprintf(buf, _format "\n", edev->_memb); \ +} \ +static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL); + +EEH_SHOW_ATTR(eeh_mode, mode, "0x%x"); +EEH_SHOW_ATTR(eeh_config_addr, config_addr, "0x%x"); +EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x"); + +void eeh_sysfs_add_device(struct pci_dev *pdev) +{ + struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); + int rc=0; + + if (edev && (edev->mode & EEH_DEV_SYSFS)) + return; + + rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode); + rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr); + rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); + + if (rc) + printk(KERN_WARNING "EEH: Unable to create sysfs entries\n"); + else if (edev) + edev->mode |= EEH_DEV_SYSFS; +} + +void eeh_sysfs_remove_device(struct pci_dev *pdev) +{ + struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); + + /* + * The parent directory might have been removed. We needn't + * continue for that case. + */ + if (!pdev->dev.kobj.sd) { + if (edev) + edev->mode &= ~EEH_DEV_SYSFS; + return; + } + + device_remove_file(&pdev->dev, &dev_attr_eeh_mode); + device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr); + device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); + + if (edev) + edev->mode &= ~EEH_DEV_SYSFS; +} diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 246b11c4fe7e..c04cdf70d487 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -102,7 +102,8 @@ BEGIN_FW_FTR_SECTION /* if from user, see if there are any DTL entries to process */ ld r10,PACALPPACAPTR(r13) /* get ptr to VPA */ ld r11,PACA_DTL_RIDX(r13) /* get log read index */ - ld r10,LPPACA_DTLIDX(r10) /* get log write index */ + addi r10,r10,LPPACA_DTLIDX + LDX_BE r10,0,r10 /* get log write index */ cmpd cr1,r11,r10 beq+ cr1,33f bl .accumulate_stolen_time @@ -449,15 +450,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_DSCR) #ifdef CONFIG_PPC_BOOK3S_64 BEGIN_FTR_SECTION - /* - * Back up the TAR across context switches. Note that the TAR is not - * available for use in the kernel. (To provide this, the TAR should - * be backed up/restored on exception entry/exit instead, and be in - * pt_regs. FIXME, this should be in pt_regs anyway (for debug).) - */ - mfspr r0,SPRN_TAR - std r0,THREAD_TAR(r3) - /* Event based branch registers */ mfspr r0, SPRN_BESCR std r0, THREAD_BESCR(r3) @@ -465,20 +457,6 @@ BEGIN_FTR_SECTION std r0, THREAD_EBBHR(r3) mfspr r0, SPRN_EBBRR std r0, THREAD_EBBRR(r3) - - /* PMU registers made user read/(write) by EBB */ - mfspr r0, SPRN_SIAR - std r0, THREAD_SIAR(r3) - mfspr r0, SPRN_SDAR - std r0, THREAD_SDAR(r3) - mfspr r0, SPRN_SIER - std r0, THREAD_SIER(r3) - mfspr r0, SPRN_MMCR0 - std r0, THREAD_MMCR0(r3) - mfspr r0, SPRN_MMCR2 - std r0, THREAD_MMCR2(r3) - mfspr r0, SPRN_MMCRA - std r0, THREAD_MMCRA(r3) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) #endif @@ -545,9 +523,11 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) */ ld r9,PACA_SLBSHADOWPTR(r13) li r12,0 - std r12,SLBSHADOW_STACKESID(r9) /* Clear ESID */ - std r7,SLBSHADOW_STACKVSID(r9) /* Save VSID */ - std r0,SLBSHADOW_STACKESID(r9) /* Save ESID */ + std r12,SLBSHADOW_STACKESID(r9) /* Clear ESID */ + li r12,SLBSHADOW_STACKVSID + STDX_BE r7,r12,r9 /* Save VSID */ + li r12,SLBSHADOW_STACKESID + STDX_BE r0,r12,r9 /* Save ESID */ /* No need to check for MMU_FTR_NO_SLBIE_B here, since when * we have 1TB segments, the only CPUs known to have the errata @@ -581,20 +561,6 @@ BEGIN_FTR_SECTION ld r0, THREAD_EBBRR(r4) mtspr SPRN_EBBRR, r0 - /* PMU registers made user read/(write) by EBB */ - ld r0, THREAD_SIAR(r4) - mtspr SPRN_SIAR, r0 - ld r0, THREAD_SDAR(r4) - mtspr SPRN_SDAR, r0 - ld r0, THREAD_SIER(r4) - mtspr SPRN_SIER, r0 - ld r0, THREAD_MMCR0(r4) - mtspr SPRN_MMCR0, r0 - ld r0, THREAD_MMCR2(r4) - mtspr SPRN_MMCR2, r0 - ld r0, THREAD_MMCRA(r4) - mtspr SPRN_MMCRA, r0 - ld r0,THREAD_TAR(r4) mtspr SPRN_TAR,r0 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) @@ -614,7 +580,13 @@ BEGIN_FTR_SECTION cmpwi r6,0 bne 1f ld r0,0(r7) -1: cmpd r0,r25 +1: +BEGIN_FTR_SECTION_NESTED(70) + mfspr r8, SPRN_FSCR + rldimi r8, r6, FSCR_DSCR_LG, (63 - FSCR_DSCR_LG) + mtspr SPRN_FSCR, r8 +END_FTR_SECTION_NESTED(CPU_FTR_ARCH_207S, CPU_FTR_ARCH_207S, 70) + cmpd r0,r25 beq 2f mtspr SPRN_DSCR,r0 2: @@ -657,21 +629,43 @@ _GLOBAL(ret_from_except_lite) CURRENT_THREAD_INFO(r9, r1) ld r3,_MSR(r1) +#ifdef CONFIG_PPC_BOOK3E + ld r10,PACACURRENT(r13) +#endif /* CONFIG_PPC_BOOK3E */ ld r4,TI_FLAGS(r9) andi. r3,r3,MSR_PR beq resume_kernel +#ifdef CONFIG_PPC_BOOK3E + lwz r3,(THREAD+THREAD_DBCR0)(r10) +#endif /* CONFIG_PPC_BOOK3E */ /* Check current_thread_info()->flags */ andi. r0,r4,_TIF_USER_WORK_MASK +#ifdef CONFIG_PPC_BOOK3E + bne 1f + /* + * Check to see if the dbcr0 register is set up to debug. + * Use the internal debug mode bit to do this. + */ + andis. r0,r3,DBCR0_IDM@h beq restore - - andi. r0,r4,_TIF_NEED_RESCHED - beq 1f + mfmsr r0 + rlwinm r0,r0,0,~MSR_DE /* Clear MSR.DE */ + mtmsr r0 + mtspr SPRN_DBCR0,r3 + li r10, -1 + mtspr SPRN_DBSR,r10 + b restore +#else + beq restore +#endif +1: andi. r0,r4,_TIF_NEED_RESCHED + beq 2f bl .restore_interrupts SCHEDULE_USER b .ret_from_except_lite -1: bl .save_nvgprs +2: bl .save_nvgprs bl .restore_interrupts addi r3,r1,STACK_FRAME_OVERHEAD bl .do_notify_resume @@ -727,9 +721,9 @@ resume_kernel: /* * Here we are preempting the current task. We want to make - * sure we are soft-disabled first + * sure we are soft-disabled first and reconcile irq state. */ - SOFT_DISABLE_INTS(r3,r4) + RECONCILE_IRQ_STATE(r3,r4) 1: bl .preempt_schedule_irq /* Re-test flags and eventually loop */ diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c index d44a571e45a7..6300c13bbde4 100644 --- a/arch/powerpc/kernel/epapr_paravirt.c +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -30,22 +30,20 @@ extern u32 epapr_ev_idle_start[]; bool epapr_paravirt_enabled; -static int __init epapr_paravirt_init(void) +static int __init early_init_dt_scan_epapr(unsigned long node, + const char *uname, + int depth, void *data) { - struct device_node *hyper_node; const u32 *insts; - int len, i; + unsigned long len; + int i; - hyper_node = of_find_node_by_path("/hypervisor"); - if (!hyper_node) - return -ENODEV; - - insts = of_get_property(hyper_node, "hcall-instructions", &len); + insts = of_get_flat_dt_prop(node, "hcall-instructions", &len); if (!insts) - return -ENODEV; + return 0; if (len % 4 || len > (4 * 4)) - return -ENODEV; + return -1; for (i = 0; i < (len / 4); i++) { patch_instruction(epapr_hypercall_start + i, insts[i]); @@ -55,13 +53,19 @@ static int __init epapr_paravirt_init(void) } #if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) - if (of_get_property(hyper_node, "has-idle", NULL)) + if (of_get_flat_dt_prop(node, "has-idle", NULL)) ppc_md.power_save = epapr_ev_idle; #endif epapr_paravirt_enabled = true; + return 1; +} + +int __init epapr_paravirt_early_init(void) +{ + of_scan_flat_dt(early_init_dt_scan_epapr, NULL); + return 0; } -early_initcall(epapr_paravirt_init); diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 645170a07ada..2d067049db27 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -198,9 +198,9 @@ exc_##n##_common: \ /* This second version is meant for exceptions that don't immediately * hard-enable. We set a bit in paca->irq_happened to ensure that * a subsequent call to arch_local_irq_restore() will properly - * hard-enable and avoid the fast-path + * hard-enable and avoid the fast-path, and then reconcile irq state. */ -#define INTS_DISABLE SOFT_DISABLE_INTS(r3,r4) +#define INTS_DISABLE RECONCILE_IRQ_STATE(r3,r4) /* This is called by exceptions that used INTS_KEEP (that did not touch * irq indicators in the PACA). This will restore MSR:EE to it's previous diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index e6eba1bf61ad..3a9ed6ac224b 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -293,27 +293,31 @@ system_call_pSeries: * out of line to handle them */ . = 0xe00 -hv_exception_trampoline: +hv_data_storage_trampoline: SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXGEN) b h_data_storage_hv . = 0xe20 +hv_instr_storage_trampoline: SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXGEN) b h_instr_storage_hv . = 0xe40 +emulation_assist_trampoline: SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXGEN) b emulation_assist_hv . = 0xe60 +hv_exception_trampoline: SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXGEN) b hmi_exception_hv . = 0xe80 +hv_doorbell_trampoline: SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXGEN) b h_doorbell_hv @@ -323,28 +327,35 @@ hv_exception_trampoline: * prolog code of the PerformanceMonitor one. A little * trickery is thus necessary */ -performance_monitor_pSeries_1: . = 0xf00 +performance_monitor_pseries_trampoline: SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXGEN) b performance_monitor_pSeries -altivec_unavailable_pSeries_1: . = 0xf20 +altivec_unavailable_pseries_trampoline: SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXGEN) b altivec_unavailable_pSeries -vsx_unavailable_pSeries_1: . = 0xf40 +vsx_unavailable_pseries_trampoline: SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXGEN) b vsx_unavailable_pSeries . = 0xf60 +facility_unavailable_trampoline: + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) + b facility_unavailable_pSeries + + . = 0xf80 +hv_facility_unavailable_trampoline: SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXGEN) - b tm_unavailable_pSeries + b facility_unavailable_hv #ifdef CONFIG_CBE_RAS STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error) @@ -360,11 +371,7 @@ denorm_exception_hv: HMT_MEDIUM_PPR_DISCARD mtspr SPRN_SPRG_HSCRATCH0,r13 EXCEPTION_PROLOG_0(PACA_EXGEN) - std r11,PACA_EXGEN+EX_R11(r13) - std r12,PACA_EXGEN+EX_R12(r13) - mfspr r9,SPRN_SPRG_HSCRATCH0 - std r9,PACA_EXGEN+EX_R13(r13) - mfcr r9 + EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x1500) #ifdef CONFIG_PPC_DENORMALISATION mfspr r10,SPRN_HSRR1 @@ -374,6 +381,7 @@ denorm_exception_hv: bne+ denorm_assist #endif + KVMTEST(0x1500) EXCEPTION_PROLOG_PSERIES_1(denorm_common, EXC_HV) KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x1500) @@ -454,38 +462,14 @@ BEGIN_FTR_SECTION xori r10,r10,(MSR_FE0|MSR_FE1) mtmsrd r10 sync - fmr 0,0 - fmr 1,1 - fmr 2,2 - fmr 3,3 - fmr 4,4 - fmr 5,5 - fmr 6,6 - fmr 7,7 - fmr 8,8 - fmr 9,9 - fmr 10,10 - fmr 11,11 - fmr 12,12 - fmr 13,13 - fmr 14,14 - fmr 15,15 - fmr 16,16 - fmr 17,17 - fmr 18,18 - fmr 19,19 - fmr 20,20 - fmr 21,21 - fmr 22,22 - fmr 23,23 - fmr 24,24 - fmr 25,25 - fmr 26,26 - fmr 27,27 - fmr 28,28 - fmr 29,29 - fmr 30,30 - fmr 31,31 + +#define FMR2(n) fmr (n), (n) ; fmr n+1, n+1 +#define FMR4(n) FMR2(n) ; FMR2(n+2) +#define FMR8(n) FMR4(n) ; FMR4(n+4) +#define FMR16(n) FMR8(n) ; FMR8(n+8) +#define FMR32(n) FMR16(n) ; FMR16(n+16) + FMR32(0) + FTR_SECTION_ELSE /* * To denormalise we need to move a copy of the register to itself. @@ -495,43 +479,33 @@ FTR_SECTION_ELSE oris r10,r10,MSR_VSX@h mtmsrd r10 sync - XVCPSGNDP(0,0,0) - XVCPSGNDP(1,1,1) - XVCPSGNDP(2,2,2) - XVCPSGNDP(3,3,3) - XVCPSGNDP(4,4,4) - XVCPSGNDP(5,5,5) - XVCPSGNDP(6,6,6) - XVCPSGNDP(7,7,7) - XVCPSGNDP(8,8,8) - XVCPSGNDP(9,9,9) - XVCPSGNDP(10,10,10) - XVCPSGNDP(11,11,11) - XVCPSGNDP(12,12,12) - XVCPSGNDP(13,13,13) - XVCPSGNDP(14,14,14) - XVCPSGNDP(15,15,15) - XVCPSGNDP(16,16,16) - XVCPSGNDP(17,17,17) - XVCPSGNDP(18,18,18) - XVCPSGNDP(19,19,19) - XVCPSGNDP(20,20,20) - XVCPSGNDP(21,21,21) - XVCPSGNDP(22,22,22) - XVCPSGNDP(23,23,23) - XVCPSGNDP(24,24,24) - XVCPSGNDP(25,25,25) - XVCPSGNDP(26,26,26) - XVCPSGNDP(27,27,27) - XVCPSGNDP(28,28,28) - XVCPSGNDP(29,29,29) - XVCPSGNDP(30,30,30) - XVCPSGNDP(31,31,31) + +#define XVCPSGNDP2(n) XVCPSGNDP(n,n,n) ; XVCPSGNDP(n+1,n+1,n+1) +#define XVCPSGNDP4(n) XVCPSGNDP2(n) ; XVCPSGNDP2(n+2) +#define XVCPSGNDP8(n) XVCPSGNDP4(n) ; XVCPSGNDP4(n+4) +#define XVCPSGNDP16(n) XVCPSGNDP8(n) ; XVCPSGNDP8(n+8) +#define XVCPSGNDP32(n) XVCPSGNDP16(n) ; XVCPSGNDP16(n+16) + XVCPSGNDP32(0) + ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_206) + +BEGIN_FTR_SECTION + b denorm_done +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) +/* + * To denormalise we need to move a copy of the register to itself. + * For POWER8 we need to do that for all 64 VSX registers + */ + XVCPSGNDP32(32) +denorm_done: mtspr SPRN_HSRR0,r11 mtcrf 0x80,r9 ld r9,PACA_EXGEN+EX_R9(r13) RESTORE_PPR_PACA(PACA_EXGEN, r10) +BEGIN_FTR_SECTION + ld r10,PACA_EXGEN+EX_CFAR(r13) + mtspr SPRN_CFAR,r10 +END_FTR_SECTION_IFSET(CPU_FTR_CFAR) ld r10,PACA_EXGEN+EX_R10(r13) ld r11,PACA_EXGEN+EX_R11(r13) ld r12,PACA_EXGEN+EX_R12(r13) @@ -560,8 +534,10 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_206) KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf20) STD_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40) - STD_EXCEPTION_PSERIES_OOL(0xf60, tm_unavailable) + STD_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable) KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf60) + STD_EXCEPTION_HV_OOL(0xf82, facility_unavailable) + KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xf82) /* * An interrupt came in while soft-disabled. We set paca->irq_happened, then: @@ -721,7 +697,7 @@ machine_check_common: STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception) STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception) STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception) - STD_EXCEPTION_COMMON(0xe40, emulation_assist, .program_check_exception) + STD_EXCEPTION_COMMON(0xe40, emulation_assist, .emulation_assist_interrupt) STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception) #ifdef CONFIG_PPC_DOORBELL STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, .doorbell_exception) @@ -831,53 +807,55 @@ system_call_relon_pSeries: STD_RELON_EXCEPTION_PSERIES(0x4d00, 0xd00, single_step) . = 0x4e00 - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b h_data_storage_relon_hv + b . /* Can't happen, see v2.07 Book III-S section 6.5 */ . = 0x4e20 - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b h_instr_storage_relon_hv + b . /* Can't happen, see v2.07 Book III-S section 6.5 */ . = 0x4e40 +emulation_assist_relon_trampoline: SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXGEN) b emulation_assist_relon_hv . = 0x4e60 - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b hmi_exception_relon_hv + b . /* Can't happen, see v2.07 Book III-S section 6.5 */ . = 0x4e80 +h_doorbell_relon_trampoline: SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXGEN) b h_doorbell_relon_hv -performance_monitor_relon_pSeries_1: . = 0x4f00 +performance_monitor_relon_pseries_trampoline: SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXGEN) b performance_monitor_relon_pSeries -altivec_unavailable_relon_pSeries_1: . = 0x4f20 +altivec_unavailable_relon_pseries_trampoline: SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXGEN) b altivec_unavailable_relon_pSeries -vsx_unavailable_relon_pSeries_1: . = 0x4f40 +vsx_unavailable_relon_pseries_trampoline: SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXGEN) b vsx_unavailable_relon_pSeries -tm_unavailable_relon_pSeries_1: . = 0x4f60 +facility_unavailable_relon_trampoline: SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXGEN) - b tm_unavailable_relon_pSeries + b facility_unavailable_relon_pSeries + + . = 0x4f80 +hv_facility_unavailable_relon_trampoline: + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) + b hv_facility_unavailable_relon_hv STD_RELON_EXCEPTION_PSERIES(0x5300, 0x1300, instruction_breakpoint) #ifdef CONFIG_PPC_DENORMALISATION @@ -1203,36 +1181,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) bl .vsx_unavailable_exception b .ret_from_except - .align 7 - .globl tm_unavailable_common -tm_unavailable_common: - EXCEPTION_PROLOG_COMMON(0xf60, PACA_EXGEN) - bl .save_nvgprs - DISABLE_INTS - addi r3,r1,STACK_FRAME_OVERHEAD - bl .tm_unavailable_exception - b .ret_from_except + STD_EXCEPTION_COMMON(0xf60, facility_unavailable, .facility_unavailable_exception) + STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, .facility_unavailable_exception) .align 7 .globl __end_handlers __end_handlers: /* Equivalents to the above handlers for relocation-on interrupt vectors */ - STD_RELON_EXCEPTION_HV_OOL(0xe00, h_data_storage) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe00) - STD_RELON_EXCEPTION_HV_OOL(0xe20, h_instr_storage) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe20) STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe40) - STD_RELON_EXCEPTION_HV_OOL(0xe60, hmi_exception) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe60) MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe80) STD_RELON_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor) STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable) STD_RELON_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) - STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, tm_unavailable) + STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable) + STD_RELON_EXCEPTION_HV_OOL(0xf80, hv_facility_unavailable) #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) /* diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 8a9b6f59822d..67ee0d6c1070 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -822,14 +822,6 @@ finish_tlb_load: rfi /* Should sync shadow TLBs */ b . /* prevent prefetch past rfi */ -/* extern void giveup_fpu(struct task_struct *prev) - * - * The PowerPC 4xx family of processors do not have an FPU, so this just - * returns. - */ -_ENTRY(giveup_fpu) - blr - /* This is where the main kernel code starts. */ start_here: diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 97e2671cde7f..c334f53453f7 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -784,16 +784,6 @@ _GLOBAL(__fixup_440A_mcheck) sync blr -/* - * extern void giveup_fpu(struct task_struct *prev) - * - * The 44x core does not have an FPU. - */ -#ifndef CONFIG_PPC_FPU -_GLOBAL(giveup_fpu) - blr -#endif - _GLOBAL(set_context) #ifdef CONFIG_BDI_SWITCH diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index b61363d557b5..3d11d8038dee 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -703,6 +703,7 @@ _GLOBAL(relative_toc) mtlr r0 blr +.balign 8 p_toc: .llong __toc_start + 0x8000 - 0b /* diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index b2a5860accfb..1b92a97b1b04 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -691,10 +691,6 @@ modified_instr: b 151b #endif - .globl giveup_fpu -giveup_fpu: - blr - /* * This is where the main kernel code starts. */ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index d10a7cacccd2..289afaffbbb5 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -948,16 +948,6 @@ _GLOBAL(giveup_spe) #endif /* CONFIG_SPE */ /* - * extern void giveup_fpu(struct task_struct *prev) - * - * Not all FSL Book-E cores have an FPU - */ -#ifndef CONFIG_PPC_FPU -_GLOBAL(giveup_fpu) - blr -#endif - -/* * extern void abort(void) * * At present, this routine just applies a system reset. diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index a949bdfc9623..f0b47d1a6b0e 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -176,7 +176,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) length_max = 512 ; /* 64 doublewords */ /* DAWR region can't cross 512 boundary */ if ((bp->attr.bp_addr >> 10) != - ((bp->attr.bp_addr + bp->attr.bp_len) >> 10)) + ((bp->attr.bp_addr + bp->attr.bp_len - 1) >> 10)) return -EINVAL; } if (info->len > @@ -250,6 +250,7 @@ int __kprobes hw_breakpoint_handler(struct die_args *args) * we still need to single-step the instruction, but we don't * generate an event. */ + info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; if (!((bp->attr.bp_addr <= dar) && (dar - bp->attr.bp_addr < bp->attr.bp_len))) info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index 8220baa46faf..16a7c2326d48 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -205,7 +205,7 @@ static int ibmebus_create_devices(const struct of_device_id *matches) return ret; } -int ibmebus_register_driver(struct of_platform_driver *drv) +int ibmebus_register_driver(struct platform_driver *drv) { /* If the driver uses devices that ibmebus doesn't know, add them */ ibmebus_create_devices(drv->driver.of_match_table); @@ -215,7 +215,7 @@ int ibmebus_register_driver(struct of_platform_driver *drv) } EXPORT_SYMBOL(ibmebus_register_driver); -void ibmebus_unregister_driver(struct of_platform_driver *drv) +void ibmebus_unregister_driver(struct platform_driver *drv) { driver_unregister(&drv->driver); } @@ -338,11 +338,10 @@ static int ibmebus_bus_bus_match(struct device *dev, struct device_driver *drv) static int ibmebus_bus_device_probe(struct device *dev) { int error = -ENODEV; - struct of_platform_driver *drv; + struct platform_driver *drv; struct platform_device *of_dev; - const struct of_device_id *match; - drv = to_of_platform_driver(dev->driver); + drv = to_platform_driver(dev->driver); of_dev = to_platform_device(dev); if (!drv->probe) @@ -350,9 +349,8 @@ static int ibmebus_bus_device_probe(struct device *dev) of_dev_get(of_dev); - match = of_match_device(drv->driver.of_match_table, dev); - if (match) - error = drv->probe(of_dev, match); + if (of_driver_match_device(dev, dev->driver)) + error = drv->probe(of_dev); if (error) of_dev_put(of_dev); @@ -362,7 +360,7 @@ static int ibmebus_bus_device_probe(struct device *dev) static int ibmebus_bus_device_remove(struct device *dev) { struct platform_device *of_dev = to_platform_device(dev); - struct of_platform_driver *drv = to_of_platform_driver(dev->driver); + struct platform_driver *drv = to_platform_driver(dev->driver); if (dev->driver && drv->remove) drv->remove(of_dev); @@ -372,7 +370,7 @@ static int ibmebus_bus_device_remove(struct device *dev) static void ibmebus_bus_device_shutdown(struct device *dev) { struct platform_device *of_dev = to_platform_device(dev); - struct of_platform_driver *drv = to_of_platform_driver(dev->driver); + struct platform_driver *drv = to_platform_driver(dev->driver); if (dev->driver && drv->shutdown) drv->shutdown(of_dev); @@ -419,7 +417,7 @@ struct device_attribute ibmebus_bus_device_attrs[] = { static int ibmebus_bus_legacy_suspend(struct device *dev, pm_message_t mesg) { struct platform_device *of_dev = to_platform_device(dev); - struct of_platform_driver *drv = to_of_platform_driver(dev->driver); + struct platform_driver *drv = to_platform_driver(dev->driver); int ret = 0; if (dev->driver && drv->suspend) @@ -430,7 +428,7 @@ static int ibmebus_bus_legacy_suspend(struct device *dev, pm_message_t mesg) static int ibmebus_bus_legacy_resume(struct device *dev) { struct platform_device *of_dev = to_platform_device(dev); - struct of_platform_driver *drv = to_of_platform_driver(dev->driver); + struct platform_driver *drv = to_platform_driver(dev->driver); int ret = 0; if (dev->driver && drv->resume) diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 939ea7ef0dc8..d7216c9abda1 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -85,7 +85,7 @@ int powersave_nap; /* * Register the sysctl to set/clear powersave_nap. */ -static ctl_table powersave_nap_ctl_table[]={ +static struct ctl_table powersave_nap_ctl_table[] = { { .procname = "powersave-nap", .data = &powersave_nap, @@ -95,7 +95,7 @@ static ctl_table powersave_nap_ctl_table[]={ }, {} }; -static ctl_table powersave_nap_sysctl_root[] = { +static struct ctl_table powersave_nap_sysctl_root[] = { { .procname = "kernel", .mode = 0555, diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c index 50e90b7e7139..24b968f8e4d8 100644 --- a/arch/powerpc/kernel/io-workarounds.c +++ b/arch/powerpc/kernel/io-workarounds.c @@ -53,8 +53,10 @@ static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr) return NULL; } +#ifdef CONFIG_PPC_INDIRECT_MMIO struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) { + unsigned hugepage_shift; struct iowa_bus *bus; int token; @@ -70,11 +72,17 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END) return NULL; - ptep = find_linux_pte(init_mm.pgd, vaddr); + ptep = find_linux_pte_or_hugepte(init_mm.pgd, vaddr, + &hugepage_shift); if (ptep == NULL) paddr = 0; - else + else { + /* + * we don't have hugepages backing iomem + */ + WARN_ON(hugepage_shift); paddr = pte_pfn(*ptep) << PAGE_SHIFT; + } bus = iowa_pci_find(vaddr, paddr); if (bus == NULL) @@ -83,13 +91,25 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) return bus; } +#else /* CONFIG_PPC_INDIRECT_MMIO */ +struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) +{ + return NULL; +} +#endif /* !CONFIG_PPC_INDIRECT_MMIO */ +#ifdef CONFIG_PPC_INDIRECT_PIO struct iowa_bus *iowa_pio_find_bus(unsigned long port) { unsigned long vaddr = (unsigned long)pci_io_base + port; return iowa_pci_find(vaddr, 0); } - +#else +struct iowa_bus *iowa_pio_find_bus(unsigned long port) +{ + return NULL; +} +#endif #define DEF_PCI_AC_RET(name, ret, at, al, space, aa) \ static ret iowa_##name at \ @@ -130,6 +150,7 @@ static const struct ppc_pci_io iowa_pci_io = { }; +#ifdef CONFIG_PPC_INDIRECT_MMIO static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size, unsigned long flags, void *caller) { @@ -144,6 +165,9 @@ static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size, } return res; } +#else /* CONFIG_PPC_INDIRECT_MMIO */ +#define iowa_ioremap NULL +#endif /* !CONFIG_PPC_INDIRECT_MMIO */ /* Enable IO workaround */ static void io_workaround_init(void) diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c index 886381f32c3d..2a2b4aeab80f 100644 --- a/arch/powerpc/kernel/io.c +++ b/arch/powerpc/kernel/io.c @@ -25,6 +25,9 @@ #include <asm/firmware.h> #include <asm/bug.h> +/* See definition in io.h */ +bool isa_io_special; + void _insb(const volatile u8 __iomem *port, void *buf, long count) { u8 *tbuf = buf; diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index c0d0dbddfba1..572bb5b95f35 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -36,6 +36,8 @@ #include <linux/hash.h> #include <linux/fault-inject.h> #include <linux/pci.h> +#include <linux/iommu.h> +#include <linux/sched.h> #include <asm/io.h> #include <asm/prom.h> #include <asm/iommu.h> @@ -44,6 +46,7 @@ #include <asm/kdump.h> #include <asm/fadump.h> #include <asm/vio.h> +#include <asm/tce.h> #define DBG(...) @@ -102,7 +105,7 @@ static int __init fail_iommu_debugfs(void) struct dentry *dir = fault_create_debugfs_attr("fail_iommu", NULL, &fail_iommu); - return PTR_RET(dir); + return PTR_ERR_OR_ZERO(dir); } late_initcall(fail_iommu_debugfs); @@ -658,7 +661,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid) /* number of bytes needed for the bitmap */ sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long); - page = alloc_pages_node(nid, GFP_ATOMIC, get_order(sz)); + page = alloc_pages_node(nid, GFP_KERNEL, get_order(sz)); if (!page) panic("iommu_init_table: Can't allocate %ld bytes\n", sz); tbl->it_map = page_address(page); @@ -724,6 +727,13 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name) if (tbl->it_offset == 0) clear_bit(0, tbl->it_map); +#ifdef CONFIG_IOMMU_API + if (tbl->it_group) { + iommu_group_put(tbl->it_group); + BUG_ON(tbl->it_group); + } +#endif + /* verify that table contains no entries */ if (!bitmap_empty(tbl->it_map, tbl->it_size)) pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name); @@ -860,3 +870,316 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size, free_pages((unsigned long)vaddr, get_order(size)); } } + +#ifdef CONFIG_IOMMU_API +/* + * SPAPR TCE API + */ +static void group_release(void *iommu_data) +{ + struct iommu_table *tbl = iommu_data; + tbl->it_group = NULL; +} + +void iommu_register_group(struct iommu_table *tbl, + int pci_domain_number, unsigned long pe_num) +{ + struct iommu_group *grp; + char *name; + + grp = iommu_group_alloc(); + if (IS_ERR(grp)) { + pr_warn("powerpc iommu api: cannot create new group, err=%ld\n", + PTR_ERR(grp)); + return; + } + tbl->it_group = grp; + iommu_group_set_iommudata(grp, tbl, group_release); + name = kasprintf(GFP_KERNEL, "domain%d-pe%lx", + pci_domain_number, pe_num); + if (!name) + return; + iommu_group_set_name(grp, name); + kfree(name); +} + +enum dma_data_direction iommu_tce_direction(unsigned long tce) +{ + if ((tce & TCE_PCI_READ) && (tce & TCE_PCI_WRITE)) + return DMA_BIDIRECTIONAL; + else if (tce & TCE_PCI_READ) + return DMA_TO_DEVICE; + else if (tce & TCE_PCI_WRITE) + return DMA_FROM_DEVICE; + else + return DMA_NONE; +} +EXPORT_SYMBOL_GPL(iommu_tce_direction); + +void iommu_flush_tce(struct iommu_table *tbl) +{ + /* Flush/invalidate TLB caches if necessary */ + if (ppc_md.tce_flush) + ppc_md.tce_flush(tbl); + + /* Make sure updates are seen by hardware */ + mb(); +} +EXPORT_SYMBOL_GPL(iommu_flush_tce); + +int iommu_tce_clear_param_check(struct iommu_table *tbl, + unsigned long ioba, unsigned long tce_value, + unsigned long npages) +{ + /* ppc_md.tce_free() does not support any value but 0 */ + if (tce_value) + return -EINVAL; + + if (ioba & ~IOMMU_PAGE_MASK) + return -EINVAL; + + ioba >>= IOMMU_PAGE_SHIFT; + if (ioba < tbl->it_offset) + return -EINVAL; + + if ((ioba + npages) > (tbl->it_offset + tbl->it_size)) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check); + +int iommu_tce_put_param_check(struct iommu_table *tbl, + unsigned long ioba, unsigned long tce) +{ + if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ))) + return -EINVAL; + + if (tce & ~(IOMMU_PAGE_MASK | TCE_PCI_WRITE | TCE_PCI_READ)) + return -EINVAL; + + if (ioba & ~IOMMU_PAGE_MASK) + return -EINVAL; + + ioba >>= IOMMU_PAGE_SHIFT; + if (ioba < tbl->it_offset) + return -EINVAL; + + if ((ioba + 1) > (tbl->it_offset + tbl->it_size)) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL_GPL(iommu_tce_put_param_check); + +unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry) +{ + unsigned long oldtce; + struct iommu_pool *pool = get_pool(tbl, entry); + + spin_lock(&(pool->lock)); + + oldtce = ppc_md.tce_get(tbl, entry); + if (oldtce & (TCE_PCI_WRITE | TCE_PCI_READ)) + ppc_md.tce_free(tbl, entry, 1); + else + oldtce = 0; + + spin_unlock(&(pool->lock)); + + return oldtce; +} +EXPORT_SYMBOL_GPL(iommu_clear_tce); + +int iommu_clear_tces_and_put_pages(struct iommu_table *tbl, + unsigned long entry, unsigned long pages) +{ + unsigned long oldtce; + struct page *page; + + for ( ; pages; --pages, ++entry) { + oldtce = iommu_clear_tce(tbl, entry); + if (!oldtce) + continue; + + page = pfn_to_page(oldtce >> PAGE_SHIFT); + WARN_ON(!page); + if (page) { + if (oldtce & TCE_PCI_WRITE) + SetPageDirty(page); + put_page(page); + } + } + + return 0; +} +EXPORT_SYMBOL_GPL(iommu_clear_tces_and_put_pages); + +/* + * hwaddr is a kernel virtual address here (0xc... bazillion), + * tce_build converts it to a physical address. + */ +int iommu_tce_build(struct iommu_table *tbl, unsigned long entry, + unsigned long hwaddr, enum dma_data_direction direction) +{ + int ret = -EBUSY; + unsigned long oldtce; + struct iommu_pool *pool = get_pool(tbl, entry); + + spin_lock(&(pool->lock)); + + oldtce = ppc_md.tce_get(tbl, entry); + /* Add new entry if it is not busy */ + if (!(oldtce & (TCE_PCI_WRITE | TCE_PCI_READ))) + ret = ppc_md.tce_build(tbl, entry, 1, hwaddr, direction, NULL); + + spin_unlock(&(pool->lock)); + + /* if (unlikely(ret)) + pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n", + __func__, hwaddr, entry << IOMMU_PAGE_SHIFT, + hwaddr, ret); */ + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_tce_build); + +int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry, + unsigned long tce) +{ + int ret; + struct page *page = NULL; + unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK & ~PAGE_MASK; + enum dma_data_direction direction = iommu_tce_direction(tce); + + ret = get_user_pages_fast(tce & PAGE_MASK, 1, + direction != DMA_TO_DEVICE, &page); + if (unlikely(ret != 1)) { + /* pr_err("iommu_tce: get_user_pages_fast failed tce=%lx ioba=%lx ret=%d\n", + tce, entry << IOMMU_PAGE_SHIFT, ret); */ + return -EFAULT; + } + hwaddr = (unsigned long) page_address(page) + offset; + + ret = iommu_tce_build(tbl, entry, hwaddr, direction); + if (ret) + put_page(page); + + if (ret < 0) + pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%d\n", + __func__, entry << IOMMU_PAGE_SHIFT, tce, ret); + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_put_tce_user_mode); + +int iommu_take_ownership(struct iommu_table *tbl) +{ + unsigned long sz = (tbl->it_size + 7) >> 3; + + if (tbl->it_offset == 0) + clear_bit(0, tbl->it_map); + + if (!bitmap_empty(tbl->it_map, tbl->it_size)) { + pr_err("iommu_tce: it_map is not empty"); + return -EBUSY; + } + + memset(tbl->it_map, 0xff, sz); + iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size); + + return 0; +} +EXPORT_SYMBOL_GPL(iommu_take_ownership); + +void iommu_release_ownership(struct iommu_table *tbl) +{ + unsigned long sz = (tbl->it_size + 7) >> 3; + + iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size); + memset(tbl->it_map, 0, sz); + + /* Restore bit#0 set by iommu_init_table() */ + if (tbl->it_offset == 0) + set_bit(0, tbl->it_map); +} +EXPORT_SYMBOL_GPL(iommu_release_ownership); + +static int iommu_add_device(struct device *dev) +{ + struct iommu_table *tbl; + int ret = 0; + + if (WARN_ON(dev->iommu_group)) { + pr_warn("iommu_tce: device %s is already in iommu group %d, skipping\n", + dev_name(dev), + iommu_group_id(dev->iommu_group)); + return -EBUSY; + } + + tbl = get_iommu_table_base(dev); + if (!tbl || !tbl->it_group) { + pr_debug("iommu_tce: skipping device %s with no tbl\n", + dev_name(dev)); + return 0; + } + + pr_debug("iommu_tce: adding %s to iommu group %d\n", + dev_name(dev), iommu_group_id(tbl->it_group)); + + ret = iommu_group_add_device(tbl->it_group, dev); + if (ret < 0) + pr_err("iommu_tce: %s has not been added, ret=%d\n", + dev_name(dev), ret); + + return ret; +} + +static void iommu_del_device(struct device *dev) +{ + iommu_group_remove_device(dev); +} + +static int iommu_bus_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + + switch (action) { + case BUS_NOTIFY_ADD_DEVICE: + return iommu_add_device(dev); + case BUS_NOTIFY_DEL_DEVICE: + iommu_del_device(dev); + return 0; + default: + return 0; + } +} + +static struct notifier_block tce_iommu_bus_nb = { + .notifier_call = iommu_bus_notifier, +}; + +static int __init tce_iommu_init(void) +{ + struct pci_dev *pdev = NULL; + + BUILD_BUG_ON(PAGE_SIZE < IOMMU_PAGE_SIZE); + + for_each_pci_dev(pdev) + iommu_add_device(&pdev->dev); + + bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb); + return 0; +} + +subsys_initcall_sync(tce_iommu_init); + +#else + +void iommu_register_group(struct iommu_table *tbl, + int pci_domain_number, unsigned long pe_num) +{ +} + +#endif /* CONFIG_IOMMU_API */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 5cbcf4d5a808..57d286a78f86 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -116,8 +116,6 @@ static inline notrace int decrementer_check_overflow(void) u64 now = get_tb_or_rtc(); u64 *next_tb = &__get_cpu_var(decrementers_next_tb); - if (now >= *next_tb) - set_dec(1); return now >= *next_tb; } @@ -162,7 +160,7 @@ notrace unsigned int __check_irq_replay(void) * in case we also had a rollover while hard disabled */ local_paca->irq_happened &= ~PACA_IRQ_DEC; - if (decrementer_check_overflow()) + if ((happened & PACA_IRQ_DEC) || decrementer_check_overflow()) return 0x900; /* Finally check if an external interrupt happened */ @@ -364,7 +362,7 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, "%10u ", per_cpu(irq_stat, j).spurious_irqs); seq_printf(p, " Spurious interrupts\n"); - seq_printf(p, "%*s: ", prec, "CNT"); + seq_printf(p, "%*s: ", prec, "PMI"); for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(irq_stat, j).pmu_irqs); seq_printf(p, " Performance monitoring interrupts\n"); @@ -443,50 +441,6 @@ void migrate_irqs(void) } #endif -static inline void handle_one_irq(unsigned int irq) -{ - struct thread_info *curtp, *irqtp; - unsigned long saved_sp_limit; - struct irq_desc *desc; - - desc = irq_to_desc(irq); - if (!desc) - return; - - /* Switch to the irq stack to handle this */ - curtp = current_thread_info(); - irqtp = hardirq_ctx[smp_processor_id()]; - - if (curtp == irqtp) { - /* We're already on the irq stack, just handle it */ - desc->handle_irq(irq, desc); - return; - } - - saved_sp_limit = current->thread.ksp_limit; - - irqtp->task = curtp->task; - irqtp->flags = 0; - - /* Copy the softirq bits in preempt_count so that the - * softirq checks work in the hardirq context. */ - irqtp->preempt_count = (irqtp->preempt_count & ~SOFTIRQ_MASK) | - (curtp->preempt_count & SOFTIRQ_MASK); - - current->thread.ksp_limit = (unsigned long)irqtp + - _ALIGN_UP(sizeof(struct thread_info), 16); - - call_handle_irq(irq, desc, irqtp, desc->handle_irq); - current->thread.ksp_limit = saved_sp_limit; - irqtp->task = NULL; - - /* Set any flag that may have been set on the - * alternate stack - */ - if (irqtp->flags) - set_bits(irqtp->flags, &curtp->flags); -} - static inline void check_stack_overflow(void) { #ifdef CONFIG_DEBUG_STACKOVERFLOW @@ -503,9 +457,9 @@ static inline void check_stack_overflow(void) #endif } -void do_IRQ(struct pt_regs *regs) +void __do_irq(struct pt_regs *regs) { - struct pt_regs *old_regs = set_irq_regs(regs); + struct irq_desc *desc; unsigned int irq; irq_enter(); @@ -521,18 +475,56 @@ void do_IRQ(struct pt_regs *regs) */ irq = ppc_md.get_irq(); - /* We can hard enable interrupts now */ + /* We can hard enable interrupts now to allow perf interrupts */ may_hard_irq_enable(); /* And finally process it */ - if (irq != NO_IRQ) - handle_one_irq(irq); - else + if (unlikely(irq == NO_IRQ)) __get_cpu_var(irq_stat).spurious_irqs++; + else { + desc = irq_to_desc(irq); + if (likely(desc)) + desc->handle_irq(irq, desc); + } trace_irq_exit(regs); irq_exit(); +} + +void do_IRQ(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + struct thread_info *curtp, *irqtp; + + /* Switch to the irq stack to handle this */ + curtp = current_thread_info(); + irqtp = hardirq_ctx[raw_smp_processor_id()]; + + /* Already there ? */ + if (unlikely(curtp == irqtp)) { + __do_irq(regs); + set_irq_regs(old_regs); + return; + } + + /* Prepare the thread_info in the irq stack */ + irqtp->task = curtp->task; + irqtp->flags = 0; + + /* Copy the preempt_count so that the [soft]irq checks work. */ + irqtp->preempt_count = curtp->preempt_count; + + /* Switch stack and call */ + call_do_irq(regs, irqtp); + + /* Restore stack limit */ + irqtp->task = NULL; + + /* Copy back updates to the thread_info */ + if (irqtp->flags) + set_bits(irqtp->flags, &curtp->flags); + set_irq_regs(old_regs); } @@ -594,28 +586,22 @@ void irq_ctx_init(void) memset((void *)softirq_ctx[i], 0, THREAD_SIZE); tp = softirq_ctx[i]; tp->cpu = i; - tp->preempt_count = 0; memset((void *)hardirq_ctx[i], 0, THREAD_SIZE); tp = hardirq_ctx[i]; tp->cpu = i; - tp->preempt_count = HARDIRQ_OFFSET; } } static inline void do_softirq_onstack(void) { struct thread_info *curtp, *irqtp; - unsigned long saved_sp_limit = current->thread.ksp_limit; curtp = current_thread_info(); irqtp = softirq_ctx[smp_processor_id()]; irqtp->task = curtp->task; irqtp->flags = 0; - current->thread.ksp_limit = (unsigned long)irqtp + - _ALIGN_UP(sizeof(struct thread_info), 16); call_do_softirq(irqtp); - current->thread.ksp_limit = saved_sp_limit; irqtp->task = NULL; /* Set any flag that may have been set on the diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 11f5b03a0b06..2156ea90eb54 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -36,12 +36,6 @@ #include <asm/sstep.h> #include <asm/uaccess.h> -#ifdef CONFIG_PPC_ADV_DEBUG_REGS -#define MSR_SINGLESTEP (MSR_DE) -#else -#define MSR_SINGLESTEP (MSR_SE) -#endif - DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); @@ -104,19 +98,7 @@ void __kprobes arch_remove_kprobe(struct kprobe *p) static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) { - /* We turn off async exceptions to ensure that the single step will - * be for the instruction we have the kprobe on, if we dont its - * possible we'd get the single step reported for an exception handler - * like Decrementer or External Interrupt */ - regs->msr &= ~MSR_EE; - regs->msr |= MSR_SINGLESTEP; -#ifdef CONFIG_PPC_ADV_DEBUG_REGS - regs->msr &= ~MSR_CE; - mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); -#ifdef CONFIG_PPC_47x - isync(); -#endif -#endif + enable_single_step(regs); /* * On powerpc we should single step on the original diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 6782221d49bd..db28032e320e 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -750,13 +750,8 @@ EXPORT_SYMBOL_GPL(kvm_hypercall); static __init void kvm_free_tmp(void) { - unsigned long start, end; - - start = (ulong)&kvm_tmp[kvm_tmp_index + (PAGE_SIZE - 1)] & PAGE_MASK; - end = (ulong)&kvm_tmp[ARRAY_SIZE(kvm_tmp)] & PAGE_MASK; - - /* Free the tmp space we don't need */ - free_reserved_area(start, end, 0, NULL); + free_reserved_area(&kvm_tmp[kvm_tmp_index], + &kvm_tmp[ARRAY_SIZE(kvm_tmp)], -1, NULL); } static int __init kvm_guest_init(void) diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 0733b05eb856..22e88dd2f34a 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -99,7 +99,7 @@ static int __init add_legacy_port(struct device_node *np, int want_index, legacy_serial_count = index + 1; /* Check if there is a port who already claimed our slot */ - if (legacy_serial_infos[index].np != 0) { + if (legacy_serial_infos[index].np != NULL) { /* if we still have some room, move it, else override */ if (legacy_serial_count < MAX_LEGACY_SERIAL_PORTS) { printk(KERN_DEBUG "Moved legacy port %d -> %d\n", @@ -152,7 +152,7 @@ static int __init add_legacy_soc_port(struct device_node *np, struct device_node *soc_dev) { u64 addr; - const u32 *addrp; + const __be32 *addrp; upf_t flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ | UPF_FIXED_PORT; struct device_node *tsi = of_get_parent(np); @@ -221,14 +221,19 @@ static int __init add_legacy_isa_port(struct device_node *np, /* Translate ISA address. If it fails, we still register the port * with no translated address so that it can be picked up as an IO * port later by the serial driver + * + * Note: Don't even try on P8 lpc, we know it's not directly mapped */ - taddr = of_translate_address(np, reg); - if (taddr == OF_BAD_ADDR) + if (!of_device_is_compatible(isa_brg, "ibm,power8-lpc")) { + taddr = of_translate_address(np, reg); + if (taddr == OF_BAD_ADDR) + taddr = 0; + } else taddr = 0; /* Add port, irq will be dealt with later */ - return add_legacy_port(np, index, UPIO_PORT, be32_to_cpu(reg[1]), taddr, - NO_IRQ, UPF_BOOT_AUTOCONF, 0); + return add_legacy_port(np, index, UPIO_PORT, be32_to_cpu(reg[1]), + taddr, NO_IRQ, UPF_BOOT_AUTOCONF, 0); } @@ -237,7 +242,7 @@ static int __init add_legacy_pci_port(struct device_node *np, struct device_node *pci_dev) { u64 addr, base; - const u32 *addrp; + const __be32 *addrp; unsigned int flags; int iotype, index = -1, lindex = 0; @@ -270,7 +275,7 @@ static int __init add_legacy_pci_port(struct device_node *np, if (iotype == UPIO_MEM) base = addr; else - base = addrp[2]; + base = of_read_number(&addrp[2], 1); /* Try to guess an index... If we have subdevices of the pci dev, * we get to their "reg" property @@ -307,19 +312,31 @@ static int __init add_legacy_pci_port(struct device_node *np, static void __init setup_legacy_serial_console(int console) { - struct legacy_serial_info *info = - &legacy_serial_infos[console]; + struct legacy_serial_info *info = &legacy_serial_infos[console]; + struct plat_serial8250_port *port = &legacy_serial_ports[console]; void __iomem *addr; - if (info->taddr == 0) - return; - addr = ioremap(info->taddr, 0x1000); - if (addr == NULL) - return; + /* Check if a translated MMIO address has been found */ + if (info->taddr) { + addr = ioremap(info->taddr, 0x1000); + if (addr == NULL) + return; + udbg_uart_init_mmio(addr, 1); + } else { + /* Check if it's PIO and we support untranslated PIO */ + if (port->iotype == UPIO_PORT && isa_io_special) + udbg_uart_init_pio(port->iobase, 1); + else + return; + } + + /* Try to query the current speed */ if (info->speed == 0) - info->speed = udbg_probe_uart_speed(addr, info->clock); + info->speed = udbg_probe_uart_speed(info->clock); + + /* Set it up */ DBG("default console speed = %d\n", info->speed); - udbg_init_uart(addr, info->speed, info->clock); + udbg_uart_setup(info->speed, info->clock); } /* @@ -367,10 +384,13 @@ void __init find_legacy_serial_ports(void) /* Next, fill our array with ISA ports */ for_each_node_by_type(np, "serial") { struct device_node *isa = of_get_parent(np); - if (isa && !strcmp(isa->name, "isa")) { - index = add_legacy_isa_port(np, isa); - if (index >= 0 && np == stdout) - legacy_serial_console = index; + if (isa && (!strcmp(isa->name, "isa") || + !strcmp(isa->name, "lpc"))) { + if (of_device_is_available(np)) { + index = add_legacy_isa_port(np, isa); + if (index >= 0 && np == stdout) + legacy_serial_console = index; + } } of_node_put(isa); } diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c deleted file mode 100644 index d92f3871e9cf..000000000000 --- a/arch/powerpc/kernel/lparcfg.c +++ /dev/null @@ -1,712 +0,0 @@ -/* - * PowerPC64 LPAR Configuration Information Driver - * - * Dave Engebretsen engebret@us.ibm.com - * Copyright (c) 2003 Dave Engebretsen - * Will Schmidt willschm@us.ibm.com - * SPLPAR updates, Copyright (c) 2003 Will Schmidt IBM Corporation. - * seq_file updates, Copyright (c) 2004 Will Schmidt IBM Corporation. - * Nathan Lynch nathanl@austin.ibm.com - * Added lparcfg_write, Copyright (C) 2004 Nathan Lynch IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * This driver creates a proc file at /proc/ppc64/lparcfg which contains - * keyword - value pairs that specify the configuration of the partition. - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/proc_fs.h> -#include <linux/init.h> -#include <linux/seq_file.h> -#include <linux/slab.h> -#include <asm/uaccess.h> -#include <asm/lppaca.h> -#include <asm/hvcall.h> -#include <asm/firmware.h> -#include <asm/rtas.h> -#include <asm/time.h> -#include <asm/prom.h> -#include <asm/vdso_datapage.h> -#include <asm/vio.h> -#include <asm/mmu.h> - -#define MODULE_VERS "1.9" -#define MODULE_NAME "lparcfg" - -/* #define LPARCFG_DEBUG */ - -/* - * Track sum of all purrs across all processors. This is used to further - * calculate usage values by different applications - */ -static unsigned long get_purr(void) -{ - unsigned long sum_purr = 0; - int cpu; - - for_each_possible_cpu(cpu) { - struct cpu_usage *cu; - - cu = &per_cpu(cpu_usage_array, cpu); - sum_purr += cu->current_tb; - } - return sum_purr; -} - -/* - * Methods used to fetch LPAR data when running on a pSeries platform. - */ - -struct hvcall_ppp_data { - u64 entitlement; - u64 unallocated_entitlement; - u16 group_num; - u16 pool_num; - u8 capped; - u8 weight; - u8 unallocated_weight; - u16 active_procs_in_pool; - u16 active_system_procs; - u16 phys_platform_procs; - u32 max_proc_cap_avail; - u32 entitled_proc_cap_avail; -}; - -/* - * H_GET_PPP hcall returns info in 4 parms. - * entitled_capacity,unallocated_capacity, - * aggregation, resource_capability). - * - * R4 = Entitled Processor Capacity Percentage. - * R5 = Unallocated Processor Capacity Percentage. - * R6 (AABBCCDDEEFFGGHH). - * XXXX - reserved (0) - * XXXX - reserved (0) - * XXXX - Group Number - * XXXX - Pool Number. - * R7 (IIJJKKLLMMNNOOPP). - * XX - reserved. (0) - * XX - bit 0-6 reserved (0). bit 7 is Capped indicator. - * XX - variable processor Capacity Weight - * XX - Unallocated Variable Processor Capacity Weight. - * XXXX - Active processors in Physical Processor Pool. - * XXXX - Processors active on platform. - * R8 (QQQQRRRRRRSSSSSS). if ibm,partition-performance-parameters-level >= 1 - * XXXX - Physical platform procs allocated to virtualization. - * XXXXXX - Max procs capacity % available to the partitions pool. - * XXXXXX - Entitled procs capacity % available to the - * partitions pool. - */ -static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data) -{ - unsigned long rc; - unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; - - rc = plpar_hcall9(H_GET_PPP, retbuf); - - ppp_data->entitlement = retbuf[0]; - ppp_data->unallocated_entitlement = retbuf[1]; - - ppp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff; - ppp_data->pool_num = retbuf[2] & 0xffff; - - ppp_data->capped = (retbuf[3] >> 6 * 8) & 0x01; - ppp_data->weight = (retbuf[3] >> 5 * 8) & 0xff; - ppp_data->unallocated_weight = (retbuf[3] >> 4 * 8) & 0xff; - ppp_data->active_procs_in_pool = (retbuf[3] >> 2 * 8) & 0xffff; - ppp_data->active_system_procs = retbuf[3] & 0xffff; - - ppp_data->phys_platform_procs = retbuf[4] >> 6 * 8; - ppp_data->max_proc_cap_avail = (retbuf[4] >> 3 * 8) & 0xffffff; - ppp_data->entitled_proc_cap_avail = retbuf[4] & 0xffffff; - - return rc; -} - -static unsigned h_pic(unsigned long *pool_idle_time, - unsigned long *num_procs) -{ - unsigned long rc; - unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; - - rc = plpar_hcall(H_PIC, retbuf); - - *pool_idle_time = retbuf[0]; - *num_procs = retbuf[1]; - - return rc; -} - -/* - * parse_ppp_data - * Parse out the data returned from h_get_ppp and h_pic - */ -static void parse_ppp_data(struct seq_file *m) -{ - struct hvcall_ppp_data ppp_data; - struct device_node *root; - const int *perf_level; - int rc; - - rc = h_get_ppp(&ppp_data); - if (rc) - return; - - seq_printf(m, "partition_entitled_capacity=%lld\n", - ppp_data.entitlement); - seq_printf(m, "group=%d\n", ppp_data.group_num); - seq_printf(m, "system_active_processors=%d\n", - ppp_data.active_system_procs); - - /* pool related entries are appropriate for shared configs */ - if (lppaca_of(0).shared_proc) { - unsigned long pool_idle_time, pool_procs; - - seq_printf(m, "pool=%d\n", ppp_data.pool_num); - - /* report pool_capacity in percentage */ - seq_printf(m, "pool_capacity=%d\n", - ppp_data.active_procs_in_pool * 100); - - h_pic(&pool_idle_time, &pool_procs); - seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time); - seq_printf(m, "pool_num_procs=%ld\n", pool_procs); - } - - seq_printf(m, "unallocated_capacity_weight=%d\n", - ppp_data.unallocated_weight); - seq_printf(m, "capacity_weight=%d\n", ppp_data.weight); - seq_printf(m, "capped=%d\n", ppp_data.capped); - seq_printf(m, "unallocated_capacity=%lld\n", - ppp_data.unallocated_entitlement); - - /* The last bits of information returned from h_get_ppp are only - * valid if the ibm,partition-performance-parameters-level - * property is >= 1. - */ - root = of_find_node_by_path("/"); - if (root) { - perf_level = of_get_property(root, - "ibm,partition-performance-parameters-level", - NULL); - if (perf_level && (*perf_level >= 1)) { - seq_printf(m, - "physical_procs_allocated_to_virtualization=%d\n", - ppp_data.phys_platform_procs); - seq_printf(m, "max_proc_capacity_available=%d\n", - ppp_data.max_proc_cap_avail); - seq_printf(m, "entitled_proc_capacity_available=%d\n", - ppp_data.entitled_proc_cap_avail); - } - - of_node_put(root); - } -} - -/** - * parse_mpp_data - * Parse out data returned from h_get_mpp - */ -static void parse_mpp_data(struct seq_file *m) -{ - struct hvcall_mpp_data mpp_data; - int rc; - - rc = h_get_mpp(&mpp_data); - if (rc) - return; - - seq_printf(m, "entitled_memory=%ld\n", mpp_data.entitled_mem); - - if (mpp_data.mapped_mem != -1) - seq_printf(m, "mapped_entitled_memory=%ld\n", - mpp_data.mapped_mem); - - seq_printf(m, "entitled_memory_group_number=%d\n", mpp_data.group_num); - seq_printf(m, "entitled_memory_pool_number=%d\n", mpp_data.pool_num); - - seq_printf(m, "entitled_memory_weight=%d\n", mpp_data.mem_weight); - seq_printf(m, "unallocated_entitled_memory_weight=%d\n", - mpp_data.unallocated_mem_weight); - seq_printf(m, "unallocated_io_mapping_entitlement=%ld\n", - mpp_data.unallocated_entitlement); - - if (mpp_data.pool_size != -1) - seq_printf(m, "entitled_memory_pool_size=%ld bytes\n", - mpp_data.pool_size); - - seq_printf(m, "entitled_memory_loan_request=%ld\n", - mpp_data.loan_request); - - seq_printf(m, "backing_memory=%ld bytes\n", mpp_data.backing_mem); -} - -/** - * parse_mpp_x_data - * Parse out data returned from h_get_mpp_x - */ -static void parse_mpp_x_data(struct seq_file *m) -{ - struct hvcall_mpp_x_data mpp_x_data; - - if (!firmware_has_feature(FW_FEATURE_XCMO)) - return; - if (h_get_mpp_x(&mpp_x_data)) - return; - - seq_printf(m, "coalesced_bytes=%ld\n", mpp_x_data.coalesced_bytes); - - if (mpp_x_data.pool_coalesced_bytes) - seq_printf(m, "pool_coalesced_bytes=%ld\n", - mpp_x_data.pool_coalesced_bytes); - if (mpp_x_data.pool_purr_cycles) - seq_printf(m, "coalesce_pool_purr=%ld\n", mpp_x_data.pool_purr_cycles); - if (mpp_x_data.pool_spurr_cycles) - seq_printf(m, "coalesce_pool_spurr=%ld\n", mpp_x_data.pool_spurr_cycles); -} - -#define SPLPAR_CHARACTERISTICS_TOKEN 20 -#define SPLPAR_MAXLENGTH 1026*(sizeof(char)) - -/* - * parse_system_parameter_string() - * Retrieve the potential_processors, max_entitled_capacity and friends - * through the get-system-parameter rtas call. Replace keyword strings as - * necessary. - */ -static void parse_system_parameter_string(struct seq_file *m) -{ - int call_status; - - unsigned char *local_buffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL); - if (!local_buffer) { - printk(KERN_ERR "%s %s kmalloc failure at line %d\n", - __FILE__, __func__, __LINE__); - return; - } - - spin_lock(&rtas_data_buf_lock); - memset(rtas_data_buf, 0, SPLPAR_MAXLENGTH); - call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, - NULL, - SPLPAR_CHARACTERISTICS_TOKEN, - __pa(rtas_data_buf), - RTAS_DATA_BUF_SIZE); - memcpy(local_buffer, rtas_data_buf, SPLPAR_MAXLENGTH); - local_buffer[SPLPAR_MAXLENGTH - 1] = '\0'; - spin_unlock(&rtas_data_buf_lock); - - if (call_status != 0) { - printk(KERN_INFO - "%s %s Error calling get-system-parameter (0x%x)\n", - __FILE__, __func__, call_status); - } else { - int splpar_strlen; - int idx, w_idx; - char *workbuffer = kzalloc(SPLPAR_MAXLENGTH, GFP_KERNEL); - if (!workbuffer) { - printk(KERN_ERR "%s %s kmalloc failure at line %d\n", - __FILE__, __func__, __LINE__); - kfree(local_buffer); - return; - } -#ifdef LPARCFG_DEBUG - printk(KERN_INFO "success calling get-system-parameter\n"); -#endif - splpar_strlen = local_buffer[0] * 256 + local_buffer[1]; - local_buffer += 2; /* step over strlen value */ - - w_idx = 0; - idx = 0; - while ((*local_buffer) && (idx < splpar_strlen)) { - workbuffer[w_idx++] = local_buffer[idx++]; - if ((local_buffer[idx] == ',') - || (local_buffer[idx] == '\0')) { - workbuffer[w_idx] = '\0'; - if (w_idx) { - /* avoid the empty string */ - seq_printf(m, "%s\n", workbuffer); - } - memset(workbuffer, 0, SPLPAR_MAXLENGTH); - idx++; /* skip the comma */ - w_idx = 0; - } else if (local_buffer[idx] == '=') { - /* code here to replace workbuffer contents - with different keyword strings */ - if (0 == strcmp(workbuffer, "MaxEntCap")) { - strcpy(workbuffer, - "partition_max_entitled_capacity"); - w_idx = strlen(workbuffer); - } - if (0 == strcmp(workbuffer, "MaxPlatProcs")) { - strcpy(workbuffer, - "system_potential_processors"); - w_idx = strlen(workbuffer); - } - } - } - kfree(workbuffer); - local_buffer -= 2; /* back up over strlen value */ - } - kfree(local_buffer); -} - -/* Return the number of processors in the system. - * This function reads through the device tree and counts - * the virtual processors, this does not include threads. - */ -static int lparcfg_count_active_processors(void) -{ - struct device_node *cpus_dn = NULL; - int count = 0; - - while ((cpus_dn = of_find_node_by_type(cpus_dn, "cpu"))) { -#ifdef LPARCFG_DEBUG - printk(KERN_ERR "cpus_dn %p\n", cpus_dn); -#endif - count++; - } - return count; -} - -static void pseries_cmo_data(struct seq_file *m) -{ - int cpu; - unsigned long cmo_faults = 0; - unsigned long cmo_fault_time = 0; - - seq_printf(m, "cmo_enabled=%d\n", firmware_has_feature(FW_FEATURE_CMO)); - - if (!firmware_has_feature(FW_FEATURE_CMO)) - return; - - for_each_possible_cpu(cpu) { - cmo_faults += lppaca_of(cpu).cmo_faults; - cmo_fault_time += lppaca_of(cpu).cmo_fault_time; - } - - seq_printf(m, "cmo_faults=%lu\n", cmo_faults); - seq_printf(m, "cmo_fault_time_usec=%lu\n", - cmo_fault_time / tb_ticks_per_usec); - seq_printf(m, "cmo_primary_psp=%d\n", cmo_get_primary_psp()); - seq_printf(m, "cmo_secondary_psp=%d\n", cmo_get_secondary_psp()); - seq_printf(m, "cmo_page_size=%lu\n", cmo_get_page_size()); -} - -static void splpar_dispatch_data(struct seq_file *m) -{ - int cpu; - unsigned long dispatches = 0; - unsigned long dispatch_dispersions = 0; - - for_each_possible_cpu(cpu) { - dispatches += lppaca_of(cpu).yield_count; - dispatch_dispersions += lppaca_of(cpu).dispersion_count; - } - - seq_printf(m, "dispatches=%lu\n", dispatches); - seq_printf(m, "dispatch_dispersions=%lu\n", dispatch_dispersions); -} - -static void parse_em_data(struct seq_file *m) -{ - unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; - - if (plpar_hcall(H_GET_EM_PARMS, retbuf) == H_SUCCESS) - seq_printf(m, "power_mode_data=%016lx\n", retbuf[0]); -} - -static int pseries_lparcfg_data(struct seq_file *m, void *v) -{ - int partition_potential_processors; - int partition_active_processors; - struct device_node *rtas_node; - const int *lrdrp = NULL; - - rtas_node = of_find_node_by_path("/rtas"); - if (rtas_node) - lrdrp = of_get_property(rtas_node, "ibm,lrdr-capacity", NULL); - - if (lrdrp == NULL) { - partition_potential_processors = vdso_data->processorCount; - } else { - partition_potential_processors = *(lrdrp + 4); - } - of_node_put(rtas_node); - - partition_active_processors = lparcfg_count_active_processors(); - - if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - /* this call handles the ibm,get-system-parameter contents */ - parse_system_parameter_string(m); - parse_ppp_data(m); - parse_mpp_data(m); - parse_mpp_x_data(m); - pseries_cmo_data(m); - splpar_dispatch_data(m); - - seq_printf(m, "purr=%ld\n", get_purr()); - } else { /* non SPLPAR case */ - - seq_printf(m, "system_active_processors=%d\n", - partition_potential_processors); - - seq_printf(m, "system_potential_processors=%d\n", - partition_potential_processors); - - seq_printf(m, "partition_max_entitled_capacity=%d\n", - partition_potential_processors * 100); - - seq_printf(m, "partition_entitled_capacity=%d\n", - partition_active_processors * 100); - } - - seq_printf(m, "partition_active_processors=%d\n", - partition_active_processors); - - seq_printf(m, "partition_potential_processors=%d\n", - partition_potential_processors); - - seq_printf(m, "shared_processor_mode=%d\n", lppaca_of(0).shared_proc); - - seq_printf(m, "slb_size=%d\n", mmu_slb_size); - - parse_em_data(m); - - return 0; -} - -static ssize_t update_ppp(u64 *entitlement, u8 *weight) -{ - struct hvcall_ppp_data ppp_data; - u8 new_weight; - u64 new_entitled; - ssize_t retval; - - /* Get our current parameters */ - retval = h_get_ppp(&ppp_data); - if (retval) - return retval; - - if (entitlement) { - new_weight = ppp_data.weight; - new_entitled = *entitlement; - } else if (weight) { - new_weight = *weight; - new_entitled = ppp_data.entitlement; - } else - return -EINVAL; - - pr_debug("%s: current_entitled = %llu, current_weight = %u\n", - __func__, ppp_data.entitlement, ppp_data.weight); - - pr_debug("%s: new_entitled = %llu, new_weight = %u\n", - __func__, new_entitled, new_weight); - - retval = plpar_hcall_norets(H_SET_PPP, new_entitled, new_weight); - return retval; -} - -/** - * update_mpp - * - * Update the memory entitlement and weight for the partition. Caller must - * specify either a new entitlement or weight, not both, to be updated - * since the h_set_mpp call takes both entitlement and weight as parameters. - */ -static ssize_t update_mpp(u64 *entitlement, u8 *weight) -{ - struct hvcall_mpp_data mpp_data; - u64 new_entitled; - u8 new_weight; - ssize_t rc; - - if (entitlement) { - /* Check with vio to ensure the new memory entitlement - * can be handled. - */ - rc = vio_cmo_entitlement_update(*entitlement); - if (rc) - return rc; - } - - rc = h_get_mpp(&mpp_data); - if (rc) - return rc; - - if (entitlement) { - new_weight = mpp_data.mem_weight; - new_entitled = *entitlement; - } else if (weight) { - new_weight = *weight; - new_entitled = mpp_data.entitled_mem; - } else - return -EINVAL; - - pr_debug("%s: current_entitled = %lu, current_weight = %u\n", - __func__, mpp_data.entitled_mem, mpp_data.mem_weight); - - pr_debug("%s: new_entitled = %llu, new_weight = %u\n", - __func__, new_entitled, new_weight); - - rc = plpar_hcall_norets(H_SET_MPP, new_entitled, new_weight); - return rc; -} - -/* - * Interface for changing system parameters (variable capacity weight - * and entitled capacity). Format of input is "param_name=value"; - * anything after value is ignored. Valid parameters at this time are - * "partition_entitled_capacity" and "capacity_weight". We use - * H_SET_PPP to alter parameters. - * - * This function should be invoked only on systems with - * FW_FEATURE_SPLPAR. - */ -static ssize_t lparcfg_write(struct file *file, const char __user * buf, - size_t count, loff_t * off) -{ - int kbuf_sz = 64; - char kbuf[kbuf_sz]; - char *tmp; - u64 new_entitled, *new_entitled_ptr = &new_entitled; - u8 new_weight, *new_weight_ptr = &new_weight; - ssize_t retval; - - if (!firmware_has_feature(FW_FEATURE_SPLPAR)) - return -EINVAL; - - if (count > kbuf_sz) - return -EINVAL; - - if (copy_from_user(kbuf, buf, count)) - return -EFAULT; - - kbuf[count - 1] = '\0'; - tmp = strchr(kbuf, '='); - if (!tmp) - return -EINVAL; - - *tmp++ = '\0'; - - if (!strcmp(kbuf, "partition_entitled_capacity")) { - char *endp; - *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10); - if (endp == tmp) - return -EINVAL; - - retval = update_ppp(new_entitled_ptr, NULL); - } else if (!strcmp(kbuf, "capacity_weight")) { - char *endp; - *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10); - if (endp == tmp) - return -EINVAL; - - retval = update_ppp(NULL, new_weight_ptr); - } else if (!strcmp(kbuf, "entitled_memory")) { - char *endp; - *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10); - if (endp == tmp) - return -EINVAL; - - retval = update_mpp(new_entitled_ptr, NULL); - } else if (!strcmp(kbuf, "entitled_memory_weight")) { - char *endp; - *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10); - if (endp == tmp) - return -EINVAL; - - retval = update_mpp(NULL, new_weight_ptr); - } else - return -EINVAL; - - if (retval == H_SUCCESS || retval == H_CONSTRAINED) { - retval = count; - } else if (retval == H_BUSY) { - retval = -EBUSY; - } else if (retval == H_HARDWARE) { - retval = -EIO; - } else if (retval == H_PARAMETER) { - retval = -EINVAL; - } - - return retval; -} - -static int lparcfg_data(struct seq_file *m, void *v) -{ - struct device_node *rootdn; - const char *model = ""; - const char *system_id = ""; - const char *tmp; - const unsigned int *lp_index_ptr; - unsigned int lp_index = 0; - - seq_printf(m, "%s %s\n", MODULE_NAME, MODULE_VERS); - - rootdn = of_find_node_by_path("/"); - if (rootdn) { - tmp = of_get_property(rootdn, "model", NULL); - if (tmp) - model = tmp; - tmp = of_get_property(rootdn, "system-id", NULL); - if (tmp) - system_id = tmp; - lp_index_ptr = of_get_property(rootdn, "ibm,partition-no", - NULL); - if (lp_index_ptr) - lp_index = *lp_index_ptr; - of_node_put(rootdn); - } - seq_printf(m, "serial_number=%s\n", system_id); - seq_printf(m, "system_type=%s\n", model); - seq_printf(m, "partition_id=%d\n", (int)lp_index); - - return pseries_lparcfg_data(m, v); -} - -static int lparcfg_open(struct inode *inode, struct file *file) -{ - return single_open(file, lparcfg_data, NULL); -} - -static const struct file_operations lparcfg_fops = { - .owner = THIS_MODULE, - .read = seq_read, - .write = lparcfg_write, - .open = lparcfg_open, - .release = single_release, - .llseek = seq_lseek, -}; - -static int __init lparcfg_init(void) -{ - umode_t mode = S_IRUSR | S_IRGRP | S_IROTH; - - /* Allow writing if we have FW_FEATURE_SPLPAR */ - if (firmware_has_feature(FW_FEATURE_SPLPAR)) - mode |= S_IWUSR; - - if (!proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_fops)) { - printk(KERN_ERR "Failed to create powerpc/lparcfg\n"); - return -EIO; - } - return 0; -} - -static void __exit lparcfg_cleanup(void) -{ - remove_proc_subtree("powerpc/lparcfg", NULL); -} - -module_init(lparcfg_init); -module_exit(lparcfg_cleanup); -MODULE_DESCRIPTION("Interface for LPAR configuration data"); -MODULE_AUTHOR("Dave Engebretsen"); -MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index e469f30e6eeb..2b0ad9845363 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -36,26 +36,41 @@ .text +/* + * We store the saved ksp_limit in the unused part + * of the STACK_FRAME_OVERHEAD + */ _GLOBAL(call_do_softirq) mflr r0 stw r0,4(r1) + lwz r10,THREAD+KSP_LIMIT(r2) + addi r11,r3,THREAD_INFO_GAP stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) mr r1,r3 + stw r10,8(r1) + stw r11,THREAD+KSP_LIMIT(r2) bl __do_softirq + lwz r10,8(r1) lwz r1,0(r1) lwz r0,4(r1) + stw r10,THREAD+KSP_LIMIT(r2) mtlr r0 blr -_GLOBAL(call_handle_irq) +_GLOBAL(call_do_irq) mflr r0 stw r0,4(r1) - mtctr r6 - stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5) - mr r1,r5 - bctrl + lwz r10,THREAD+KSP_LIMIT(r2) + addi r11,r3,THREAD_INFO_GAP + stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) + mr r1,r4 + stw r10,8(r1) + stw r11,THREAD+KSP_LIMIT(r2) + bl __do_irq + lwz r10,8(r1) lwz r1,0(r1) lwz r0,4(r1) + stw r10,THREAD+KSP_LIMIT(r2) mtlr r0 blr @@ -327,8 +342,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE) * * flush_icache_range(unsigned long start, unsigned long stop) */ -_KPROBE(__flush_icache_range) +_KPROBE(flush_icache_range) BEGIN_FTR_SECTION + isync blr /* for 601, do nothing */ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) li r5,L1_CACHE_BYTES-1 diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 6820e45f557b..e59caf874d05 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -40,14 +40,12 @@ _GLOBAL(call_do_softirq) mtlr r0 blr -_GLOBAL(call_handle_irq) - ld r8,0(r6) +_GLOBAL(call_do_irq) mflr r0 std r0,16(r1) - mtctr r8 - stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5) - mr r1,r5 - bctrl + stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) + mr r1,r4 + bl .__do_irq ld r1,0(r1) ld r0,16(r1) mtlr r0 @@ -67,8 +65,10 @@ PPC64_CACHES: * flush all bytes from start through stop-1 inclusive */ -_KPROBE(__flush_icache_range) - +_KPROBE(flush_icache_range) +BEGIN_FTR_SECTION + blr +END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) /* * Flush the data cache to memory * @@ -247,6 +247,37 @@ _GLOBAL(__bswapdi2) blr #if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) + +_GLOBAL(rmci_on) + sync + isync + li r3,0x100 + rldicl r3,r3,32,0 + mfspr r5,SPRN_HID4 + or r5,r5,r3 + sync + mtspr SPRN_HID4,r5 + isync + slbia + isync + sync + blr + +_GLOBAL(rmci_off) + sync + isync + li r3,0x100 + rldicl r3,r3,32,0 + mfspr r5,SPRN_HID4 + andc r5,r5,r3 + sync + mtspr SPRN_HID4,r5 + isync + slbia + isync + sync + blr + /* * Do an IO access in real mode */ @@ -416,19 +447,6 @@ _GLOBAL(scom970_write) blr #endif /* CONFIG_CPU_FREQ_PMAC64 || CONFIG_CPU_FREQ_MAPLE */ - -/* - * disable_kernel_fp() - * Disable the FPU. - */ -_GLOBAL(disable_kernel_fp) - mfmsr r3 - rldicl r0,r3,(63-MSR_FP_LG),1 - rldicl r3,r0,(MSR_FP_LG+1),0 - mtmsrd r3 /* disable use of fpu now */ - isync - blr - /* kexec_wait(phys_cpu) * * wait for the flag to change, indicating this kernel is going away but diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 48fbc2b97e95..8213ee1eb05a 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -84,22 +84,30 @@ static ssize_t dev_nvram_read(struct file *file, char __user *buf, char *tmp = NULL; ssize_t size; - ret = -ENODEV; - if (!ppc_md.nvram_size) + if (!ppc_md.nvram_size) { + ret = -ENODEV; goto out; + } - ret = 0; size = ppc_md.nvram_size(); - if (*ppos >= size || size < 0) + if (size < 0) { + ret = size; + goto out; + } + + if (*ppos >= size) { + ret = 0; goto out; + } count = min_t(size_t, count, size - *ppos); count = min(count, PAGE_SIZE); - ret = -ENOMEM; tmp = kmalloc(count, GFP_KERNEL); - if (!tmp) + if (!tmp) { + ret = -ENOMEM; goto out; + } ret = ppc_md.nvram_read(tmp, count, ppos); if (ret <= 0) diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index f8f24685f10a..3fc16e3beb9f 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -34,10 +34,10 @@ extern unsigned long __toc_start; */ struct lppaca lppaca[] = { [0 ... (NR_LPPACAS-1)] = { - .desc = 0xd397d781, /* "LpPa" */ - .size = sizeof(struct lppaca), + .desc = cpu_to_be32(0xd397d781), /* "LpPa" */ + .size = cpu_to_be16(sizeof(struct lppaca)), .fpregs_in_use = 1, - .slb_count = 64, + .slb_count = cpu_to_be16(64), .vmxregs_in_use = 0, .page_ins = 0, }, @@ -101,8 +101,8 @@ static inline void free_lppacas(void) { } */ struct slb_shadow slb_shadow[] __cacheline_aligned = { [0 ... (NR_CPUS-1)] = { - .persistent = SLB_NUM_BOLTED, - .buffer_length = sizeof(struct slb_shadow), + .persistent = cpu_to_be32(SLB_NUM_BOLTED), + .buffer_length = cpu_to_be32(sizeof(struct slb_shadow)), }, }; diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 7f2273cc3c7d..905a24bb7acc 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -306,7 +306,7 @@ static struct resource *__pci_mmap_make_offset(struct pci_dev *dev, unsigned long io_offset = 0; int i, res_bit; - if (hose == 0) + if (hose == NULL) return NULL; /* should never happen */ /* If memory, add on the PCI bridge address offset */ @@ -667,7 +667,7 @@ void pci_resource_to_user(const struct pci_dev *dev, int bar, void pci_process_bridge_OF_ranges(struct pci_controller *hose, struct device_node *dev, int primary) { - const u32 *ranges; + const __be32 *ranges; int rlen; int pna = of_n_addr_cells(dev); int np = pna + 5; @@ -687,7 +687,7 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose, /* Parse it */ while ((rlen -= np * 4) >= 0) { /* Read next ranges element */ - pci_space = ranges[0]; + pci_space = of_read_number(ranges, 1); pci_addr = of_read_number(ranges + 1, 2); cpu_addr = of_translate_address(dev, ranges + 3); size = of_read_number(ranges + pna + 3, 2); @@ -704,7 +704,7 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose, /* Now consume following elements while they are contiguous */ for (; rlen >= np * sizeof(u32); ranges += np, rlen -= np * 4) { - if (ranges[0] != pci_space) + if (of_read_number(ranges, 1) != pci_space) break; pci_next = of_read_number(ranges + 1, 2); cpu_next = of_translate_address(dev, ranges + 3); @@ -827,6 +827,7 @@ static void pcibios_fixup_resources(struct pci_dev *dev) } for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { struct resource *res = dev->resource + i; + struct pci_bus_region reg; if (!res->flags) continue; @@ -835,8 +836,9 @@ static void pcibios_fixup_resources(struct pci_dev *dev) * at 0 as unset as well, except if PCI_PROBE_ONLY is also set * since in that case, we don't want to re-assign anything */ + pcibios_resource_to_bus(dev, ®, res); if (pci_has_flag(PCI_REASSIGN_ALL_RSRC) || - (res->start == 0 && !pci_has_flag(PCI_PROBE_ONLY))) { + (reg.start == 0 && !pci_has_flag(PCI_PROBE_ONLY))) { /* Only print message if not re-assigning */ if (!pci_has_flag(PCI_REASSIGN_ALL_RSRC)) pr_debug("PCI:%s Resource %d %016llx-%016llx [%x] " @@ -992,7 +994,7 @@ void pcibios_setup_bus_self(struct pci_bus *bus) ppc_md.pci_dma_bus_setup(bus); } -void pcibios_setup_device(struct pci_dev *dev) +static void pcibios_setup_device(struct pci_dev *dev) { /* Fixup NUMA node as it may not be setup yet by the generic * code and is needed by the DMA init @@ -1013,6 +1015,17 @@ void pcibios_setup_device(struct pci_dev *dev) ppc_md.pci_irq_fixup(dev); } +int pcibios_add_device(struct pci_dev *dev) +{ + /* + * We can only call pcibios_setup_device() after bus setup is complete, + * since some of the platform specific DMA setup code depends on it. + */ + if (dev->bus->is_added) + pcibios_setup_device(dev); + return 0; +} + void pcibios_setup_bus_devices(struct pci_bus *bus) { struct pci_dev *dev; @@ -1042,8 +1055,7 @@ void pcibios_fixup_bus(struct pci_bus *bus) * bases. This is -not- called when generating the PCI tree from * the OF device-tree. */ - if (bus->self != NULL) - pci_read_bridge_bases(bus); + pci_read_bridge_bases(bus); /* Now fixup the bus bus */ pcibios_setup_bus_self(bus); @@ -1449,6 +1461,8 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus) /* Allocate bus and devices resources */ pcibios_allocate_bus_resources(bus); pcibios_claim_one_bus(bus); + if (!pci_has_flag(PCI_PROBE_ONLY)) + pci_assign_unassigned_bus_resources(bus); /* Fixup EEH */ eeh_add_device_tree_late(bus); @@ -1467,10 +1481,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) if (ppc_md.pcibios_enable_device_hook(dev)) return -EINVAL; - /* avoid pcie irq fix up impact on cardbus */ - if (dev->hdr_type != PCI_HEADER_TYPE_CARDBUS) - pcibios_setup_device(dev); - return pci_enable_resources(dev, mask); } @@ -1567,7 +1577,7 @@ fake_pci_bus(struct pci_controller *hose, int busnr) { static struct pci_bus bus; - if (hose == 0) { + if (hose == NULL) { printk(KERN_ERR "Can't find hose for PCI bus %d!\n", busnr); } bus.number = busnr; @@ -1663,12 +1673,8 @@ void pcibios_scan_phb(struct pci_controller *hose) /* Configure PCI Express settings */ if (bus && !pci_has_flag(PCI_PROBE_ONLY)) { struct pci_bus *child; - list_for_each_entry(child, &bus->children, node) { - struct pci_dev *self = child->self; - if (!self) - continue; - pcie_bus_configure_settings(child, self->pcie_mpss); - } + list_for_each_entry(child, &bus->children, node) + pcie_bus_configure_settings(child); } } diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c new file mode 100644 index 000000000000..c1e17ae68a08 --- /dev/null +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -0,0 +1,110 @@ +/* + * Derived from "arch/powerpc/platforms/pseries/pci_dlpar.c" + * + * Copyright (C) 2003 Linda Xie <lxie@us.ibm.com> + * Copyright (C) 2005 International Business Machines + * + * Updates, 2005, John Rose <johnrose@austin.ibm.com> + * Updates, 2005, Linas Vepstas <linas@austin.ibm.com> + * Updates, 2013, Gavin Shan <shangw@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/pci.h> +#include <linux/export.h> +#include <asm/pci-bridge.h> +#include <asm/ppc-pci.h> +#include <asm/firmware.h> +#include <asm/eeh.h> + +/** + * pcibios_release_device - release PCI device + * @dev: PCI device + * + * The function is called before releasing the indicated PCI device. + */ +void pcibios_release_device(struct pci_dev *dev) +{ + eeh_remove_device(dev); +} + +/** + * pcibios_remove_pci_devices - remove all devices under this bus + * @bus: the indicated PCI bus + * + * Remove all of the PCI devices under this bus both from the + * linux pci device tree, and from the powerpc EEH address cache. + */ +void pcibios_remove_pci_devices(struct pci_bus *bus) +{ + struct pci_dev *dev, *tmp; + struct pci_bus *child_bus; + + /* First go down child busses */ + list_for_each_entry(child_bus, &bus->children, node) + pcibios_remove_pci_devices(child_bus); + + pr_debug("PCI: Removing devices on bus %04x:%02x\n", + pci_domain_nr(bus), bus->number); + list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) { + pr_debug(" Removing %s...\n", pci_name(dev)); + pci_stop_and_remove_bus_device(dev); + } +} + +EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices); + +/** + * pcibios_add_pci_devices - adds new pci devices to bus + * @bus: the indicated PCI bus + * + * This routine will find and fixup new pci devices under + * the indicated bus. This routine presumes that there + * might already be some devices under this bridge, so + * it carefully tries to add only new devices. (And that + * is how this routine differs from other, similar pcibios + * routines.) + */ +void pcibios_add_pci_devices(struct pci_bus * bus) +{ + int slotno, mode, pass, max; + struct pci_dev *dev; + struct device_node *dn = pci_bus_to_OF_node(bus); + + eeh_add_device_tree_early(dn); + + mode = PCI_PROBE_NORMAL; + if (ppc_md.pci_probe_mode) + mode = ppc_md.pci_probe_mode(bus); + + if (mode == PCI_PROBE_DEVTREE) { + /* use ofdt-based probe */ + of_rescan_bus(dn, bus); + } else if (mode == PCI_PROBE_NORMAL) { + /* + * Use legacy probe. In the partial hotplug case, we + * probably have grandchildren devices unplugged. So + * we don't check the return value from pci_scan_slot() in + * order for fully rescan all the way down to pick them up. + * They can have been removed during partial hotplug. + */ + slotno = PCI_SLOT(PCI_DN(dn->child)->devfn); + pci_scan_slot(bus, PCI_DEVFN(slotno, 0)); + pcibios_setup_bus_devices(bus); + max = bus->busn_res.start; + for (pass = 0; pass < 2; pass++) { + list_for_each_entry(dev, &bus->devices, bus_list) { + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || + dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) + max = pci_scan_bridge(bus, dev, + max, pass); + } + } + } + pcibios_finish_adding_to_bus(bus); +} +EXPORT_SYMBOL_GPL(pcibios_add_pci_devices); diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 2e8629654ca8..a9e311f7a9dd 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -109,7 +109,7 @@ int pcibios_unmap_io_space(struct pci_bus *bus) hose = pci_bus_to_host(bus); /* Check if we have IOs allocated */ - if (hose->io_base_alloc == 0) + if (hose->io_base_alloc == NULL) return 0; pr_debug("IO unmapping for PHB %s\n", hose->dn->full_name); @@ -272,7 +272,7 @@ static void quirk_radeon_32bit_msi(struct pci_dev *dev) struct pci_dn *pdn = pci_get_pdn(dev); if (pdn) - pdn->force_32bit_msi = 1; + pdn->force_32bit_msi = true; } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x68f2, quirk_radeon_32bit_msi); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0xaa68, quirk_radeon_32bit_msi); diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index df038442548a..1f61fab59d9b 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -47,9 +47,8 @@ struct pci_dn *pci_get_pdn(struct pci_dev *pdev) void *update_dn_pci_info(struct device_node *dn, void *data) { struct pci_controller *phb = data; - const int *type = - of_get_property(dn, "ibm,pci-config-space-type", NULL); - const u32 *regs; + const __be32 *type = of_get_property(dn, "ibm,pci-config-space-type", NULL); + const __be32 *regs; struct pci_dn *pdn; pdn = zalloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL); @@ -63,12 +62,14 @@ void *update_dn_pci_info(struct device_node *dn, void *data) #endif regs = of_get_property(dn, "reg", NULL); if (regs) { + u32 addr = of_read_number(regs, 1); + /* First register entry is addr (00BBSS00) */ - pdn->busno = (regs[0] >> 16) & 0xff; - pdn->devfn = (regs[0] >> 8) & 0xff; + pdn->busno = (addr >> 16) & 0xff; + pdn->devfn = (addr >> 8) & 0xff; } - pdn->pci_ext_config_space = (type && *type == 1); + pdn->pci_ext_config_space = (type && of_read_number(type, 1) == 1); return NULL; } @@ -98,12 +99,13 @@ void *traverse_pci_devices(struct device_node *start, traverse_func pre, /* We started with a phb, iterate all childs */ for (dn = start->child; dn; dn = nextdn) { - const u32 *classp; - u32 class; + const __be32 *classp; + u32 class = 0; nextdn = NULL; classp = of_get_property(dn, "class-code", NULL); - class = classp ? *classp : 0; + if (classp) + class = of_read_number(classp, 1); if (pre && ((ret = pre(dn, data)) != NULL)) return ret; diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 2a67e9baa59f..4368ec6fdc8c 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -24,12 +24,12 @@ */ static u32 get_int_prop(struct device_node *np, const char *name, u32 def) { - const u32 *prop; + const __be32 *prop; int len; prop = of_get_property(np, name, &len); if (prop && len >= 4) - return *prop; + return of_read_number(prop, 1); return def; } @@ -77,7 +77,7 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev) unsigned int flags; struct pci_bus_region region; struct resource *res; - const u32 *addrs; + const __be32 *addrs; u32 i; int proplen; @@ -86,14 +86,14 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev) return; pr_debug(" parse addresses (%d bytes) @ %p\n", proplen, addrs); for (; proplen >= 20; proplen -= 20, addrs += 5) { - flags = pci_parse_of_flags(addrs[0], 0); + flags = pci_parse_of_flags(of_read_number(addrs, 1), 0); if (!flags) continue; base = of_read_number(&addrs[1], 2); size = of_read_number(&addrs[3], 2); if (!size) continue; - i = addrs[0] & 0xff; + i = of_read_number(addrs, 1) & 0xff; pr_debug(" base: %llx, size: %llx, i: %x\n", (unsigned long long)base, (unsigned long long)size, i); @@ -128,7 +128,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, const char *type; struct pci_slot *slot; - dev = alloc_pci_dev(); + dev = pci_alloc_dev(bus); if (!dev) return NULL; type = of_get_property(node, "device_type", NULL); @@ -137,7 +137,6 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, pr_debug(" create device, devfn: %x, type: %s\n", devfn, type); - dev->bus = bus; dev->dev.of_node = of_node_get(node); dev->dev.parent = bus->bridge; dev->dev.bus = &pci_bus_type; @@ -165,7 +164,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, pr_debug(" class: 0x%x\n", dev->class); pr_debug(" revision: 0x%x\n", dev->revision); - dev->current_state = 4; /* unknown power state */ + dev->current_state = PCI_UNKNOWN; /* unknown power state */ dev->error_state = pci_channel_io_normal; dev->dma_mask = 0xffffffff; @@ -208,7 +207,7 @@ void of_scan_pci_bridge(struct pci_dev *dev) { struct device_node *node = dev->dev.of_node; struct pci_bus *bus; - const u32 *busrange, *ranges; + const __be32 *busrange, *ranges; int len, i, mode; struct pci_bus_region region; struct resource *res; @@ -231,15 +230,21 @@ void of_scan_pci_bridge(struct pci_dev *dev) return; } - bus = pci_add_new_bus(dev->bus, dev, busrange[0]); + bus = pci_find_bus(pci_domain_nr(dev->bus), + of_read_number(busrange, 1)); if (!bus) { - printk(KERN_ERR "Failed to create pci bus for %s\n", - node->full_name); - return; + bus = pci_add_new_bus(dev->bus, dev, + of_read_number(busrange, 1)); + if (!bus) { + printk(KERN_ERR "Failed to create pci bus for %s\n", + node->full_name); + return; + } } bus->primary = dev->bus->number; - pci_bus_insert_busn_res(bus, busrange[0], busrange[1]); + pci_bus_insert_busn_res(bus, of_read_number(busrange, 1), + of_read_number(busrange+1, 1)); bus->bridge_ctl = 0; /* parse ranges property */ @@ -252,7 +257,7 @@ void of_scan_pci_bridge(struct pci_dev *dev) } i = 1; for (; len >= 32; len -= 32, ranges += 8) { - flags = pci_parse_of_flags(ranges[0], 1); + flags = pci_parse_of_flags(of_read_number(ranges, 1), 1); size = of_read_number(&ranges[6], 2); if (flags == 0 || size == 0) continue; @@ -293,6 +298,38 @@ void of_scan_pci_bridge(struct pci_dev *dev) } EXPORT_SYMBOL(of_scan_pci_bridge); +static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus, + struct device_node *dn) +{ + struct pci_dev *dev = NULL; + const u32 *reg; + int reglen, devfn; + + pr_debug(" * %s\n", dn->full_name); + if (!of_device_is_available(dn)) + return NULL; + + reg = of_get_property(dn, "reg", ®len); + if (reg == NULL || reglen < 20) + return NULL; + devfn = (reg[0] >> 8) & 0xff; + + /* Check if the PCI device is already there */ + dev = pci_get_slot(bus, devfn); + if (dev) { + pci_dev_put(dev); + return dev; + } + + /* create a new pci_dev for this device */ + dev = of_create_pci_dev(dn, bus, devfn); + if (!dev) + return NULL; + + pr_debug(" dev header type: %x\n", dev->hdr_type); + return dev; +} + /** * __of_scan_bus - given a PCI bus node, setup bus and scan for child devices * @node: device tree node for the PCI bus @@ -303,8 +340,6 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus, int rescan_existing) { struct device_node *child; - const u32 *reg; - int reglen, devfn; struct pci_dev *dev; pr_debug("of_scan_bus(%s) bus no %d...\n", @@ -312,16 +347,7 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus, /* Scan direct children */ for_each_child_of_node(node, child) { - pr_debug(" * %s\n", child->full_name); - if (!of_device_is_available(child)) - continue; - reg = of_get_property(child, "reg", ®len); - if (reg == NULL || reglen < 20) - continue; - devfn = (reg[0] >> 8) & 0xff; - - /* create a new pci_dev for this device */ - dev = of_create_pci_dev(child, bus, devfn); + dev = of_scan_pci_dev(bus, child); if (!dev) continue; pr_debug(" dev header type: %x\n", dev->hdr_type); diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index c29666586998..21646dbe1bb3 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -96,7 +96,9 @@ EXPORT_SYMBOL(pci_dram_offset); EXPORT_SYMBOL(start_thread); +#ifdef CONFIG_PPC_FPU EXPORT_SYMBOL(giveup_fpu); +#endif #ifdef CONFIG_ALTIVEC EXPORT_SYMBOL(giveup_altivec); #endif /* CONFIG_ALTIVEC */ @@ -111,7 +113,6 @@ EXPORT_SYMBOL(giveup_spe); #ifndef CONFIG_PPC64 EXPORT_SYMBOL(flush_instruction_cache); #endif -EXPORT_SYMBOL(__flush_icache_range); EXPORT_SYMBOL(flush_dcache_range); #ifdef CONFIG_SMP diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c index feb8580fdc84..c30612aad68e 100644 --- a/arch/powerpc/kernel/proc_powerpc.c +++ b/arch/powerpc/kernel/proc_powerpc.c @@ -29,25 +29,9 @@ #ifdef CONFIG_PPC64 -static loff_t page_map_seek( struct file *file, loff_t off, int whence) +static loff_t page_map_seek(struct file *file, loff_t off, int whence) { - loff_t new; - switch(whence) { - case 0: - new = off; - break; - case 1: - new = file->f_pos + off; - break; - case 2: - new = PAGE_SIZE + off; - break; - default: - return -EINVAL; - } - if ( new < 0 || new > PAGE_SIZE ) - return -EINVAL; - return (file->f_pos = new); + return fixed_size_llseek(file, off, whence, PAGE_SIZE); } static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes, diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index a902723fdc69..96d2fdf3aa9e 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -74,6 +74,7 @@ struct task_struct *last_task_used_vsx = NULL; struct task_struct *last_task_used_spe = NULL; #endif +#ifdef CONFIG_PPC_FPU /* * Make sure the floating-point register state in the * the thread_struct is up to date for task tsk. @@ -107,6 +108,7 @@ void flush_fp_to_thread(struct task_struct *tsk) } } EXPORT_SYMBOL_GPL(flush_fp_to_thread); +#endif void enable_kernel_fp(void) { @@ -399,7 +401,8 @@ static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) { mtspr(SPRN_DABR, dabr); - mtspr(SPRN_DABRX, dabrx); + if (cpu_has_feature(CPU_FTR_DABRX)) + mtspr(SPRN_DABRX, dabrx); return 0; } #else @@ -599,6 +602,16 @@ struct task_struct *__switch_to(struct task_struct *prev, struct ppc64_tlb_batch *batch; #endif + /* Back up the TAR across context switches. + * Note that the TAR is not available for use in the kernel. (To + * provide this, the TAR should be backed up/restored on exception + * entry/exit instead, and be in pt_regs. FIXME, this should be in + * pt_regs anyway (for debug).) + * Save the TAR here before we do treclaim/trecheckpoint as these + * will change the TAR. + */ + save_tar(&prev->thread); + __switch_to_tm(prev); #ifdef CONFIG_SMP @@ -915,7 +928,11 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) flush_altivec_to_thread(src); flush_vsx_to_thread(src); flush_spe_to_thread(src); + *dst = *src; + + clear_task_ebb(dst); + return 0; } @@ -983,9 +1000,10 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, kregs = (struct pt_regs *) sp; sp -= STACK_FRAME_OVERHEAD; p->thread.ksp = sp; +#ifdef CONFIG_PPC32 p->thread.ksp_limit = (unsigned long)task_stack_page(p) + _ALIGN_UP(sizeof(struct thread_info), 16); - +#endif #ifdef CONFIG_HAVE_HW_BREAKPOINT p->thread.ptrace_bps[0] = NULL; #endif @@ -1368,7 +1386,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) #ifdef CONFIG_PPC64 /* Called with hard IRQs off */ -void __ppc64_runlatch_on(void) +void notrace __ppc64_runlatch_on(void) { struct thread_info *ti = current_thread_info(); unsigned long ctrl; @@ -1381,7 +1399,7 @@ void __ppc64_runlatch_on(void) } /* Called with hard IRQs off */ -void __ppc64_runlatch_off(void) +void notrace __ppc64_runlatch_off(void) { struct thread_info *ti = current_thread_info(); unsigned long ctrl; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 8b6f7a99cce2..b7634ce41dbc 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -215,16 +215,16 @@ static void __init check_cpu_pa_features(unsigned long node) #ifdef CONFIG_PPC_STD_MMU_64 static void __init check_cpu_slb_size(unsigned long node) { - u32 *slb_size_ptr; + __be32 *slb_size_ptr; slb_size_ptr = of_get_flat_dt_prop(node, "slb-size", NULL); if (slb_size_ptr != NULL) { - mmu_slb_size = *slb_size_ptr; + mmu_slb_size = be32_to_cpup(slb_size_ptr); return; } slb_size_ptr = of_get_flat_dt_prop(node, "ibm,slb-size", NULL); if (slb_size_ptr != NULL) { - mmu_slb_size = *slb_size_ptr; + mmu_slb_size = be32_to_cpup(slb_size_ptr); } } #else @@ -279,11 +279,11 @@ static void __init check_cpu_feature_properties(unsigned long node) { unsigned long i; struct feature_property *fp = feature_properties; - const u32 *prop; + const __be32 *prop; for (i = 0; i < ARRAY_SIZE(feature_properties); ++i, ++fp) { prop = of_get_flat_dt_prop(node, fp->name, NULL); - if (prop && *prop >= fp->min_value) { + if (prop && be32_to_cpup(prop) >= fp->min_value) { cur_cpu_spec->cpu_features |= fp->cpu_feature; cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftr; } @@ -295,8 +295,8 @@ static int __init early_init_dt_scan_cpus(unsigned long node, void *data) { char *type = of_get_flat_dt_prop(node, "device_type", NULL); - const u32 *prop; - const u32 *intserv; + const __be32 *prop; + const __be32 *intserv; int i, nthreads; unsigned long len; int found = -1; @@ -324,8 +324,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node, * version 2 of the kexec param format adds the phys cpuid of * booted proc. */ - if (initial_boot_params->version >= 2) { - if (intserv[i] == initial_boot_params->boot_cpuid_phys) { + if (be32_to_cpu(initial_boot_params->version) >= 2) { + if (be32_to_cpu(intserv[i]) == + be32_to_cpu(initial_boot_params->boot_cpuid_phys)) { found = boot_cpu_count; found_thread = i; } @@ -347,9 +348,10 @@ static int __init early_init_dt_scan_cpus(unsigned long node, if (found >= 0) { DBG("boot cpu: logical %d physical %d\n", found, - intserv[found_thread]); + be32_to_cpu(intserv[found_thread])); boot_cpuid = found; - set_hard_smp_processor_id(found, intserv[found_thread]); + set_hard_smp_processor_id(found, + be32_to_cpu(intserv[found_thread])); /* * PAPR defines "logical" PVR values for cpus that @@ -366,8 +368,8 @@ static int __init early_init_dt_scan_cpus(unsigned long node, * it uses 0x0f000001. */ prop = of_get_flat_dt_prop(node, "cpu-version", NULL); - if (prop && (*prop & 0xff000000) == 0x0f000000) - identify_cpu(0, *prop); + if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000) + identify_cpu(0, be32_to_cpup(prop)); identical_pvr_fixup(node); } @@ -389,7 +391,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node, int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname, int depth, void *data) { - unsigned long *lprop; + unsigned long *lprop; /* All these set by kernel, so no need to convert endian */ /* Use common scan routine to determine if this is the chosen node */ if (early_init_dt_scan_chosen(node, uname, depth, data) == 0) @@ -454,7 +456,7 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node) if (dm == NULL || l < sizeof(__be32)) return 0; - n = *dm++; /* number of entries */ + n = of_read_number(dm++, 1); /* number of entries */ if (l < (n * (dt_root_addr_cells + 4) + 1) * sizeof(__be32)) return 0; @@ -466,7 +468,7 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node) for (; n != 0; --n) { base = dt_mem_next_cell(dt_root_addr_cells, &dm); - flags = dm[3]; + flags = of_read_number(&dm[3], 1); /* skip DRC index, pad, assoc. list index, flags */ dm += 4; /* skip this block if the reserved bit is set in flags (0x80) @@ -544,14 +546,8 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) memblock_add(base, size); } -void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) -{ - return __va(memblock_alloc(size, align)); -} - #ifdef CONFIG_BLK_DEV_INITRD -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { initrd_start = (unsigned long)__va(start); initrd_end = (unsigned long)__va(end); @@ -559,27 +555,60 @@ void __init early_init_dt_setup_initrd_arch(unsigned long start, } #endif +static void __init early_reserve_mem_dt(void) +{ + unsigned long i, len, dt_root; + const __be32 *prop; + + dt_root = of_get_flat_dt_root(); + + prop = of_get_flat_dt_prop(dt_root, "reserved-ranges", &len); + + if (!prop) + return; + + DBG("Found new-style reserved-ranges\n"); + + /* Each reserved range is an (address,size) pair, 2 cells each, + * totalling 4 cells per range. */ + for (i = 0; i < len / (sizeof(*prop) * 4); i++) { + u64 base, size; + + base = of_read_number(prop + (i * 4) + 0, 2); + size = of_read_number(prop + (i * 4) + 2, 2); + + if (size) { + DBG("reserving: %llx -> %llx\n", base, size); + memblock_reserve(base, size); + } + } +} + static void __init early_reserve_mem(void) { u64 base, size; - u64 *reserve_map; + __be64 *reserve_map; unsigned long self_base; unsigned long self_size; - reserve_map = (u64 *)(((unsigned long)initial_boot_params) + - initial_boot_params->off_mem_rsvmap); + reserve_map = (__be64 *)(((unsigned long)initial_boot_params) + + be32_to_cpu(initial_boot_params->off_mem_rsvmap)); /* before we do anything, lets reserve the dt blob */ self_base = __pa((unsigned long)initial_boot_params); - self_size = initial_boot_params->totalsize; + self_size = be32_to_cpu(initial_boot_params->totalsize); memblock_reserve(self_base, self_size); + /* Look for the new "reserved-regions" property in the DT */ + early_reserve_mem_dt(); + #ifdef CONFIG_BLK_DEV_INITRD - /* then reserve the initrd, if any */ - if (initrd_start && (initrd_end > initrd_start)) + /* Then reserve the initrd, if any */ + if (initrd_start && (initrd_end > initrd_start)) { memblock_reserve(_ALIGN_DOWN(__pa(initrd_start), PAGE_SIZE), _ALIGN_UP(initrd_end, PAGE_SIZE) - _ALIGN_DOWN(initrd_start, PAGE_SIZE)); + } #endif /* CONFIG_BLK_DEV_INITRD */ #ifdef CONFIG_PPC32 @@ -587,13 +616,15 @@ static void __init early_reserve_mem(void) * Handle the case where we might be booting from an old kexec * image that setup the mem_rsvmap as pairs of 32-bit values */ - if (*reserve_map > 0xffffffffull) { + if (be64_to_cpup(reserve_map) > 0xffffffffull) { u32 base_32, size_32; - u32 *reserve_map_32 = (u32 *)reserve_map; + __be32 *reserve_map_32 = (__be32 *)reserve_map; + + DBG("Found old 32-bit reserve map\n"); while (1) { - base_32 = *(reserve_map_32++); - size_32 = *(reserve_map_32++); + base_32 = be32_to_cpup(reserve_map_32++); + size_32 = be32_to_cpup(reserve_map_32++); if (size_32 == 0) break; /* skip if the reservation is for the blob */ @@ -605,9 +636,12 @@ static void __init early_reserve_mem(void) return; } #endif + DBG("Processing reserve map\n"); + + /* Handle the reserve map in the fdt blob if it exists */ while (1) { - base = *(reserve_map++); - size = *(reserve_map++); + base = be64_to_cpup(reserve_map++); + size = be64_to_cpup(reserve_map++); if (size == 0) break; DBG("reserving: %llx -> %llx\n", base, size); @@ -757,6 +791,32 @@ struct device_node *of_find_next_cache_node(struct device_node *np) return NULL; } +/** + * of_get_ibm_chip_id - Returns the IBM "chip-id" of a device + * @np: device node of the device + * + * This looks for a property "ibm,chip-id" in the node or any + * of its parents and returns its content, or -1 if it cannot + * be found. + */ +int of_get_ibm_chip_id(struct device_node *np) +{ + of_node_get(np); + while(np) { + struct device_node *old = np; + const __be32 *prop; + + prop = of_get_property(np, "ibm,chip-id", NULL); + if (prop) { + of_node_put(np); + return be32_to_cpup(prop); + } + np = of_get_parent(np); + of_node_put(old); + } + return -1; +} + #ifdef CONFIG_PPC_PSERIES /* * Fix up the uninitialized fields in a new device node: @@ -827,49 +887,10 @@ static int __init prom_reconfig_setup(void) __initcall(prom_reconfig_setup); #endif -/* Find the device node for a given logical cpu number, also returns the cpu - * local thread number (index in ibm,interrupt-server#s) if relevant and - * asked for (non NULL) - */ -struct device_node *of_get_cpu_node(int cpu, unsigned int *thread) +bool arch_match_cpu_phys_id(int cpu, u64 phys_id) { - int hardid; - struct device_node *np; - - hardid = get_hard_smp_processor_id(cpu); - - for_each_node_by_type(np, "cpu") { - const u32 *intserv; - unsigned int plen, t; - - /* Check for ibm,ppc-interrupt-server#s. If it doesn't exist - * fallback to "reg" property and assume no threads - */ - intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", - &plen); - if (intserv == NULL) { - const u32 *reg = of_get_property(np, "reg", NULL); - if (reg == NULL) - continue; - if (*reg == hardid) { - if (thread) - *thread = 0; - return np; - } - } else { - plen /= sizeof(u32); - for (t = 0; t < plen; t++) { - if (hardid == intserv[t]) { - if (thread) - *thread = t; - return np; - } - } - } - } - return NULL; + return (int)phys_id == get_hard_smp_processor_id(cpu); } -EXPORT_SYMBOL(of_get_cpu_node); #if defined(CONFIG_DEBUG_FS) && defined(DEBUG) static struct debugfs_blob_wrapper flat_dt_blob; @@ -879,7 +900,7 @@ static int __init export_flat_device_tree(void) struct dentry *d; flat_dt_blob.data = initial_boot_params; - flat_dt_blob.size = initial_boot_params->totalsize; + flat_dt_blob.size = be32_to_cpu(initial_boot_params->totalsize); d = debugfs_create_blob("flat-device-tree", S_IFREG | S_IRUSR, powerpc_debugfs_root, &flat_dt_blob); diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 5eccda9fd33f..5fe2842e8bab 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -107,10 +107,10 @@ int of_workarounds; typedef u32 prom_arg_t; struct prom_args { - u32 service; - u32 nargs; - u32 nret; - prom_arg_t args[10]; + __be32 service; + __be32 nargs; + __be32 nret; + __be32 args[10]; }; struct prom_t { @@ -123,11 +123,11 @@ struct prom_t { }; struct mem_map_entry { - u64 base; - u64 size; + __be64 base; + __be64 size; }; -typedef u32 cell_t; +typedef __be32 cell_t; extern void __start(unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6, unsigned long r7, unsigned long r8, @@ -196,6 +196,8 @@ static int __initdata mem_reserve_cnt; static cell_t __initdata regbuf[1024]; +static bool rtas_has_query_cpu_stopped; + /* * Error results ... some OF calls will return "-1" on error, some @@ -219,13 +221,13 @@ static int __init call_prom(const char *service, int nargs, int nret, ...) struct prom_args args; va_list list; - args.service = ADDR(service); - args.nargs = nargs; - args.nret = nret; + args.service = cpu_to_be32(ADDR(service)); + args.nargs = cpu_to_be32(nargs); + args.nret = cpu_to_be32(nret); va_start(list, nret); for (i = 0; i < nargs; i++) - args.args[i] = va_arg(list, prom_arg_t); + args.args[i] = cpu_to_be32(va_arg(list, prom_arg_t)); va_end(list); for (i = 0; i < nret; i++) @@ -234,7 +236,7 @@ static int __init call_prom(const char *service, int nargs, int nret, ...) if (enter_prom(&args, prom_entry) < 0) return PROM_ERROR; - return (nret > 0) ? args.args[nargs] : 0; + return (nret > 0) ? be32_to_cpu(args.args[nargs]) : 0; } static int __init call_prom_ret(const char *service, int nargs, int nret, @@ -244,13 +246,13 @@ static int __init call_prom_ret(const char *service, int nargs, int nret, struct prom_args args; va_list list; - args.service = ADDR(service); - args.nargs = nargs; - args.nret = nret; + args.service = cpu_to_be32(ADDR(service)); + args.nargs = cpu_to_be32(nargs); + args.nret = cpu_to_be32(nret); va_start(list, rets); for (i = 0; i < nargs; i++) - args.args[i] = va_arg(list, prom_arg_t); + args.args[i] = cpu_to_be32(va_arg(list, prom_arg_t)); va_end(list); for (i = 0; i < nret; i++) @@ -261,9 +263,9 @@ static int __init call_prom_ret(const char *service, int nargs, int nret, if (rets != NULL) for (i = 1; i < nret; ++i) - rets[i-1] = args.args[nargs+i]; + rets[i-1] = be32_to_cpu(args.args[nargs+i]); - return (nret > 0) ? args.args[nargs] : 0; + return (nret > 0) ? be32_to_cpu(args.args[nargs]) : 0; } @@ -527,7 +529,7 @@ static int __init prom_setprop(phandle node, const char *nodename, #define islower(c) ('a' <= (c) && (c) <= 'z') #define toupper(c) (islower(c) ? ((c) - 'a' + 'A') : (c)) -unsigned long prom_strtoul(const char *cp, const char **endp) +static unsigned long prom_strtoul(const char *cp, const char **endp) { unsigned long result = 0, base = 10, value; @@ -552,7 +554,7 @@ unsigned long prom_strtoul(const char *cp, const char **endp) return result; } -unsigned long prom_memparse(const char *ptr, const char **retptr) +static unsigned long prom_memparse(const char *ptr, const char **retptr) { unsigned long ret = prom_strtoul(ptr, retptr); int shift = 0; @@ -644,7 +646,8 @@ unsigned char ibm_architecture_vec[] = { W(0xfffe0000), W(0x003a0000), /* POWER5/POWER5+ */ W(0xffff0000), W(0x003e0000), /* POWER6 */ W(0xffff0000), W(0x003f0000), /* POWER7 */ - W(0xffff0000), W(0x004b0000), /* POWER8 */ + W(0xffff0000), W(0x004b0000), /* POWER8E */ + W(0xffff0000), W(0x004d0000), /* POWER8 */ W(0xffffffff), W(0x0f000004), /* all 2.07-compliant */ W(0xffffffff), W(0x0f000003), /* all 2.06-compliant */ W(0xffffffff), W(0x0f000002), /* all 2.05-compliant */ @@ -706,7 +709,7 @@ unsigned char ibm_architecture_vec[] = { * must match by the macro below. Update the definition if * the structure layout changes. */ -#define IBM_ARCH_VEC_NRCORES_OFFSET 117 +#define IBM_ARCH_VEC_NRCORES_OFFSET 125 W(NR_CPUS), /* number of cores supported */ 0, 0, @@ -723,7 +726,8 @@ unsigned char ibm_architecture_vec[] = { }; -/* Old method - ELF header with PT_NOTE sections */ +/* Old method - ELF header with PT_NOTE sections only works on BE */ +#ifdef __BIG_ENDIAN__ static struct fake_elf { Elf32_Ehdr elfhdr; Elf32_Phdr phdr[2]; @@ -809,6 +813,7 @@ static struct fake_elf { } } }; +#endif /* __BIG_ENDIAN__ */ static int __init prom_count_smt_threads(void) { @@ -851,9 +856,9 @@ static int __init prom_count_smt_threads(void) static void __init prom_send_capabilities(void) { - ihandle elfloader, root; + ihandle root; prom_arg_t ret; - u32 *cores; + __be32 *cores; root = call_prom("open", 1, 1, ADDR("/")); if (root != 0) { @@ -863,15 +868,15 @@ static void __init prom_send_capabilities(void) * (we assume this is the same for all cores) and use it to * divide NR_CPUS. */ - cores = (u32 *)&ibm_architecture_vec[IBM_ARCH_VEC_NRCORES_OFFSET]; - if (*cores != NR_CPUS) { + cores = (__be32 *)&ibm_architecture_vec[IBM_ARCH_VEC_NRCORES_OFFSET]; + if (be32_to_cpup(cores) != NR_CPUS) { prom_printf("WARNING ! " "ibm_architecture_vec structure inconsistent: %lu!\n", - *cores); + be32_to_cpup(cores)); } else { - *cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads()); + *cores = cpu_to_be32(DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads())); prom_printf("Max number of cores passed to firmware: %lu (NR_CPUS = %lu)\n", - *cores, NR_CPUS); + be32_to_cpup(cores), NR_CPUS); } /* try calling the ibm,client-architecture-support method */ @@ -892,17 +897,24 @@ static void __init prom_send_capabilities(void) prom_printf(" not implemented\n"); } - /* no ibm,client-architecture-support call, try the old way */ - elfloader = call_prom("open", 1, 1, ADDR("/packages/elf-loader")); - if (elfloader == 0) { - prom_printf("couldn't open /packages/elf-loader\n"); - return; +#ifdef __BIG_ENDIAN__ + { + ihandle elfloader; + + /* no ibm,client-architecture-support call, try the old way */ + elfloader = call_prom("open", 1, 1, + ADDR("/packages/elf-loader")); + if (elfloader == 0) { + prom_printf("couldn't open /packages/elf-loader\n"); + return; + } + call_prom("call-method", 3, 1, ADDR("process-elf-header"), + elfloader, ADDR(&fake_elf)); + call_prom("close", 1, 0, elfloader); } - call_prom("call-method", 3, 1, ADDR("process-elf-header"), - elfloader, ADDR(&fake_elf)); - call_prom("close", 1, 0, elfloader); +#endif /* __BIG_ENDIAN__ */ } -#endif +#endif /* #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */ /* * Memory allocation strategy... our layout is normally: @@ -1049,11 +1061,11 @@ static unsigned long __init prom_next_cell(int s, cell_t **cellp) p++; s--; } - r = *p++; + r = be32_to_cpu(*p++); #ifdef CONFIG_PPC64 if (s > 1) { r <<= 32; - r |= *(p++); + r |= be32_to_cpu(*(p++)); } #endif *cellp = p; @@ -1086,8 +1098,8 @@ static void __init reserve_mem(u64 base, u64 size) if (cnt >= (MEM_RESERVE_MAP_SIZE - 1)) prom_panic("Memory reserve map exhausted !\n"); - mem_reserve_map[cnt].base = base; - mem_reserve_map[cnt].size = size; + mem_reserve_map[cnt].base = cpu_to_be64(base); + mem_reserve_map[cnt].size = cpu_to_be64(size); mem_reserve_cnt = cnt + 1; } @@ -1101,6 +1113,7 @@ static void __init prom_init_mem(void) char *path, type[64]; unsigned int plen; cell_t *p, *endp; + __be32 val; u32 rac, rsc; /* @@ -1108,12 +1121,14 @@ static void __init prom_init_mem(void) * 1) top of RMO (first node) * 2) top of memory */ - rac = 2; - prom_getprop(prom.root, "#address-cells", &rac, sizeof(rac)); - rsc = 1; - prom_getprop(prom.root, "#size-cells", &rsc, sizeof(rsc)); - prom_debug("root_addr_cells: %x\n", (unsigned long) rac); - prom_debug("root_size_cells: %x\n", (unsigned long) rsc); + val = cpu_to_be32(2); + prom_getprop(prom.root, "#address-cells", &val, sizeof(val)); + rac = be32_to_cpu(val); + val = cpu_to_be32(1); + prom_getprop(prom.root, "#size-cells", &val, sizeof(rsc)); + rsc = be32_to_cpu(val); + prom_debug("root_addr_cells: %x\n", rac); + prom_debug("root_size_cells: %x\n", rsc); prom_debug("scanning memory:\n"); path = prom_scratch; @@ -1221,25 +1236,23 @@ static void __init prom_init_mem(void) static void __init prom_close_stdin(void) { - ihandle val; + __be32 val; + ihandle stdin; - if (prom_getprop(prom.chosen, "stdin", &val, sizeof(val)) > 0) - call_prom("close", 1, 0, val); + if (prom_getprop(prom.chosen, "stdin", &val, sizeof(val)) > 0) { + stdin = be32_to_cpu(val); + call_prom("close", 1, 0, stdin); + } } #ifdef CONFIG_PPC_POWERNV -static u64 __initdata prom_opal_size; -static u64 __initdata prom_opal_align; -static int __initdata prom_rtas_start_cpu; -static u64 __initdata prom_rtas_data; -static u64 __initdata prom_rtas_entry; - #ifdef CONFIG_PPC_EARLY_DEBUG_OPAL static u64 __initdata prom_opal_base; static u64 __initdata prom_opal_entry; #endif +#ifdef __BIG_ENDIAN__ /* XXX Don't change this structure without updating opal-takeover.S */ static struct opal_secondary_data { s64 ack; /* 0 */ @@ -1247,6 +1260,12 @@ static struct opal_secondary_data { struct opal_takeover_args args; /* 16 */ } opal_secondary_data; +static u64 __initdata prom_opal_align; +static u64 __initdata prom_opal_size; +static int __initdata prom_rtas_start_cpu; +static u64 __initdata prom_rtas_data; +static u64 __initdata prom_rtas_entry; + extern char opal_secondary_entry; static void __init prom_query_opal(void) @@ -1264,6 +1283,7 @@ static void __init prom_query_opal(void) } prom_printf("Querying for OPAL presence... "); + rc = opal_query_takeover(&prom_opal_size, &prom_opal_align); prom_debug("(rc = %ld) ", rc); @@ -1279,7 +1299,8 @@ static void __init prom_query_opal(void) prom_opal_align = 0x10000; } -static int prom_rtas_call(int token, int nargs, int nret, int *outputs, ...) +static int __init prom_rtas_call(int token, int nargs, int nret, + int *outputs, ...) { struct rtas_args rtas_args; va_list list; @@ -1424,6 +1445,7 @@ static void __init prom_opal_takeover(void) for (;;) opal_do_takeover(args); } +#endif /* __BIG_ENDIAN__ */ /* * Allocate room for and instantiate OPAL @@ -1434,6 +1456,7 @@ static void __init prom_instantiate_opal(void) ihandle opal_inst; u64 base, entry; u64 size = 0, align = 0x10000; + __be64 val64; u32 rets[2]; prom_debug("prom_instantiate_opal: start...\n"); @@ -1443,11 +1466,14 @@ static void __init prom_instantiate_opal(void) if (!PHANDLE_VALID(opal_node)) return; - prom_getprop(opal_node, "opal-runtime-size", &size, sizeof(size)); + val64 = 0; + prom_getprop(opal_node, "opal-runtime-size", &val64, sizeof(val64)); + size = be64_to_cpu(val64); if (size == 0) return; - prom_getprop(opal_node, "opal-runtime-alignment", &align, - sizeof(align)); + val64 = 0; + prom_getprop(opal_node, "opal-runtime-alignment", &val64,sizeof(val64)); + align = be64_to_cpu(val64); base = alloc_down(size, align, 0); if (base == 0) { @@ -1504,6 +1530,7 @@ static void __init prom_instantiate_rtas(void) phandle rtas_node; ihandle rtas_inst; u32 base, entry = 0; + __be32 val; u32 size = 0; prom_debug("prom_instantiate_rtas: start...\n"); @@ -1513,7 +1540,9 @@ static void __init prom_instantiate_rtas(void) if (!PHANDLE_VALID(rtas_node)) return; - prom_getprop(rtas_node, "rtas-size", &size, sizeof(size)); + val = 0; + prom_getprop(rtas_node, "rtas-size", &val, sizeof(size)); + size = be32_to_cpu(val); if (size == 0) return; @@ -1540,12 +1569,19 @@ static void __init prom_instantiate_rtas(void) reserve_mem(base, size); + val = cpu_to_be32(base); prom_setprop(rtas_node, "/rtas", "linux,rtas-base", - &base, sizeof(base)); + &val, sizeof(val)); + val = cpu_to_be32(entry); prom_setprop(rtas_node, "/rtas", "linux,rtas-entry", - &entry, sizeof(entry)); + &val, sizeof(val)); -#ifdef CONFIG_PPC_POWERNV + /* Check if it supports "query-cpu-stopped-state" */ + if (prom_getprop(rtas_node, "query-cpu-stopped-state", + &val, sizeof(val)) != PROM_ERROR) + rtas_has_query_cpu_stopped = true; + +#if defined(CONFIG_PPC_POWERNV) && defined(__BIG_ENDIAN__) /* PowerVN takeover hack */ prom_rtas_data = base; prom_rtas_entry = entry; @@ -1619,6 +1655,7 @@ static void __init prom_instantiate_sml(void) /* * Allocate room for and initialize TCE tables */ +#ifdef __BIG_ENDIAN__ static void __init prom_initialize_tce_table(void) { phandle node; @@ -1747,7 +1784,8 @@ static void __init prom_initialize_tce_table(void) /* Flag the first invalid entry */ prom_debug("ending prom_initialize_tce_table\n"); } -#endif +#endif /* __BIG_ENDIAN__ */ +#endif /* CONFIG_PPC64 */ /* * With CHRP SMP we need to use the OF to start the other processors. @@ -1776,7 +1814,6 @@ static void __init prom_initialize_tce_table(void) static void __init prom_hold_cpus(void) { unsigned long i; - unsigned int reg; phandle node; char type[64]; unsigned long *spinloop @@ -1785,6 +1822,18 @@ static void __init prom_hold_cpus(void) = (void *) LOW_ADDR(__secondary_hold_acknowledge); unsigned long secondary_hold = LOW_ADDR(__secondary_hold); + /* + * On pseries, if RTAS supports "query-cpu-stopped-state", + * we skip this stage, the CPUs will be started by the + * kernel using RTAS. + */ + if ((of_platform == PLATFORM_PSERIES || + of_platform == PLATFORM_PSERIES_LPAR) && + rtas_has_query_cpu_stopped) { + prom_printf("prom_hold_cpus: skipped\n"); + return; + } + prom_debug("prom_hold_cpus: start...\n"); prom_debug(" 1) spinloop = 0x%x\n", (unsigned long)spinloop); prom_debug(" 1) *spinloop = 0x%x\n", *spinloop); @@ -1802,6 +1851,9 @@ static void __init prom_hold_cpus(void) /* look for cpus */ for (node = 0; prom_next_node(&node); ) { + unsigned int cpu_no; + __be32 reg; + type[0] = 0; prom_getprop(node, "device_type", type, sizeof(type)); if (strcmp(type, "cpu") != 0) @@ -1812,10 +1864,11 @@ static void __init prom_hold_cpus(void) if (strcmp(type, "okay") != 0) continue; - reg = -1; + reg = cpu_to_be32(-1); /* make sparse happy */ prom_getprop(node, "reg", ®, sizeof(reg)); + cpu_no = be32_to_cpu(reg); - prom_debug("cpu hw idx = %lu\n", reg); + prom_debug("cpu hw idx = %lu\n", cpu_no); /* Init the acknowledge var which will be reset by * the secondary cpu when it awakens from its OF @@ -1823,24 +1876,24 @@ static void __init prom_hold_cpus(void) */ *acknowledge = (unsigned long)-1; - if (reg != prom.cpu) { + if (cpu_no != prom.cpu) { /* Primary Thread of non-boot cpu or any thread */ - prom_printf("starting cpu hw idx %lu... ", reg); + prom_printf("starting cpu hw idx %lu... ", cpu_no); call_prom("start-cpu", 3, 0, node, - secondary_hold, reg); + secondary_hold, cpu_no); for (i = 0; (i < 100000000) && (*acknowledge == ((unsigned long)-1)); i++ ) mb(); - if (*acknowledge == reg) + if (*acknowledge == cpu_no) prom_printf("done\n"); else prom_printf("failed: %x\n", *acknowledge); } #ifdef CONFIG_SMP else - prom_printf("boot cpu hw idx %lu\n", reg); + prom_printf("boot cpu hw idx %lu\n", cpu_no); #endif /* CONFIG_SMP */ } @@ -1894,6 +1947,7 @@ static void __init prom_find_mmu(void) prom.memory = call_prom("open", 1, 1, ADDR("/memory")); prom_getprop(prom.chosen, "mmu", &prom.mmumap, sizeof(prom.mmumap)); + prom.mmumap = be32_to_cpu(prom.mmumap); if (!IHANDLE_VALID(prom.memory) || !IHANDLE_VALID(prom.mmumap)) of_workarounds &= ~OF_WA_CLAIM; /* hmmm */ } @@ -1905,17 +1959,19 @@ static void __init prom_init_stdout(void) { char *path = of_stdout_device; char type[16]; - u32 val; + phandle stdout_node; + __be32 val; if (prom_getprop(prom.chosen, "stdout", &val, sizeof(val)) <= 0) prom_panic("cannot find stdout"); - prom.stdout = val; + prom.stdout = be32_to_cpu(val); /* Get the full OF pathname of the stdout device */ memset(path, 0, 256); call_prom("instance-to-path", 3, 1, prom.stdout, path, 255); - val = call_prom("instance-to-package", 1, 1, prom.stdout); + stdout_node = call_prom("instance-to-package", 1, 1, prom.stdout); + val = cpu_to_be32(stdout_node); prom_setprop(prom.chosen, "/chosen", "linux,stdout-package", &val, sizeof(val)); prom_printf("OF stdout device is: %s\n", of_stdout_device); @@ -1924,9 +1980,9 @@ static void __init prom_init_stdout(void) /* If it's a display, note it */ memset(type, 0, sizeof(type)); - prom_getprop(val, "device_type", type, sizeof(type)); + prom_getprop(stdout_node, "device_type", type, sizeof(type)); if (strcmp(type, "display") == 0) - prom_setprop(val, path, "linux,boot-display", NULL, 0); + prom_setprop(stdout_node, path, "linux,boot-display", NULL, 0); } static int __init prom_find_machine_type(void) @@ -2081,6 +2137,22 @@ static void __init prom_check_displays(void) clut[2]) != 0) break; #endif /* CONFIG_LOGO_LINUX_CLUT224 */ + +#ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX + if (prom_getprop(node, "linux,boot-display", NULL, 0) != + PROM_ERROR) { + u32 width, height, pitch, addr; + + prom_printf("Setting btext !\n"); + prom_getprop(node, "width", &width, 4); + prom_getprop(node, "height", &height, 4); + prom_getprop(node, "linebytes", &pitch, 4); + prom_getprop(node, "address", &addr, 4); + prom_printf("W=%d H=%d LB=%d addr=0x%x\n", + width, height, pitch, addr); + btext_setup_display(width, height, 8, pitch, addr); + } +#endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */ } } @@ -2116,8 +2188,10 @@ static void __init *make_room(unsigned long *mem_start, unsigned long *mem_end, return ret; } -#define dt_push_token(token, mem_start, mem_end) \ - do { *((u32 *)make_room(mem_start, mem_end, 4, 4)) = token; } while(0) +#define dt_push_token(token, mem_start, mem_end) do { \ + void *room = make_room(mem_start, mem_end, 4, 4); \ + *(__be32 *)room = cpu_to_be32(token); \ + } while(0) static unsigned long __init dt_find_string(char *str) { @@ -2290,7 +2364,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, dt_push_token(4, mem_start, mem_end); dt_push_token(soff, mem_start, mem_end); valp = make_room(mem_start, mem_end, 4, 4); - *(u32 *)valp = node; + *(__be32 *)valp = cpu_to_be32(node); } } @@ -2363,16 +2437,16 @@ static void __init flatten_device_tree(void) dt_struct_end = PAGE_ALIGN(mem_start); /* Finish header */ - hdr->boot_cpuid_phys = prom.cpu; - hdr->magic = OF_DT_HEADER; - hdr->totalsize = dt_struct_end - dt_header_start; - hdr->off_dt_struct = dt_struct_start - dt_header_start; - hdr->off_dt_strings = dt_string_start - dt_header_start; - hdr->dt_strings_size = dt_string_end - dt_string_start; - hdr->off_mem_rsvmap = ((unsigned long)rsvmap) - dt_header_start; - hdr->version = OF_DT_VERSION; + hdr->boot_cpuid_phys = cpu_to_be32(prom.cpu); + hdr->magic = cpu_to_be32(OF_DT_HEADER); + hdr->totalsize = cpu_to_be32(dt_struct_end - dt_header_start); + hdr->off_dt_struct = cpu_to_be32(dt_struct_start - dt_header_start); + hdr->off_dt_strings = cpu_to_be32(dt_string_start - dt_header_start); + hdr->dt_strings_size = cpu_to_be32(dt_string_end - dt_string_start); + hdr->off_mem_rsvmap = cpu_to_be32(((unsigned long)rsvmap) - dt_header_start); + hdr->version = cpu_to_be32(OF_DT_VERSION); /* Version 16 is not backward compatible */ - hdr->last_comp_version = 0x10; + hdr->last_comp_version = cpu_to_be32(0x10); /* Copy the reserve map in */ memcpy(rsvmap, mem_reserve_map, sizeof(mem_reserve_map)); @@ -2383,8 +2457,8 @@ static void __init flatten_device_tree(void) prom_printf("reserved memory map:\n"); for (i = 0; i < mem_reserve_cnt; i++) prom_printf(" %x - %x\n", - mem_reserve_map[i].base, - mem_reserve_map[i].size); + be64_to_cpu(mem_reserve_map[i].base), + be64_to_cpu(mem_reserve_map[i].size)); } #endif /* Bump mem_reserve_cnt to cause further reservations to fail @@ -2396,7 +2470,6 @@ static void __init flatten_device_tree(void) dt_string_start, dt_string_end); prom_printf("Device tree struct 0x%x -> 0x%x\n", dt_struct_start, dt_struct_end); - } #ifdef CONFIG_PPC_MAPLE @@ -2729,18 +2802,19 @@ static void __init fixup_device_tree(void) static void __init prom_find_boot_cpu(void) { - u32 getprop_rval; + __be32 rval; ihandle prom_cpu; phandle cpu_pkg; - prom.cpu = 0; - if (prom_getprop(prom.chosen, "cpu", &prom_cpu, sizeof(prom_cpu)) <= 0) + rval = 0; + if (prom_getprop(prom.chosen, "cpu", &rval, sizeof(rval)) <= 0) return; + prom_cpu = be32_to_cpu(rval); cpu_pkg = call_prom("instance-to-package", 1, 1, prom_cpu); - prom_getprop(cpu_pkg, "reg", &getprop_rval, sizeof(getprop_rval)); - prom.cpu = getprop_rval; + prom_getprop(cpu_pkg, "reg", &rval, sizeof(rval)); + prom.cpu = be32_to_cpu(rval); prom_debug("Booting CPU hw index = %lu\n", prom.cpu); } @@ -2749,15 +2823,15 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4) { #ifdef CONFIG_BLK_DEV_INITRD if (r3 && r4 && r4 != 0xdeadbeef) { - unsigned long val; + __be64 val; prom_initrd_start = is_kernel_addr(r3) ? __pa(r3) : r3; prom_initrd_end = prom_initrd_start + r4; - val = prom_initrd_start; + val = cpu_to_be64(prom_initrd_start); prom_setprop(prom.chosen, "/chosen", "linux,initrd-start", &val, sizeof(val)); - val = prom_initrd_end; + val = cpu_to_be64(prom_initrd_end); prom_setprop(prom.chosen, "/chosen", "linux,initrd-end", &val, sizeof(val)); @@ -2914,7 +2988,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, */ prom_check_displays(); -#ifdef CONFIG_PPC64 +#if defined(CONFIG_PPC64) && defined(__BIG_ENDIAN__) /* * Initialize IOMMU (TCE tables) on pSeries. Do that before anything else * that uses the allocator, we need to make sure we get the top of memory @@ -2933,6 +3007,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, prom_instantiate_rtas(); #ifdef CONFIG_PPC_POWERNV +#ifdef __BIG_ENDIAN__ /* Detect HAL and try instanciating it & doing takeover */ if (of_platform == PLATFORM_PSERIES_LPAR) { prom_query_opal(); @@ -2940,9 +3015,11 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, prom_opal_hold_cpus(); prom_opal_takeover(); } - } else if (of_platform == PLATFORM_OPAL) + } else +#endif /* __BIG_ENDIAN__ */ + if (of_platform == PLATFORM_OPAL) prom_instantiate_opal(); -#endif +#endif /* CONFIG_PPC_POWERNV */ #ifdef CONFIG_PPC64 /* instantiate sml */ @@ -2953,6 +3030,8 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, * On non-powermacs, put all CPUs in spin-loops. * * PowerMacs use a different mechanism to spin CPUs + * + * (This must be done after instanciating RTAS) */ if (of_platform != PLATFORM_POWERMAC && of_platform != PLATFORM_OPAL) @@ -2961,10 +3040,11 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, /* * Fill in some infos for use by the kernel later on */ - if (prom_memory_limit) + if (prom_memory_limit) { + __be64 val = cpu_to_be64(prom_memory_limit); prom_setprop(prom.chosen, "/chosen", "linux,memory-limit", - &prom_memory_limit, - sizeof(prom_memory_limit)); + &val, sizeof(val)); + } #ifdef CONFIG_PPC64 if (prom_iommu_off) prom_setprop(prom.chosen, "/chosen", "linux,iommu-off", diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index 3765da6be4f2..b0c263da219a 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -22,7 +22,8 @@ __secondary_hold_acknowledge __secondary_hold_spinloop __start strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224 reloc_got2 kernstart_addr memstart_addr linux_banner _stext opal_query_takeover opal_do_takeover opal_enter_rtas opal_secondary_entry -boot_command_line __prom_init_toc_start __prom_init_toc_end" +boot_command_line __prom_init_toc_start __prom_init_toc_end +btext_setup_display" NM="$1" OBJ="$2" diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c index 4e1331b8eb33..6295e646f78c 100644 --- a/arch/powerpc/kernel/prom_parse.c +++ b/arch/powerpc/kernel/prom_parse.c @@ -7,28 +7,27 @@ #include <linux/of_address.h> #include <asm/prom.h> -void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop, - unsigned long *busno, unsigned long *phys, unsigned long *size) +void of_parse_dma_window(struct device_node *dn, const __be32 *dma_window, + unsigned long *busno, unsigned long *phys, + unsigned long *size) { - const u32 *dma_window; u32 cells; - const unsigned char *prop; - - dma_window = dma_window_prop; + const __be32 *prop; /* busno is always one cell */ - *busno = *(dma_window++); + *busno = of_read_number(dma_window, 1); + dma_window++; prop = of_get_property(dn, "ibm,#dma-address-cells", NULL); if (!prop) prop = of_get_property(dn, "#address-cells", NULL); - cells = prop ? *(u32 *)prop : of_n_addr_cells(dn); + cells = prop ? of_read_number(prop, 1) : of_n_addr_cells(dn); *phys = of_read_number(dma_window, cells); dma_window += cells; prop = of_get_property(dn, "ibm,#dma-size-cells", NULL); - cells = prop ? *(u32 *)prop : of_n_size_cells(dn); + cells = prop ? of_read_number(prop, 1) : of_n_size_cells(dn); *size = of_read_number(dma_window, cells); } diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 98c2fc198712..9a0d24c390a3 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -975,16 +975,12 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL; hw_brk.len = 8; #ifdef CONFIG_HAVE_HW_BREAKPOINT - if (ptrace_get_breakpoints(task) < 0) - return -ESRCH; - bp = thread->ptrace_bps[0]; if ((!data) || !(hw_brk.type & HW_BRK_TYPE_RDWR)) { if (bp) { unregister_hw_breakpoint(bp); thread->ptrace_bps[0] = NULL; } - ptrace_put_breakpoints(task); return 0; } if (bp) { @@ -997,11 +993,9 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, ret = modify_user_hw_breakpoint(bp, &attr); if (ret) { - ptrace_put_breakpoints(task); return ret; } thread->ptrace_bps[0] = bp; - ptrace_put_breakpoints(task); thread->hw_brk = hw_brk; return 0; } @@ -1016,12 +1010,9 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, ptrace_triggered, NULL, task); if (IS_ERR(bp)) { thread->ptrace_bps[0] = NULL; - ptrace_put_breakpoints(task); return PTR_ERR(bp); } - ptrace_put_breakpoints(task); - #endif /* CONFIG_HAVE_HW_BREAKPOINT */ task->thread.hw_brk = hw_brk; #else /* CONFIG_PPC_ADV_DEBUG_REGS */ @@ -1440,24 +1431,19 @@ static long ppc_set_hwdebug(struct task_struct *child, if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) brk.type |= HW_BRK_TYPE_WRITE; #ifdef CONFIG_HAVE_HW_BREAKPOINT - if (ptrace_get_breakpoints(child) < 0) - return -ESRCH; - /* * Check if the request is for 'range' breakpoints. We can * support it if range < 8 bytes. */ - if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) { + if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) len = bp_info->addr2 - bp_info->addr; - } else if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) { - ptrace_put_breakpoints(child); + else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT) + len = 1; + else return -EINVAL; - } bp = thread->ptrace_bps[0]; - if (bp) { - ptrace_put_breakpoints(child); + if (bp) return -ENOSPC; - } /* Create a new breakpoint request if one doesn't exist already */ hw_breakpoint_init(&attr); @@ -1469,11 +1455,9 @@ static long ppc_set_hwdebug(struct task_struct *child, ptrace_triggered, NULL, child); if (IS_ERR(bp)) { thread->ptrace_bps[0] = NULL; - ptrace_put_breakpoints(child); return PTR_ERR(bp); } - ptrace_put_breakpoints(child); return 1; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ @@ -1517,16 +1501,12 @@ static long ppc_del_hwdebug(struct task_struct *child, long data) return -EINVAL; #ifdef CONFIG_HAVE_HW_BREAKPOINT - if (ptrace_get_breakpoints(child) < 0) - return -ESRCH; - bp = thread->ptrace_bps[0]; if (bp) { unregister_hw_breakpoint(bp); thread->ptrace_bps[0] = NULL; } else ret = -ENOENT; - ptrace_put_breakpoints(child); return ret; #else /* CONFIG_HAVE_HW_BREAKPOINT */ if (child->thread.hw_brk.address == 0) diff --git a/arch/powerpc/kernel/reloc_32.S b/arch/powerpc/kernel/reloc_32.S index ef46ba6e094f..f366fedb0872 100644 --- a/arch/powerpc/kernel/reloc_32.S +++ b/arch/powerpc/kernel/reloc_32.S @@ -166,7 +166,7 @@ ha16: /* R_PPC_ADDR16_LO */ lo16: cmpwi r4, R_PPC_ADDR16_LO - bne nxtrela + bne unknown_type lwz r4, 0(r9) /* r_offset */ lwz r0, 8(r9) /* r_addend */ add r0, r0, r3 @@ -191,6 +191,7 @@ nxtrela: dcbst r4,r7 sync /* Ensure the data is flushed before icbi */ icbi r4,r7 +unknown_type: cmpwi r8, 0 /* relasz = 0 ? */ ble done add r9, r9, r6 /* move to next entry in the .rela table */ diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 52add6f3e201..4cf674d7d5ae 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -91,7 +91,7 @@ static void unlock_rtas(unsigned long flags) * are designed only for very early low-level debugging, which * is why the token is hard-coded to 10. */ -static void call_rtas_display_status(char c) +static void call_rtas_display_status(unsigned char c) { struct rtas_args *args = &rtas.args; unsigned long s; @@ -100,11 +100,11 @@ static void call_rtas_display_status(char c) return; s = lock_rtas(); - args->token = 10; - args->nargs = 1; - args->nret = 1; - args->rets = (rtas_arg_t *)&(args->args[1]); - args->args[0] = (unsigned char)c; + args->token = cpu_to_be32(10); + args->nargs = cpu_to_be32(1); + args->nret = cpu_to_be32(1); + args->rets = &(args->args[1]); + args->args[0] = cpu_to_be32(c); enter_rtas(__pa(args)); @@ -204,7 +204,7 @@ void rtas_progress(char *s, unsigned short hex) { struct device_node *root; int width; - const int *p; + const __be32 *p; char *os; static int display_character, set_indicator; static int display_width, display_lines, form_feed; @@ -221,13 +221,13 @@ void rtas_progress(char *s, unsigned short hex) if ((root = of_find_node_by_path("/rtas"))) { if ((p = of_get_property(root, "ibm,display-line-length", NULL))) - display_width = *p; + display_width = be32_to_cpu(*p); if ((p = of_get_property(root, "ibm,form-feed", NULL))) - form_feed = *p; + form_feed = be32_to_cpu(*p); if ((p = of_get_property(root, "ibm,display-number-of-lines", NULL))) - display_lines = *p; + display_lines = be32_to_cpu(*p); row_width = of_get_property(root, "ibm,display-truncation-length", NULL); of_node_put(root); @@ -322,11 +322,11 @@ EXPORT_SYMBOL(rtas_progress); /* needed by rtas_flash module */ int rtas_token(const char *service) { - const int *tokp; + const __be32 *tokp; if (rtas.dev == NULL) return RTAS_UNKNOWN_SERVICE; tokp = of_get_property(rtas.dev, service, NULL); - return tokp ? *tokp : RTAS_UNKNOWN_SERVICE; + return tokp ? be32_to_cpu(*tokp) : RTAS_UNKNOWN_SERVICE; } EXPORT_SYMBOL(rtas_token); @@ -380,11 +380,11 @@ static char *__fetch_rtas_last_error(char *altbuf) bufsz = rtas_get_error_log_max(); - err_args.token = rtas_last_error_token; - err_args.nargs = 2; - err_args.nret = 1; - err_args.args[0] = (rtas_arg_t)__pa(rtas_err_buf); - err_args.args[1] = bufsz; + err_args.token = cpu_to_be32(rtas_last_error_token); + err_args.nargs = cpu_to_be32(2); + err_args.nret = cpu_to_be32(1); + err_args.args[0] = cpu_to_be32(__pa(rtas_err_buf)); + err_args.args[1] = cpu_to_be32(bufsz); err_args.args[2] = 0; save_args = rtas.args; @@ -433,13 +433,13 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...) s = lock_rtas(); rtas_args = &rtas.args; - rtas_args->token = token; - rtas_args->nargs = nargs; - rtas_args->nret = nret; - rtas_args->rets = (rtas_arg_t *)&(rtas_args->args[nargs]); + rtas_args->token = cpu_to_be32(token); + rtas_args->nargs = cpu_to_be32(nargs); + rtas_args->nret = cpu_to_be32(nret); + rtas_args->rets = &(rtas_args->args[nargs]); va_start(list, outputs); for (i = 0; i < nargs; ++i) - rtas_args->args[i] = va_arg(list, rtas_arg_t); + rtas_args->args[i] = cpu_to_be32(va_arg(list, __u32)); va_end(list); for (i = 0; i < nret; ++i) @@ -449,13 +449,13 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...) /* A -1 return code indicates that the last command couldn't be completed due to a hardware error. */ - if (rtas_args->rets[0] == -1) + if (be32_to_cpu(rtas_args->rets[0]) == -1) buff_copy = __fetch_rtas_last_error(NULL); if (nret > 1 && outputs != NULL) for (i = 0; i < nret-1; ++i) - outputs[i] = rtas_args->rets[i+1]; - ret = (nret > 0)? rtas_args->rets[0]: 0; + outputs[i] = be32_to_cpu(rtas_args->rets[i+1]); + ret = (nret > 0)? be32_to_cpu(rtas_args->rets[0]): 0; unlock_rtas(s); @@ -588,8 +588,8 @@ bool rtas_indicator_present(int token, int *maxindex) { int proplen, count, i; const struct indicator_elem { - u32 token; - u32 maxindex; + __be32 token; + __be32 maxindex; } *indicators; indicators = of_get_property(rtas.dev, "rtas-indicators", &proplen); @@ -599,10 +599,10 @@ bool rtas_indicator_present(int token, int *maxindex) count = proplen / sizeof(struct indicator_elem); for (i = 0; i < count; i++) { - if (indicators[i].token != token) + if (__be32_to_cpu(indicators[i].token) != token) continue; if (maxindex) - *maxindex = indicators[i].maxindex; + *maxindex = __be32_to_cpu(indicators[i].maxindex); return true; } @@ -1097,19 +1097,19 @@ void __init rtas_initialize(void) */ rtas.dev = of_find_node_by_name(NULL, "rtas"); if (rtas.dev) { - const u32 *basep, *entryp, *sizep; + const __be32 *basep, *entryp, *sizep; basep = of_get_property(rtas.dev, "linux,rtas-base", NULL); sizep = of_get_property(rtas.dev, "rtas-size", NULL); if (basep != NULL && sizep != NULL) { - rtas.base = *basep; - rtas.size = *sizep; + rtas.base = __be32_to_cpu(*basep); + rtas.size = __be32_to_cpu(*sizep); entryp = of_get_property(rtas.dev, "linux,rtas-entry", NULL); if (entryp == NULL) /* Ugh */ rtas.entry = rtas.base; else - rtas.entry = *entryp; + rtas.entry = __be32_to_cpu(*entryp); } else rtas.dev = NULL; } @@ -1172,7 +1172,7 @@ int __init early_init_dt_scan_rtas(unsigned long node, static arch_spinlock_t timebase_lock; static u64 timebase = 0; -void __cpuinit rtas_give_timebase(void) +void rtas_give_timebase(void) { unsigned long flags; @@ -1189,7 +1189,7 @@ void __cpuinit rtas_give_timebase(void) local_irq_restore(flags); } -void __cpuinit rtas_take_timebase(void) +void rtas_take_timebase(void) { while (!timebase) barrier(); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 63d051f5b7a5..3d261c071fc8 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -436,7 +436,8 @@ void __init smp_setup_cpu_maps(void) DBG("smp_setup_cpu_maps()\n"); while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < nr_cpu_ids) { - const int *intserv; + const __be32 *intserv; + __be32 cpu_be; int j, len; DBG(" * %s...\n", dn->full_name); @@ -450,15 +451,17 @@ void __init smp_setup_cpu_maps(void) } else { DBG(" no ibm,ppc-interrupt-server#s -> 1 thread\n"); intserv = of_get_property(dn, "reg", NULL); - if (!intserv) - intserv = &cpu; /* assume logical == phys */ + if (!intserv) { + cpu_be = cpu_to_be32(cpu); + intserv = &cpu_be; /* assume logical == phys */ + } } for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) { DBG(" thread %d -> cpu %d (hard id %d)\n", - j, cpu, intserv[j]); + j, cpu, be32_to_cpu(intserv[j])); set_cpu_present(cpu, true); - set_hard_smp_processor_id(cpu, intserv[j]); + set_hard_smp_processor_id(cpu, be32_to_cpu(intserv[j])); set_cpu_possible(cpu, true); cpu++; } diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index a8f54ecb091f..a4bbcae72578 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -38,6 +38,7 @@ #include <asm/serial.h> #include <asm/udbg.h> #include <asm/mmu_context.h> +#include <asm/epapr_hcalls.h> #include "setup.h" @@ -128,6 +129,8 @@ notrace void __init machine_init(u64 dt_ptr) /* Do some early initialization based on the flat device tree */ early_init_devtree(__va(dt_ptr)); + epapr_paravirt_early_init(); + early_init_mmu(); probe_machine(); @@ -326,5 +329,4 @@ void __init setup_arch(char **cmdline_p) /* Initialize the MMU context management stuff */ mmu_context_init(); - } diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index e379d3fd1694..278ca93e1f28 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -10,7 +10,7 @@ * 2 of the License, or (at your option) any later version. */ -#undef DEBUG +#define DEBUG #include <linux/export.h> #include <linux/string.h> @@ -66,6 +66,7 @@ #include <asm/code-patching.h> #include <asm/kvm_ppc.h> #include <asm/hugetlb.h> +#include <asm/epapr_hcalls.h> #include "setup.h" @@ -76,7 +77,7 @@ #endif int boot_cpuid = 0; -int __initdata spinning_secondaries; +int spinning_secondaries; u64 ppc64_pft_size; /* Pick defaults since we might want to patch instructions @@ -215,6 +216,8 @@ void __init early_setup(unsigned long dt_ptr) */ early_init_devtree(__va(dt_ptr)); + epapr_paravirt_early_init(); + /* Now we know the logical id of our boot cpu, setup the paca. */ setup_paca(&paca[boot_cpuid]); fixup_boot_paca(); @@ -229,6 +232,8 @@ void __init early_setup(unsigned long dt_ptr) /* Initialize the hash table or TLB handling */ early_init_mmu(); + kvm_cma_reserve(); + /* * Reserve any gigantic pages requested on the command line. * memblock needs to have been initialized by the time this is @@ -237,6 +242,18 @@ void __init early_setup(unsigned long dt_ptr) reserve_hugetlb_gpages(); DBG(" <- early_setup()\n"); + +#ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX + /* + * This needs to be done *last* (after the above DBG() even) + * + * Right after we return from this function, we turn on the MMU + * which means the real-mode access trick that btext does will + * no longer work, it needs to switch to using a real MMU + * mapping. This call will ensure that it does + */ + btext_map(); +#endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */ } #ifdef CONFIG_SMP @@ -305,14 +322,14 @@ static void __init initialize_cache_info(void) * d-cache and i-cache sizes... -Peter */ if (num_cpus == 1) { - const u32 *sizep, *lsizep; + const __be32 *sizep, *lsizep; u32 size, lsize; size = 0; lsize = cur_cpu_spec->dcache_bsize; sizep = of_get_property(np, "d-cache-size", NULL); if (sizep != NULL) - size = *sizep; + size = be32_to_cpu(*sizep); lsizep = of_get_property(np, "d-cache-block-size", NULL); /* fallback if block size missing */ @@ -321,8 +338,8 @@ static void __init initialize_cache_info(void) "d-cache-line-size", NULL); if (lsizep != NULL) - lsize = *lsizep; - if (sizep == 0 || lsizep == 0) + lsize = be32_to_cpu(*lsizep); + if (sizep == NULL || lsizep == NULL) DBG("Argh, can't find dcache properties ! " "sizep: %p, lsizep: %p\n", sizep, lsizep); @@ -335,7 +352,7 @@ static void __init initialize_cache_info(void) lsize = cur_cpu_spec->icache_bsize; sizep = of_get_property(np, "i-cache-size", NULL); if (sizep != NULL) - size = *sizep; + size = be32_to_cpu(*sizep); lsizep = of_get_property(np, "i-cache-block-size", NULL); if (lsizep == NULL) @@ -343,8 +360,8 @@ static void __init initialize_cache_info(void) "i-cache-line-size", NULL); if (lsizep != NULL) - lsize = *lsizep; - if (sizep == 0 || lsizep == 0) + lsize = be32_to_cpu(*lsizep); + if (sizep == NULL || lsizep == NULL) DBG("Argh, can't find icache properties ! " "sizep: %p, lsizep: %p\n", sizep, lsizep); @@ -609,8 +626,6 @@ void __init setup_arch(char **cmdline_p) /* Initialize the MMU context management stuff */ mmu_context_init(); - kvm_linear_init(); - /* Interrupt code needs to be 64K-aligned */ if ((unsigned long)_stext & 0xffff) panic("Kernelbase not 64K-aligned (0x%lx)!\n", @@ -701,8 +716,7 @@ void __init setup_per_cpu_areas(void) #endif -#ifdef CONFIG_PPC_INDIRECT_IO +#if defined(CONFIG_PPC_INDIRECT_PIO) || defined(CONFIG_PPC_INDIRECT_MMIO) struct ppc_pci_io ppc_pci_io; EXPORT_SYMBOL(ppc_pci_io); -#endif /* CONFIG_PPC_INDIRECT_IO */ - +#endif diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 201385c3a1ae..bebdf1a1a540 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -407,7 +407,8 @@ inline unsigned long copy_transact_fpr_from_user(struct task_struct *task, * altivec/spe instructions at some point. */ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, - int sigret, int ctx_has_vsx_region) + struct mcontext __user *tm_frame, int sigret, + int ctx_has_vsx_region) { unsigned long msr = regs->msr; @@ -435,7 +436,10 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, * use altivec. Since VSCR only contains 32 bits saved in the least * significant bits of a vector, we "cheat" and stuff VRSAVE in the * most significant bits of that same vector. --BenH + * Note that the current VRSAVE value is in the SPR at this point. */ + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + current->thread.vrsave = mfspr(SPRN_VRSAVE); if (__put_user(current->thread.vrsave, (u32 __user *)&frame->mc_vregs[32])) return 1; #endif /* CONFIG_ALTIVEC */ @@ -475,6 +479,12 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, if (__put_user(msr, &frame->mc_gregs[PT_MSR])) return 1; + /* We need to write 0 the MSR top 32 bits in the tm frame so that we + * can check it on the restore to see if TM is active + */ + if (tm_frame && __put_user(0, &tm_frame->mc_gregs[PT_MSR])) + return 1; + if (sigret) { /* Set up the sigreturn trampoline: li r0,sigret; sc */ if (__put_user(0x38000000UL + sigret, &frame->tramp[0]) @@ -550,6 +560,8 @@ static int save_tm_user_regs(struct pt_regs *regs, * significant bits of a vector, we "cheat" and stuff VRSAVE in the * most significant bits of that same vector. --BenH */ + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + current->thread.vrsave = mfspr(SPRN_VRSAVE); if (__put_user(current->thread.vrsave, (u32 __user *)&frame->mc_vregs[32])) return 1; @@ -689,6 +701,8 @@ static long restore_user_regs(struct pt_regs *regs, /* Always get VRSAVE back */ if (__get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32])) return 1; + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + mtspr(SPRN_VRSAVE, current->thread.vrsave); #endif /* CONFIG_ALTIVEC */ if (copy_fpr_from_user(current, &sr->mc_fregs)) return 1; @@ -747,7 +761,7 @@ static long restore_tm_user_regs(struct pt_regs *regs, struct mcontext __user *tm_sr) { long err; - unsigned long msr; + unsigned long msr, msr_hi; #ifdef CONFIG_VSX int i; #endif @@ -802,6 +816,8 @@ static long restore_tm_user_regs(struct pt_regs *regs, __get_user(current->thread.transact_vrsave, (u32 __user *)&tm_sr->mc_vregs[32])) return 1; + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + mtspr(SPRN_VRSAVE, current->thread.vrsave); #endif /* CONFIG_ALTIVEC */ regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1); @@ -852,8 +868,11 @@ static long restore_tm_user_regs(struct pt_regs *regs, tm_enable(); /* This loads the checkpointed FP/VEC state, if used */ tm_recheckpoint(¤t->thread, msr); - /* The task has moved into TM state S, so ensure MSR reflects this */ - regs->msr = (regs->msr & ~MSR_TS_MASK) | MSR_TS_S; + /* Get the top half of the MSR */ + if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR])) + return 1; + /* Pull in MSR TM from user context */ + regs->msr = (regs->msr & ~MSR_TS_MASK) | ((msr_hi<<32) & MSR_TS_MASK); /* This loads the speculative FP/VEC state, if used */ if (msr & MSR_FP) { @@ -952,6 +971,7 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, { struct rt_sigframe __user *rt_sf; struct mcontext __user *frame; + struct mcontext __user *tm_frame = NULL; void __user *addr; unsigned long newsp = 0; int sigret; @@ -985,23 +1005,24 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM + tm_frame = &rt_sf->uc_transact.uc_mcontext; if (MSR_TM_ACTIVE(regs->msr)) { - if (save_tm_user_regs(regs, &rt_sf->uc.uc_mcontext, - &rt_sf->uc_transact.uc_mcontext, sigret)) + if (save_tm_user_regs(regs, frame, tm_frame, sigret)) goto badframe; } else #endif - if (save_user_regs(regs, frame, sigret, 1)) + { + if (save_user_regs(regs, frame, tm_frame, sigret, 1)) goto badframe; + } regs->link = tramp; #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (MSR_TM_ACTIVE(regs->msr)) { if (__put_user((unsigned long)&rt_sf->uc_transact, &rt_sf->uc.uc_link) - || __put_user(to_user_ptr(&rt_sf->uc_transact.uc_mcontext), - &rt_sf->uc_transact.uc_regs)) + || __put_user((unsigned long)tm_frame, &rt_sf->uc_transact.uc_regs)) goto badframe; } else @@ -1170,7 +1191,7 @@ long sys_swapcontext(struct ucontext __user *old_ctx, mctx = (struct mcontext __user *) ((unsigned long) &old_ctx->uc_mcontext & ~0xfUL); if (!access_ok(VERIFY_WRITE, old_ctx, ctx_size) - || save_user_regs(regs, mctx, 0, ctx_has_vsx_region) + || save_user_regs(regs, mctx, NULL, 0, ctx_has_vsx_region) || put_sigset_t(&old_ctx->uc_sigmask, ¤t->blocked) || __put_user(to_user_ptr(mctx), &old_ctx->uc_regs)) return -EFAULT; @@ -1233,7 +1254,7 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, if (__get_user(msr_hi, &mcp->mc_gregs[PT_MSR])) goto bad; - if (MSR_TM_SUSPENDED(msr_hi<<32)) { + if (MSR_TM_ACTIVE(msr_hi<<32)) { /* We only recheckpoint on return if we're * transaction. */ @@ -1392,6 +1413,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, { struct sigcontext __user *sc; struct sigframe __user *frame; + struct mcontext __user *tm_mctx = NULL; unsigned long newsp = 0; int sigret; unsigned long tramp; @@ -1425,6 +1447,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM + tm_mctx = &frame->mctx_transact; if (MSR_TM_ACTIVE(regs->msr)) { if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact, sigret)) @@ -1432,8 +1455,10 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, } else #endif - if (save_user_regs(regs, &frame->mctx, sigret, 1)) + { + if (save_user_regs(regs, &frame->mctx, tm_mctx, sigret, 1)) goto badframe; + } regs->link = tramp; @@ -1481,16 +1506,22 @@ badframe: long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, struct pt_regs *regs) { + struct sigframe __user *sf; struct sigcontext __user *sc; struct sigcontext sigctx; struct mcontext __user *sr; void __user *addr; sigset_t set; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + struct mcontext __user *mcp, *tm_mcp; + unsigned long msr_hi; +#endif /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; - sc = (struct sigcontext __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE); + sf = (struct sigframe __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE); + sc = &sf->sctx; addr = sc; if (copy_from_user(&sigctx, sc, sizeof(sigctx))) goto badframe; @@ -1507,11 +1538,25 @@ long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, #endif set_current_blocked(&set); - sr = (struct mcontext __user *)from_user_ptr(sigctx.regs); - addr = sr; - if (!access_ok(VERIFY_READ, sr, sizeof(*sr)) - || restore_user_regs(regs, sr, 1)) +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + mcp = (struct mcontext __user *)&sf->mctx; + tm_mcp = (struct mcontext __user *)&sf->mctx_transact; + if (__get_user(msr_hi, &tm_mcp->mc_gregs[PT_MSR])) goto badframe; + if (MSR_TM_ACTIVE(msr_hi<<32)) { + if (!cpu_has_feature(CPU_FTR_TM)) + goto badframe; + if (restore_tm_user_regs(regs, mcp, tm_mcp)) + goto badframe; + } else +#endif + { + sr = (struct mcontext __user *)from_user_ptr(sigctx.regs); + addr = sr; + if (!access_ok(VERIFY_READ, sr, sizeof(*sr)) + || restore_user_regs(regs, sr, 1)) + goto badframe; + } set_thread_flag(TIF_RESTOREALL); return 0; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 345947367ec0..f93ec2835a13 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -96,8 +96,6 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned long msr = regs->msr; long err = 0; - flush_fp_to_thread(current); - #ifdef CONFIG_ALTIVEC err |= __put_user(v_regs, &sc->v_regs); @@ -114,6 +112,8 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, /* We always copy to/from vrsave, it's 0 if we don't have or don't * use altivec. */ + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + current->thread.vrsave = mfspr(SPRN_VRSAVE); err |= __put_user(current->thread.vrsave, (u32 __user *)&v_regs[33]); #else /* CONFIG_ALTIVEC */ err |= __put_user(0, &sc->v_regs); @@ -217,6 +217,8 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, /* We always copy to/from vrsave, it's 0 if we don't have or don't * use altivec. */ + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + current->thread.vrsave = mfspr(SPRN_VRSAVE); err |= __put_user(current->thread.vrsave, (u32 __user *)&v_regs[33]); if (msr & MSR_VEC) err |= __put_user(current->thread.transact_vrsave, @@ -346,16 +348,18 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig, if (v_regs && !access_ok(VERIFY_READ, v_regs, 34 * sizeof(vector128))) return -EFAULT; /* Copy 33 vec registers (vr0..31 and vscr) from the stack */ - if (v_regs != 0 && (msr & MSR_VEC) != 0) + if (v_regs != NULL && (msr & MSR_VEC) != 0) err |= __copy_from_user(current->thread.vr, v_regs, 33 * sizeof(vector128)); else if (current->thread.used_vr) memset(current->thread.vr, 0, 33 * sizeof(vector128)); /* Always get VRSAVE back */ - if (v_regs != 0) + if (v_regs != NULL) err |= __get_user(current->thread.vrsave, (u32 __user *)&v_regs[33]); else current->thread.vrsave = 0; + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + mtspr(SPRN_VRSAVE, current->thread.vrsave); #endif /* CONFIG_ALTIVEC */ /* restore floating point */ err |= copy_fpr_from_user(current, &sc->fp_regs); @@ -410,6 +414,10 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, /* get MSR separately, transfer the LE bit if doing signal return */ err |= __get_user(msr, &sc->gp_regs[PT_MSR]); + /* pull in MSR TM from user context */ + regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK); + + /* pull in MSR LE from user context */ regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); /* The following non-GPR non-FPR non-VR state is also checkpointed: */ @@ -459,7 +467,7 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, tm_v_regs, 34 * sizeof(vector128))) return -EFAULT; /* Copy 33 vec registers (vr0..31 and vscr) from the stack */ - if (v_regs != 0 && tm_v_regs != 0 && (msr & MSR_VEC) != 0) { + if (v_regs != NULL && tm_v_regs != NULL && (msr & MSR_VEC) != 0) { err |= __copy_from_user(current->thread.vr, v_regs, 33 * sizeof(vector128)); err |= __copy_from_user(current->thread.transact_vr, tm_v_regs, @@ -470,7 +478,7 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, memset(current->thread.transact_vr, 0, 33 * sizeof(vector128)); } /* Always get VRSAVE back */ - if (v_regs != 0 && tm_v_regs != 0) { + if (v_regs != NULL && tm_v_regs != NULL) { err |= __get_user(current->thread.vrsave, (u32 __user *)&v_regs[33]); err |= __get_user(current->thread.transact_vrsave, @@ -480,6 +488,8 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, current->thread.vrsave = 0; current->thread.transact_vrsave = 0; } + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + mtspr(SPRN_VRSAVE, current->thread.vrsave); #endif /* CONFIG_ALTIVEC */ /* restore floating point */ err |= copy_fpr_from_user(current, &sc->fp_regs); @@ -505,8 +515,6 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, tm_enable(); /* This loads the checkpointed FP/VEC state, if used */ tm_recheckpoint(¤t->thread, msr); - /* The task has moved into TM state S, so ensure MSR reflects this: */ - regs->msr = (regs->msr & ~MSR_TS_MASK) | __MASK(33); /* This loads the speculative FP/VEC state, if used */ if (msr & MSR_FP) { @@ -654,7 +662,7 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR])) goto badframe; - if (MSR_TM_SUSPENDED(msr)) { + if (MSR_TM_ACTIVE(msr)) { /* We recheckpoint on return. */ struct ucontext __user *uc_transact; if (__get_user(uc_transact, &uc->uc_link)) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index ee7ac5e6e28a..8e59abc237d7 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -81,6 +81,28 @@ int smt_enabled_at_boot = 1; static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL; +/* + * Returns 1 if the specified cpu should be brought up during boot. + * Used to inhibit booting threads if they've been disabled or + * limited on the command line + */ +int smp_generic_cpu_bootable(unsigned int nr) +{ + /* Special case - we inhibit secondary thread startup + * during boot if the user requests it. + */ + if (system_state == SYSTEM_BOOTING && cpu_has_feature(CPU_FTR_SMT)) { + if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0) + return 0; + if (smt_enabled_at_boot + && cpu_thread_in_core(nr) >= smt_enabled_at_boot) + return 0; + } + + return 1; +} + + #ifdef CONFIG_PPC64 int smp_generic_kick_cpu(int nr) { @@ -172,7 +194,7 @@ int smp_request_message_ipi(int virq, int msg) #endif err = request_irq(virq, smp_ipi_action[msg], IRQF_PERCPU | IRQF_NO_THREAD | IRQF_NO_SUSPEND, - smp_ipi_name[msg], 0); + smp_ipi_name[msg], NULL); WARN(err < 0, "unable to request_irq %d for %s (rc %d)\n", virq, smp_ipi_name[msg], err); @@ -210,6 +232,12 @@ void smp_muxed_ipi_message_pass(int cpu, int msg) smp_ops->cause_ipi(cpu, info->data); } +#ifdef __BIG_ENDIAN__ +#define IPI_MESSAGE(A) (1 << (24 - 8 * (A))) +#else +#define IPI_MESSAGE(A) (1 << (8 * (A))) +#endif + irqreturn_t smp_ipi_demux(void) { struct cpu_messages *info = &__get_cpu_var(ipi_message); @@ -219,19 +247,14 @@ irqreturn_t smp_ipi_demux(void) do { all = xchg(&info->messages, 0); - -#ifdef __BIG_ENDIAN - if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNCTION))) + if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNCTION)) generic_smp_call_function_interrupt(); - if (all & (1 << (24 - 8 * PPC_MSG_RESCHEDULE))) + if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE)) scheduler_ipi(); - if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNC_SINGLE))) + if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNC_SINGLE)) generic_smp_call_function_single_interrupt(); - if (all & (1 << (24 - 8 * PPC_MSG_DEBUGGER_BREAK))) + if (all & IPI_MESSAGE(PPC_MSG_DEBUGGER_BREAK)) debug_ipi_action(0, NULL); -#else -#error Unsupported ENDIAN -#endif } while (info->messages); return IRQ_HANDLED; @@ -480,7 +503,7 @@ static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle) secondary_ti = current_set[cpu] = ti; } -int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle) +int __cpu_up(unsigned int cpu, struct task_struct *tidle) { int rc, c; @@ -574,6 +597,22 @@ out: return id; } +/* Return the value of the chip-id property corresponding + * to the given logical cpu. + */ +int cpu_to_chip_id(int cpu) +{ + struct device_node *np; + + np = of_get_cpu_node(cpu, NULL); + if (!np) + return -1; + + of_node_put(np); + return of_get_ibm_chip_id(np); +} +EXPORT_SYMBOL(cpu_to_chip_id); + /* Helper routines for cpu to core mapping */ int cpu_core_index_of_thread(int cpu) { @@ -587,6 +626,33 @@ int cpu_first_thread_of_core(int core) } EXPORT_SYMBOL_GPL(cpu_first_thread_of_core); +static void traverse_siblings_chip_id(int cpu, bool add, int chipid) +{ + const struct cpumask *mask; + struct device_node *np; + int i, plen; + const __be32 *prop; + + mask = add ? cpu_online_mask : cpu_present_mask; + for_each_cpu(i, mask) { + np = of_get_cpu_node(i, NULL); + if (!np) + continue; + prop = of_get_property(np, "ibm,chip-id", &plen); + if (prop && plen == sizeof(int) && + of_read_number(prop, 1) == chipid) { + if (add) { + cpumask_set_cpu(cpu, cpu_core_mask(i)); + cpumask_set_cpu(i, cpu_core_mask(cpu)); + } else { + cpumask_clear_cpu(cpu, cpu_core_mask(i)); + cpumask_clear_cpu(i, cpu_core_mask(cpu)); + } + } + of_node_put(np); + } +} + /* Must be called when no change can occur to cpu_present_mask, * i.e. during cpu online or offline. */ @@ -609,11 +675,51 @@ static struct device_node *cpu_to_l2cache(int cpu) return cache; } +static void traverse_core_siblings(int cpu, bool add) +{ + struct device_node *l2_cache, *np; + const struct cpumask *mask; + int i, chip, plen; + const __be32 *prop; + + /* First see if we have ibm,chip-id properties in cpu nodes */ + np = of_get_cpu_node(cpu, NULL); + if (np) { + chip = -1; + prop = of_get_property(np, "ibm,chip-id", &plen); + if (prop && plen == sizeof(int)) + chip = of_read_number(prop, 1); + of_node_put(np); + if (chip >= 0) { + traverse_siblings_chip_id(cpu, add, chip); + return; + } + } + + l2_cache = cpu_to_l2cache(cpu); + mask = add ? cpu_online_mask : cpu_present_mask; + for_each_cpu(i, mask) { + np = cpu_to_l2cache(i); + if (!np) + continue; + if (np == l2_cache) { + if (add) { + cpumask_set_cpu(cpu, cpu_core_mask(i)); + cpumask_set_cpu(i, cpu_core_mask(cpu)); + } else { + cpumask_clear_cpu(cpu, cpu_core_mask(i)); + cpumask_clear_cpu(i, cpu_core_mask(cpu)); + } + } + of_node_put(np); + } + of_node_put(l2_cache); +} + /* Activate a secondary processor. */ -__cpuinit void start_secondary(void *unused) +void start_secondary(void *unused) { unsigned int cpu = smp_processor_id(); - struct device_node *l2_cache; int i, base; atomic_inc(&init_mm.mm_count); @@ -637,12 +743,10 @@ __cpuinit void start_secondary(void *unused) vdso_getcpu_init(); #endif - notify_cpu_starting(cpu); - set_cpu_online(cpu, true); /* Update sibling maps */ base = cpu_first_thread_sibling(cpu); for (i = 0; i < threads_per_core; i++) { - if (cpu_is_offline(base + i)) + if (cpu_is_offline(base + i) && (cpu != base + i)) continue; cpumask_set_cpu(cpu, cpu_sibling_mask(base + i)); cpumask_set_cpu(base + i, cpu_sibling_mask(cpu)); @@ -654,18 +758,11 @@ __cpuinit void start_secondary(void *unused) cpumask_set_cpu(cpu, cpu_core_mask(base + i)); cpumask_set_cpu(base + i, cpu_core_mask(cpu)); } - l2_cache = cpu_to_l2cache(cpu); - for_each_online_cpu(i) { - struct device_node *np = cpu_to_l2cache(i); - if (!np) - continue; - if (np == l2_cache) { - cpumask_set_cpu(cpu, cpu_core_mask(i)); - cpumask_set_cpu(i, cpu_core_mask(cpu)); - } - of_node_put(np); - } - of_node_put(l2_cache); + traverse_core_siblings(cpu, true); + + smp_wmb(); + notify_cpu_starting(cpu); + set_cpu_online(cpu, true); local_irq_enable(); @@ -717,7 +814,6 @@ int arch_sd_sibling_asym_packing(void) #ifdef CONFIG_HOTPLUG_CPU int __cpu_disable(void) { - struct device_node *l2_cache; int cpu = smp_processor_id(); int base, i; int err; @@ -737,20 +833,7 @@ int __cpu_disable(void) cpumask_clear_cpu(cpu, cpu_core_mask(base + i)); cpumask_clear_cpu(base + i, cpu_core_mask(cpu)); } - - l2_cache = cpu_to_l2cache(cpu); - for_each_present_cpu(i) { - struct device_node *np = cpu_to_l2cache(i); - if (!np) - continue; - if (np == l2_cache) { - cpumask_clear_cpu(cpu, cpu_core_mask(i)); - cpumask_clear_cpu(i, cpu_core_mask(cpu)); - } - of_node_put(np); - } - of_node_put(l2_cache); - + traverse_core_siblings(cpu, false); return 0; } diff --git a/arch/powerpc/kernel/softemu8xx.c b/arch/powerpc/kernel/softemu8xx.c deleted file mode 100644 index 29b2f81dd709..000000000000 --- a/arch/powerpc/kernel/softemu8xx.c +++ /dev/null @@ -1,199 +0,0 @@ -/* - * Software emulation of some PPC instructions for the 8xx core. - * - * Copyright (C) 1998 Dan Malek (dmalek@jlc.net) - * - * Software floating emuation for the MPC8xx processor. I did this mostly - * because it was easier than trying to get the libraries compiled for - * software floating point. The goal is still to get the libraries done, - * but I lost patience and needed some hacks to at least get init and - * shells running. The first problem is the setjmp/longjmp that save - * and restore the floating point registers. - * - * For this emulation, our working registers are found on the register - * save area. - */ - -#include <linux/errno.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/stddef.h> -#include <linux/unistd.h> -#include <linux/ptrace.h> -#include <linux/user.h> -#include <linux/interrupt.h> - -#include <asm/pgtable.h> -#include <asm/uaccess.h> -#include <asm/io.h> - -/* Eventually we may need a look-up table, but this works for now. -*/ -#define LFS 48 -#define LFD 50 -#define LFDU 51 -#define STFD 54 -#define STFDU 55 -#define FMR 63 - -void print_8xx_pte(struct mm_struct *mm, unsigned long addr) -{ - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte; - - printk(" pte @ 0x%8lx: ", addr); - pgd = pgd_offset(mm, addr & PAGE_MASK); - if (pgd) { - pmd = pmd_offset(pud_offset(pgd, addr & PAGE_MASK), - addr & PAGE_MASK); - if (pmd && pmd_present(*pmd)) { - pte = pte_offset_kernel(pmd, addr & PAGE_MASK); - if (pte) { - printk(" (0x%08lx)->(0x%08lx)->0x%08lx\n", - (long)pgd, (long)pte, (long)pte_val(*pte)); -#define pp ((long)pte_val(*pte)) - printk(" RPN: %05lx PP: %lx SPS: %lx SH: %lx " - "CI: %lx v: %lx\n", - pp>>12, /* rpn */ - (pp>>10)&3, /* pp */ - (pp>>3)&1, /* small */ - (pp>>2)&1, /* shared */ - (pp>>1)&1, /* cache inhibit */ - pp&1 /* valid */ - ); -#undef pp - } - else { - printk("no pte\n"); - } - } - else { - printk("no pmd\n"); - } - } - else { - printk("no pgd\n"); - } -} - -int get_8xx_pte(struct mm_struct *mm, unsigned long addr) -{ - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte; - int retval = 0; - - pgd = pgd_offset(mm, addr & PAGE_MASK); - if (pgd) { - pmd = pmd_offset(pud_offset(pgd, addr & PAGE_MASK), - addr & PAGE_MASK); - if (pmd && pmd_present(*pmd)) { - pte = pte_offset_kernel(pmd, addr & PAGE_MASK); - if (pte) { - retval = (int)pte_val(*pte); - } - } - } - return retval; -} - -/* - * We return 0 on success, 1 on unimplemented instruction, and EFAULT - * if a load/store faulted. - */ -int Soft_emulate_8xx(struct pt_regs *regs) -{ - u32 inst, instword; - u32 flreg, idxreg, disp; - int retval; - s16 sdisp; - u32 *ea, *ip; - - retval = 0; - - instword = *((u32 *)regs->nip); - inst = instword >> 26; - - flreg = (instword >> 21) & 0x1f; - idxreg = (instword >> 16) & 0x1f; - disp = instword & 0xffff; - - ea = (u32 *)(regs->gpr[idxreg] + disp); - ip = (u32 *)¤t->thread.TS_FPR(flreg); - - switch ( inst ) - { - case LFD: - /* this is a 16 bit quantity that is sign extended - * so use a signed short here -- Cort - */ - sdisp = (instword & 0xffff); - ea = (u32 *)(regs->gpr[idxreg] + sdisp); - if (copy_from_user(ip, ea, sizeof(double))) - retval = -EFAULT; - break; - - case LFDU: - if (copy_from_user(ip, ea, sizeof(double))) - retval = -EFAULT; - else - regs->gpr[idxreg] = (u32)ea; - break; - case LFS: - sdisp = (instword & 0xffff); - ea = (u32 *)(regs->gpr[idxreg] + sdisp); - if (copy_from_user(ip, ea, sizeof(float))) - retval = -EFAULT; - break; - case STFD: - /* this is a 16 bit quantity that is sign extended - * so use a signed short here -- Cort - */ - sdisp = (instword & 0xffff); - ea = (u32 *)(regs->gpr[idxreg] + sdisp); - if (copy_to_user(ea, ip, sizeof(double))) - retval = -EFAULT; - break; - - case STFDU: - if (copy_to_user(ea, ip, sizeof(double))) - retval = -EFAULT; - else - regs->gpr[idxreg] = (u32)ea; - break; - case FMR: - /* assume this is a fp move -- Cort */ - memcpy(ip, ¤t->thread.TS_FPR((instword>>11)&0x1f), - sizeof(double)); - break; - default: - retval = 1; - printk("Bad emulation %s/%d\n" - " NIP: %08lx instruction: %08x opcode: %x " - "A: %x B: %x C: %x code: %x rc: %x\n", - current->comm,current->pid, - regs->nip, - instword,inst, - (instword>>16)&0x1f, - (instword>>11)&0x1f, - (instword>>6)&0x1f, - (instword>>1)&0x3ff, - instword&1); - { - int pa; - print_8xx_pte(current->mm,regs->nip); - pa = get_8xx_pte(current->mm,regs->nip) & PAGE_MASK; - pa |= (regs->nip & ~PAGE_MASK); - pa = (unsigned long)__va(pa); - printk("Kernel VA for NIP %x ", pa); - print_8xx_pte(current->mm,pa); - } - } - - if (retval == 0) - regs->nip += 4; - - return retval; -} diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S index 86ac1d90d02b..22045984835f 100644 --- a/arch/powerpc/kernel/swsusp_asm64.S +++ b/arch/powerpc/kernel/swsusp_asm64.S @@ -46,10 +46,19 @@ #define SL_r29 0xe8 #define SL_r30 0xf0 #define SL_r31 0xf8 -#define SL_SIZE SL_r31+8 +#define SL_SPRG1 0x100 +#define SL_TCR 0x108 +#define SL_SIZE SL_TCR+8 /* these macros rely on the save area being * pointed to by r11 */ + +#define SAVE_SPR(register) \ + mfspr r0, SPRN_##register ;\ + std r0, SL_##register(r11) +#define RESTORE_SPR(register) \ + ld r0, SL_##register(r11) ;\ + mtspr SPRN_##register, r0 #define SAVE_SPECIAL(special) \ mf##special r0 ;\ std r0, SL_##special(r11) @@ -103,8 +112,15 @@ _GLOBAL(swsusp_arch_suspend) SAVE_REGISTER(r30) SAVE_REGISTER(r31) SAVE_SPECIAL(MSR) - SAVE_SPECIAL(SDR1) SAVE_SPECIAL(XER) +#ifdef CONFIG_PPC_BOOK3S_64 + SAVE_SPECIAL(SDR1) +#else + SAVE_SPR(TCR) + + /* Save SPRG1, SPRG1 be used save paca */ + SAVE_SPR(SPRG1) +#endif /* we push the stack up 128 bytes but don't store the * stack pointer on the stack like a real stackframe */ @@ -151,6 +167,7 @@ copy_page_loop: bne+ copyloop nothing_to_copy: +#ifdef CONFIG_PPC_BOOK3S_64 /* flush caches */ lis r3, 0x10 mtctr r3 @@ -167,6 +184,7 @@ nothing_to_copy: sync tlbia +#endif ld r11,swsusp_save_area_ptr@toc(r2) @@ -208,16 +226,39 @@ nothing_to_copy: RESTORE_REGISTER(r29) RESTORE_REGISTER(r30) RESTORE_REGISTER(r31) + +#ifdef CONFIG_PPC_BOOK3S_64 /* can't use RESTORE_SPECIAL(MSR) */ ld r0, SL_MSR(r11) mtmsrd r0, 0 RESTORE_SPECIAL(SDR1) +#else + /* Restore SPRG1, be used to save paca */ + ld r0, SL_SPRG1(r11) + mtsprg 1, r0 + + RESTORE_SPECIAL(MSR) + + /* Restore TCR and clear any pending bits in TSR. */ + RESTORE_SPR(TCR) + lis r0, (TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS)@h + mtspr SPRN_TSR, r0 + + /* Kick decrementer */ + li r0, 1 + mtdec r0 + + /* Invalidate all tlbs */ + bl _tlbil_all +#endif RESTORE_SPECIAL(XER) sync addi r1,r1,-128 +#ifdef CONFIG_PPC_BOOK3S_64 bl slb_flush_and_rebolt +#endif bl do_after_copyback addi r1,r1,128 diff --git a/arch/powerpc/kernel/swsusp_booke.S b/arch/powerpc/kernel/swsusp_booke.S index 11a39307dd71..0f204053e5b5 100644 --- a/arch/powerpc/kernel/swsusp_booke.S +++ b/arch/powerpc/kernel/swsusp_booke.S @@ -141,6 +141,14 @@ _GLOBAL(swsusp_arch_resume) lis r11,swsusp_save_area@h ori r11,r11,swsusp_save_area@l + /* + * Mappings from virtual addresses to physical addresses may be + * different than they were prior to restoring hibernation state. + * Invalidate the TLB so that the boot CPU is using the new + * mappings. + */ + bl _tlbil_all + lwz r4,SL_SPRG0(r11) mtsprg 0,r4 lwz r4,SL_SPRG1(r11) diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index e68a84568b8b..b4e667663d9b 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -17,6 +17,7 @@ #include <asm/machdep.h> #include <asm/smp.h> #include <asm/pmc.h> +#include <asm/firmware.h> #include "cacheinfo.h" @@ -179,15 +180,25 @@ SYSFS_PMCSETUP(spurr, SPRN_SPURR); SYSFS_PMCSETUP(dscr, SPRN_DSCR); SYSFS_PMCSETUP(pir, SPRN_PIR); +/* + Lets only enable read for phyp resources and + enable write when needed with a separate function. + Lets be conservative and default to pseries. +*/ static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra); static DEVICE_ATTR(spurr, 0400, show_spurr, NULL); static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr); -static DEVICE_ATTR(purr, 0600, show_purr, store_purr); +static DEVICE_ATTR(purr, 0400, show_purr, store_purr); static DEVICE_ATTR(pir, 0400, show_pir, NULL); unsigned long dscr_default = 0; EXPORT_SYMBOL(dscr_default); +static void add_write_permission_dev_attr(struct device_attribute *attr) +{ + attr->attr.mode |= 0200; +} + static ssize_t show_dscr_default(struct device *dev, struct device_attribute *attr, char *buf) { @@ -341,7 +352,7 @@ static struct device_attribute pa6t_attrs[] = { #endif /* HAS_PPC_PMC_PA6T */ #endif /* HAS_PPC_PMC_CLASSIC */ -static void __cpuinit register_cpu_online(unsigned int cpu) +static void register_cpu_online(unsigned int cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); struct device *s = &c->dev; @@ -394,8 +405,11 @@ static void __cpuinit register_cpu_online(unsigned int cpu) if (cpu_has_feature(CPU_FTR_MMCRA)) device_create_file(s, &dev_attr_mmcra); - if (cpu_has_feature(CPU_FTR_PURR)) + if (cpu_has_feature(CPU_FTR_PURR)) { + if (!firmware_has_feature(FW_FEATURE_LPAR)) + add_write_permission_dev_attr(&dev_attr_purr); device_create_file(s, &dev_attr_purr); + } if (cpu_has_feature(CPU_FTR_SPURR)) device_create_file(s, &dev_attr_spurr); @@ -502,7 +516,7 @@ ssize_t arch_cpu_release(const char *buf, size_t count) #endif /* CONFIG_HOTPLUG_CPU */ -static int __cpuinit sysfs_cpu_notify(struct notifier_block *self, +static int sysfs_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned int)(long)hcpu; @@ -522,7 +536,7 @@ static int __cpuinit sysfs_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata sysfs_cpu_nb = { +static struct notifier_block sysfs_cpu_nb = { .notifier_call = sysfs_cpu_notify, }; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 5fc29ad7e26f..192b051df97e 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -210,18 +210,18 @@ static u64 scan_dispatch_log(u64 stop_tb) if (!dtl) return 0; - if (i == vpa->dtl_idx) + if (i == be64_to_cpu(vpa->dtl_idx)) return 0; - while (i < vpa->dtl_idx) { + while (i < be64_to_cpu(vpa->dtl_idx)) { if (dtl_consumer) dtl_consumer(dtl, i); - dtb = dtl->timebase; - tb_delta = dtl->enqueue_to_dispatch_time + - dtl->ready_to_enqueue_time; + dtb = be64_to_cpu(dtl->timebase); + tb_delta = be32_to_cpu(dtl->enqueue_to_dispatch_time) + + be32_to_cpu(dtl->ready_to_enqueue_time); barrier(); - if (i + N_DISPATCH_LOG < vpa->dtl_idx) { + if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) { /* buffer has overflowed */ - i = vpa->dtl_idx - N_DISPATCH_LOG; + i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG; dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG); continue; } @@ -269,7 +269,7 @@ static inline u64 calculate_stolen_time(u64 stop_tb) { u64 stolen = 0; - if (get_paca()->dtl_ridx != get_paca()->lppaca_ptr->dtl_idx) { + if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx)) { stolen = scan_dispatch_log(stop_tb); get_paca()->system_time -= stolen; } @@ -612,7 +612,7 @@ unsigned long long sched_clock(void) static int __init get_freq(char *name, int cells, unsigned long *val) { struct device_node *cpu; - const unsigned int *fp; + const __be32 *fp; int found = 0; /* The cpu node should have timebase and clock frequency properties */ @@ -631,7 +631,6 @@ static int __init get_freq(char *name, int cells, unsigned long *val) return found; } -/* should become __cpuinit when secondary_cpu_time_init also is */ void start_cpu_decrementer(void) { #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) @@ -1050,7 +1049,7 @@ static int __init rtc_init(void) pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0); - return PTR_RET(pdev); + return PTR_ERR_OR_ZERO(pdev); } module_init(rtc_init); diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 2da67e7a16d5..cd809eaa8b5c 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -79,6 +79,11 @@ _GLOBAL(tm_abort) TABORT(R3) blr + .section ".toc","aw" +DSCR_DEFAULT: + .tc dscr_default[TC],dscr_default + + .section ".text" /* void tm_reclaim(struct thread_struct *thread, * unsigned long orig_msr, @@ -112,9 +117,19 @@ _GLOBAL(tm_reclaim) std r3, STACK_PARAM(0)(r1) SAVE_NVGPRS(r1) + /* We need to setup MSR for VSX register save instructions. Here we + * also clear the MSR RI since when we do the treclaim, we won't have a + * valid kernel pointer for a while. We clear RI here as it avoids + * adding another mtmsr closer to the treclaim. This makes the region + * maked as non-recoverable wider than it needs to be but it saves on + * inserting another mtmsrd later. + */ mfmsr r14 mr r15, r14 ori r15, r15, MSR_FP + li r16, MSR_RI + ori r16, r16, MSR_EE /* IRQs hard off */ + andc r15, r15, r16 oris r15, r15, MSR_VEC@h #ifdef CONFIG_VSX BEGIN_FTR_SECTION @@ -146,10 +161,10 @@ _GLOBAL(tm_reclaim) mfvscr vr0 li r6, THREAD_TRANSACT_VSCR stvx vr0, r3, r6 +dont_backup_vec: mfspr r0, SPRN_VRSAVE std r0, THREAD_TRANSACT_VRSAVE(r3) -dont_backup_vec: andi. r0, r4, MSR_FP beq dont_backup_fp @@ -178,11 +193,18 @@ dont_backup_fp: std r1, PACATMSCRATCH(r13) ld r1, PACAR1(r13) + /* Store the PPR in r11 and reset to decent value */ + std r11, GPR11(r1) /* Temporary stash */ + mfspr r11, SPRN_PPR + HMT_MEDIUM + /* Now get some more GPRS free */ std r7, GPR7(r1) /* Temporary stash */ std r12, GPR12(r1) /* '' '' '' */ ld r12, STACK_PARAM(0)(r1) /* Param 0, thread_struct * */ + std r11, THREAD_TM_PPR(r12) /* Store PPR and free r11 */ + addi r7, r12, PT_CKPT_REGS /* Thread's ckpt_regs */ /* Make r7 look like an exception frame so that we @@ -194,15 +216,19 @@ dont_backup_fp: SAVE_GPR(0, r7) /* user r0 */ SAVE_GPR(2, r7) /* user r2 */ SAVE_4GPRS(3, r7) /* user r3-r6 */ - SAVE_4GPRS(8, r7) /* user r8-r11 */ + SAVE_GPR(8, r7) /* user r8 */ + SAVE_GPR(9, r7) /* user r9 */ + SAVE_GPR(10, r7) /* user r10 */ ld r3, PACATMSCRATCH(r13) /* user r1 */ ld r4, GPR7(r1) /* user r7 */ - ld r5, GPR12(r1) /* user r12 */ - GET_SCRATCH0(6) /* user r13 */ + ld r5, GPR11(r1) /* user r11 */ + ld r6, GPR12(r1) /* user r12 */ + GET_SCRATCH0(8) /* user r13 */ std r3, GPR1(r7) std r4, GPR7(r7) - std r5, GPR12(r7) - std r6, GPR13(r7) + std r5, GPR11(r7) + std r6, GPR12(r7) + std r8, GPR13(r7) SAVE_NVGPRS(r7) /* user r14-r31 */ @@ -224,6 +250,14 @@ dont_backup_fp: std r5, _CCR(r7) std r6, _XER(r7) + + /* ******************** TAR, DSCR ********** */ + mfspr r3, SPRN_TAR + mfspr r4, SPRN_DSCR + + std r3, THREAD_TM_TAR(r12) + std r4, THREAD_TM_DSCR(r12) + /* MSR and flags: We don't change CRs, and we don't need to alter * MSR. */ @@ -239,7 +273,7 @@ dont_backup_fp: std r3, THREAD_TM_TFHAR(r12) std r4, THREAD_TM_TFIAR(r12) - /* AMR and PPR are checkpointed too, but are unsupported by Linux. */ + /* AMR is checkpointed too, but is unsupported by Linux. */ /* Restore original MSR/IRQ state & clear TM mode */ ld r14, TM_FRAME_L0(r1) /* Orig MSR */ @@ -255,6 +289,12 @@ dont_backup_fp: mtcr r4 mtlr r0 ld r2, 40(r1) + + /* Load system default DSCR */ + ld r4, DSCR_DEFAULT@toc(r2) + ld r0, 0(r4) + mtspr SPRN_DSCR, r0 + blr @@ -322,11 +362,11 @@ _GLOBAL(tm_recheckpoint) lvx vr0, r3, r5 mtvscr vr0 REST_32VRS(0, r5, r3) /* r5 scratch, r3 THREAD ptr */ +dont_restore_vec: ld r5, THREAD_VRSAVE(r3) mtspr SPRN_VRSAVE, r5 #endif -dont_restore_vec: andi. r0, r4, MSR_FP beq dont_restore_fp @@ -338,35 +378,52 @@ dont_restore_fp: mtmsr r6 /* FP/Vec off again! */ restore_gprs: + /* ******************** CR,LR,CCR,MSR ********** */ - ld r3, _CTR(r7) - ld r4, _LINK(r7) - ld r5, _CCR(r7) - ld r6, _XER(r7) + ld r4, _CTR(r7) + ld r5, _LINK(r7) + ld r6, _CCR(r7) + ld r8, _XER(r7) - mtctr r3 - mtlr r4 - mtcr r5 - mtxer r6 + mtctr r4 + mtlr r5 + mtcr r6 + mtxer r8 - /* MSR and flags: We don't change CRs, and we don't need to alter - * MSR. + /* ******************** TAR ******************** */ + ld r4, THREAD_TM_TAR(r3) + mtspr SPRN_TAR, r4 + + /* Load up the PPR and DSCR in GPRs only at this stage */ + ld r5, THREAD_TM_DSCR(r3) + ld r6, THREAD_TM_PPR(r3) + + /* Clear the MSR RI since we are about to change R1. EE is already off */ + li r4, 0 + mtmsrd r4, 1 REST_4GPRS(0, r7) /* GPR0-3 */ - REST_GPR(4, r7) /* GPR4-6 */ - REST_GPR(5, r7) - REST_GPR(6, r7) + REST_GPR(4, r7) /* GPR4 */ REST_4GPRS(8, r7) /* GPR8-11 */ REST_2GPRS(12, r7) /* GPR12-13 */ REST_NVGPRS(r7) /* GPR14-31 */ - ld r7, GPR7(r7) /* GPR7 */ + /* Load up PPR and DSCR here so we don't run with user values for long + */ + mtspr SPRN_DSCR, r5 + mtspr SPRN_PPR, r6 + + REST_GPR(5, r7) /* GPR5-7 */ + REST_GPR(6, r7) + ld r7, GPR7(r7) /* Commit register state as checkpointed state: */ TRECHKPT + HMT_MEDIUM + /* Our transactional state has now changed. * * Now just get out of here. Transactional (current) state will be @@ -377,6 +434,10 @@ restore_gprs: GET_PACA(r13) GET_SCRATCH0(r1) + /* R1 is restored, so we are recoverable again. EE is still off */ + li r4, MSR_RI + mtmsrd r4, 1 + REST_NVGPRS(r1) addi r1, r1, TM_FRAME_SIZE @@ -385,6 +446,12 @@ restore_gprs: mtcr r4 mtlr r0 ld r2, 40(r1) + + /* Load system default DSCR */ + ld r4, DSCR_DEFAULT@toc(r2) + ld r0, 0(r4) + mtspr SPRN_DSCR, r0 + blr /* ****************************************************************** */ diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index f18c79c324ef..f783c932faeb 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -44,9 +44,7 @@ #include <asm/machdep.h> #include <asm/rtas.h> #include <asm/pmc.h> -#ifdef CONFIG_PPC32 #include <asm/reg.h> -#endif #ifdef CONFIG_PMAC_BACKLIGHT #include <asm/backlight.h> #endif @@ -62,6 +60,7 @@ #include <asm/switch_to.h> #include <asm/tm.h> #include <asm/debug.h> +#include <sysdev/fsl_pci.h> #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) int (*__debugger)(struct pt_regs *regs) __read_mostly; @@ -567,6 +566,8 @@ int machine_check_e500(struct pt_regs *regs) if (reason & MCSR_BUS_RBERR) { if (fsl_rio_mcheck_exception(regs)) return 1; + if (fsl_pci_mcheck_exception(regs)) + return 1; } printk("Machine check in kernel mode.\n"); @@ -866,6 +867,10 @@ static int emulate_string_inst(struct pt_regs *regs, u32 instword) u8 val; u32 shift = 8 * (3 - (pos & 0x3)); + /* if process is 32-bit, clear upper 32 bits of EA */ + if ((regs->msr & MSR_64BIT) == 0) + EA &= 0xFFFFFFFF; + switch ((instword & PPC_INST_STRING_MASK)) { case PPC_INST_LSWX: case PPC_INST_LSWI: @@ -960,7 +965,7 @@ static int emulate_instruction(struct pt_regs *regs) u32 instword; u32 rd; - if (!user_mode(regs) || (regs->msr & MSR_LE)) + if (!user_mode(regs)) return -EINVAL; CHECK_FULL_REGS(regs); @@ -1048,11 +1053,41 @@ int is_valid_bugaddr(unsigned long addr) return is_kernel_addr(addr); } +#ifdef CONFIG_MATH_EMULATION +static int emulate_math(struct pt_regs *regs) +{ + int ret; + extern int do_mathemu(struct pt_regs *regs); + + ret = do_mathemu(regs); + if (ret >= 0) + PPC_WARN_EMULATED(math, regs); + + switch (ret) { + case 0: + emulate_single_step(regs); + return 0; + case 1: { + int code = 0; + code = __parse_fpscr(current->thread.fpscr.val); + _exception(SIGFPE, regs, code, regs->nip); + return 0; + } + case -EFAULT: + _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); + return 0; + } + + return -1; +} +#else +static inline int emulate_math(struct pt_regs *regs) { return -1; } +#endif + void __kprobes program_check_exception(struct pt_regs *regs) { enum ctx_state prev_state = exception_enter(); unsigned int reason = get_reason(regs); - extern int do_mathemu(struct pt_regs *regs); /* We can now get here via a FP Unavailable exception if the core * has no FPU, in that case the reason flags will be 0 */ @@ -1114,34 +1149,30 @@ void __kprobes program_check_exception(struct pt_regs *regs) } #endif + /* + * If we took the program check in the kernel skip down to sending a + * SIGILL. The subsequent cases all relate to emulating instructions + * which we should only do for userspace. We also do not want to enable + * interrupts for kernel faults because that might lead to further + * faults, and loose the context of the original exception. + */ + if (!user_mode(regs)) + goto sigill; + /* We restore the interrupt state now */ if (!arch_irq_disabled_regs(regs)) local_irq_enable(); -#ifdef CONFIG_MATH_EMULATION /* (reason & REASON_ILLEGAL) would be the obvious thing here, * but there seems to be a hardware bug on the 405GP (RevD) * that means ESR is sometimes set incorrectly - either to * ESR_DST (!?) or 0. In the process of chasing this with the * hardware people - not sure if it can happen on any illegal * instruction or only on FP instructions, whether there is a - * pattern to occurrences etc. -dgibson 31/Mar/2003 */ - switch (do_mathemu(regs)) { - case 0: - emulate_single_step(regs); - goto bail; - case 1: { - int code = 0; - code = __parse_fpscr(current->thread.fpscr.val); - _exception(SIGFPE, regs, code, regs->nip); - goto bail; - } - case -EFAULT: - _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); + * pattern to occurrences etc. -dgibson 31/Mar/2003 + */ + if (!emulate_math(regs)) goto bail; - } - /* fall through on any other errors */ -#endif /* CONFIG_MATH_EMULATION */ /* Try to emulate it if we should. */ if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) { @@ -1156,6 +1187,7 @@ void __kprobes program_check_exception(struct pt_regs *regs) } } +sigill: if (reason & REASON_PRIVILEGED) _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); else @@ -1165,6 +1197,16 @@ bail: exception_exit(prev_state); } +/* + * This occurs when running in hypervisor mode on POWER6 or later + * and an illegal instruction is encountered. + */ +void __kprobes emulation_assist_interrupt(struct pt_regs *regs) +{ + regs->msr |= REASON_ILLEGAL; + program_check_exception(regs); +} + void alignment_exception(struct pt_regs *regs) { enum ctx_state prev_state = exception_enter(); @@ -1272,26 +1314,60 @@ void vsx_unavailable_exception(struct pt_regs *regs) die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT); } -void tm_unavailable_exception(struct pt_regs *regs) +#ifdef CONFIG_PPC64 +void facility_unavailable_exception(struct pt_regs *regs) { + static char *facility_strings[] = { + [FSCR_FP_LG] = "FPU", + [FSCR_VECVSX_LG] = "VMX/VSX", + [FSCR_DSCR_LG] = "DSCR", + [FSCR_PM_LG] = "PMU SPRs", + [FSCR_BHRB_LG] = "BHRB", + [FSCR_TM_LG] = "TM", + [FSCR_EBB_LG] = "EBB", + [FSCR_TAR_LG] = "TAR", + }; + char *facility = "unknown"; + u64 value; + u8 status; + bool hv; + + hv = (regs->trap == 0xf80); + if (hv) + value = mfspr(SPRN_HFSCR); + else + value = mfspr(SPRN_FSCR); + + status = value >> 56; + if (status == FSCR_DSCR_LG) { + /* User is acessing the DSCR. Set the inherit bit and allow + * the user to set it directly in future by setting via the + * FSCR DSCR bit. We always leave HFSCR DSCR set. + */ + current->thread.dscr_inherit = 1; + mtspr(SPRN_FSCR, value | FSCR_DSCR); + return; + } + + if ((status < ARRAY_SIZE(facility_strings)) && + facility_strings[status]) + facility = facility_strings[status]; + /* We restore the interrupt state now */ if (!arch_irq_disabled_regs(regs)) local_irq_enable(); - /* Currently we never expect a TMU exception. Catch - * this and kill the process! - */ - printk(KERN_EMERG "Unexpected TM unavailable exception at %lx " - "(msr %lx)\n", - regs->nip, regs->msr); + pr_err("%sFacility '%s' unavailable, exception at 0x%lx, MSR=%lx\n", + hv ? "Hypervisor " : "", facility, regs->nip, regs->msr); if (user_mode(regs)) { _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); return; } - die("Unexpected TM unavailable exception", regs, SIGABRT); + die("Unexpected facility unavailable exception", regs, SIGABRT); } +#endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -1385,12 +1461,6 @@ void performance_monitor_exception(struct pt_regs *regs) #ifdef CONFIG_8xx void SoftwareEmulation(struct pt_regs *regs) { - extern int do_mathemu(struct pt_regs *); - extern int Soft_emulate_8xx(struct pt_regs *); -#if defined(CONFIG_MATH_EMULATION) || defined(CONFIG_8XX_MINIMAL_FPEMU) - int errcode; -#endif - CHECK_FULL_REGS(regs); if (!user_mode(regs)) { @@ -1398,48 +1468,10 @@ void SoftwareEmulation(struct pt_regs *regs) die("Kernel Mode Software FPU Emulation", regs, SIGFPE); } -#ifdef CONFIG_MATH_EMULATION - errcode = do_mathemu(regs); - if (errcode >= 0) - PPC_WARN_EMULATED(math, regs); - - switch (errcode) { - case 0: - emulate_single_step(regs); + if (!emulate_math(regs)) return; - case 1: { - int code = 0; - code = __parse_fpscr(current->thread.fpscr.val); - _exception(SIGFPE, regs, code, regs->nip); - return; - } - case -EFAULT: - _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); - return; - default: - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - return; - } - -#elif defined(CONFIG_8XX_MINIMAL_FPEMU) - errcode = Soft_emulate_8xx(regs); - if (errcode >= 0) - PPC_WARN_EMULATED(8xx, regs); - switch (errcode) { - case 0: - emulate_single_step(regs); - return; - case 1: - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - return; - case -EFAULT: - _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); - return; - } -#else _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); -#endif } #endif /* CONFIG_8xx */ @@ -1786,8 +1818,6 @@ struct ppc_emulated ppc_emulated = { WARN_EMULATED_SETUP(unaligned), #ifdef CONFIG_MATH_EMULATION WARN_EMULATED_SETUP(math), -#elif defined(CONFIG_8XX_MINIMAL_FPEMU) - WARN_EMULATED_SETUP(8xx), #endif #ifdef CONFIG_VSX WARN_EMULATED_SETUP(vsx), diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index 9d3fdcd66290..a15837519dca 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -50,7 +50,7 @@ void __init udbg_early_init(void) udbg_init_debug_beat(); #elif defined(CONFIG_PPC_EARLY_DEBUG_PAS_REALMODE) udbg_init_pas_realmode(); -#elif defined(CONFIG_BOOTX_TEXT) +#elif defined(CONFIG_PPC_EARLY_DEBUG_BOOTX) udbg_init_btext(); #elif defined(CONFIG_PPC_EARLY_DEBUG_44x) /* PPC44x debug */ diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index 6837f839ab78..75702e207b29 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -18,23 +18,19 @@ extern void real_writeb(u8 data, volatile u8 __iomem *addr); extern u8 real_205_readb(volatile u8 __iomem *addr); extern void real_205_writeb(u8 data, volatile u8 __iomem *addr); -struct NS16550 { - /* this struct must be packed */ - unsigned char rbr; /* 0 */ - unsigned char ier; /* 1 */ - unsigned char fcr; /* 2 */ - unsigned char lcr; /* 3 */ - unsigned char mcr; /* 4 */ - unsigned char lsr; /* 5 */ - unsigned char msr; /* 6 */ - unsigned char scr; /* 7 */ -}; - -#define thr rbr -#define iir fcr -#define dll rbr -#define dlm ier -#define dlab lcr +#define UART_RBR 0 +#define UART_IER 1 +#define UART_FCR 2 +#define UART_LCR 3 +#define UART_MCR 4 +#define UART_LSR 5 +#define UART_MSR 6 +#define UART_SCR 7 +#define UART_THR UART_RBR +#define UART_IIR UART_FCR +#define UART_DLL UART_RBR +#define UART_DLM UART_IER +#define UART_DLAB UART_LCR #define LSR_DR 0x01 /* Data ready */ #define LSR_OE 0x02 /* Overrun */ @@ -47,52 +43,62 @@ struct NS16550 { #define LCR_DLAB 0x80 -static struct NS16550 __iomem *udbg_comport; +static u8 (*udbg_uart_in)(unsigned int reg); +static void (*udbg_uart_out)(unsigned int reg, u8 data); -static void udbg_550_flush(void) +static void udbg_uart_flush(void) { - if (udbg_comport) { - while ((in_8(&udbg_comport->lsr) & LSR_THRE) == 0) - /* wait for idle */; - } + if (!udbg_uart_in) + return; + + /* wait for idle */ + while ((udbg_uart_in(UART_LSR) & LSR_THRE) == 0) + cpu_relax(); } -static void udbg_550_putc(char c) +static void udbg_uart_putc(char c) { - if (udbg_comport) { - if (c == '\n') - udbg_550_putc('\r'); - udbg_550_flush(); - out_8(&udbg_comport->thr, c); - } + if (!udbg_uart_out) + return; + + if (c == '\n') + udbg_uart_putc('\r'); + udbg_uart_flush(); + udbg_uart_out(UART_THR, c); } -static int udbg_550_getc_poll(void) +static int udbg_uart_getc_poll(void) { - if (udbg_comport) { - if ((in_8(&udbg_comport->lsr) & LSR_DR) != 0) - return in_8(&udbg_comport->rbr); - else - return -1; - } + if (!udbg_uart_in || !(udbg_uart_in(UART_LSR) & LSR_DR)) + return udbg_uart_in(UART_RBR); return -1; } -static int udbg_550_getc(void) +static int udbg_uart_getc(void) { - if (udbg_comport) { - while ((in_8(&udbg_comport->lsr) & LSR_DR) == 0) - /* wait for char */; - return in_8(&udbg_comport->rbr); - } - return -1; + if (!udbg_uart_in) + return -1; + /* wait for char */ + while (!(udbg_uart_in(UART_LSR) & LSR_DR)) + cpu_relax(); + return udbg_uart_in(UART_RBR); +} + +static void udbg_use_uart(void) +{ + udbg_putc = udbg_uart_putc; + udbg_flush = udbg_uart_flush; + udbg_getc = udbg_uart_getc; + udbg_getc_poll = udbg_uart_getc_poll; } -void udbg_init_uart(void __iomem *comport, unsigned int speed, - unsigned int clock) +void udbg_uart_setup(unsigned int speed, unsigned int clock) { unsigned int dll, base_bauds; + if (!udbg_uart_out) + return; + if (clock == 0) clock = 1843200; if (speed == 0) @@ -101,51 +107,43 @@ void udbg_init_uart(void __iomem *comport, unsigned int speed, base_bauds = clock / 16; dll = base_bauds / speed; - if (comport) { - udbg_comport = (struct NS16550 __iomem *)comport; - out_8(&udbg_comport->lcr, 0x00); - out_8(&udbg_comport->ier, 0xff); - out_8(&udbg_comport->ier, 0x00); - out_8(&udbg_comport->lcr, LCR_DLAB); - out_8(&udbg_comport->dll, dll & 0xff); - out_8(&udbg_comport->dlm, dll >> 8); - /* 8 data, 1 stop, no parity */ - out_8(&udbg_comport->lcr, 0x03); - /* RTS/DTR */ - out_8(&udbg_comport->mcr, 0x03); - /* Clear & enable FIFOs */ - out_8(&udbg_comport->fcr ,0x07); - udbg_putc = udbg_550_putc; - udbg_flush = udbg_550_flush; - udbg_getc = udbg_550_getc; - udbg_getc_poll = udbg_550_getc_poll; - } + udbg_uart_out(UART_LCR, 0x00); + udbg_uart_out(UART_IER, 0xff); + udbg_uart_out(UART_IER, 0x00); + udbg_uart_out(UART_LCR, LCR_DLAB); + udbg_uart_out(UART_DLL, dll & 0xff); + udbg_uart_out(UART_DLM, dll >> 8); + /* 8 data, 1 stop, no parity */ + udbg_uart_out(UART_LCR, 0x3); + /* RTS/DTR */ + udbg_uart_out(UART_MCR, 0x3); + /* Clear & enable FIFOs */ + udbg_uart_out(UART_FCR, 0x7); } -unsigned int udbg_probe_uart_speed(void __iomem *comport, unsigned int clock) +unsigned int udbg_probe_uart_speed(unsigned int clock) { unsigned int dll, dlm, divisor, prescaler, speed; u8 old_lcr; - struct NS16550 __iomem *port = comport; - old_lcr = in_8(&port->lcr); + old_lcr = udbg_uart_in(UART_LCR); /* select divisor latch registers. */ - out_8(&port->lcr, LCR_DLAB); + udbg_uart_out(UART_LCR, old_lcr | LCR_DLAB); /* now, read the divisor */ - dll = in_8(&port->dll); - dlm = in_8(&port->dlm); + dll = udbg_uart_in(UART_DLL); + dlm = udbg_uart_in(UART_DLM); divisor = dlm << 8 | dll; /* check prescaling */ - if (in_8(&port->mcr) & 0x80) + if (udbg_uart_in(UART_MCR) & 0x80) prescaler = 4; else prescaler = 1; /* restore the LCR */ - out_8(&port->lcr, old_lcr); + udbg_uart_out(UART_LCR, old_lcr); /* calculate speed */ speed = (clock / prescaler) / (divisor * 16); @@ -157,195 +155,155 @@ unsigned int udbg_probe_uart_speed(void __iomem *comport, unsigned int clock) return speed; } -#ifdef CONFIG_PPC_MAPLE -void udbg_maple_real_flush(void) +static union { + unsigned char __iomem *mmio_base; + unsigned long pio_base; +} udbg_uart; + +static unsigned int udbg_uart_stride = 1; + +static u8 udbg_uart_in_pio(unsigned int reg) { - if (udbg_comport) { - while ((real_readb(&udbg_comport->lsr) & LSR_THRE) == 0) - /* wait for idle */; - } + return inb(udbg_uart.pio_base + (reg * udbg_uart_stride)); } -void udbg_maple_real_putc(char c) +static void udbg_uart_out_pio(unsigned int reg, u8 data) { - if (udbg_comport) { - if (c == '\n') - udbg_maple_real_putc('\r'); - udbg_maple_real_flush(); - real_writeb(c, &udbg_comport->thr); eieio(); - } + outb(data, udbg_uart.pio_base + (reg * udbg_uart_stride)); } -void __init udbg_init_maple_realmode(void) +void udbg_uart_init_pio(unsigned long port, unsigned int stride) { - udbg_comport = (struct NS16550 __iomem *)0xf40003f8; - - udbg_putc = udbg_maple_real_putc; - udbg_flush = udbg_maple_real_flush; - udbg_getc = NULL; - udbg_getc_poll = NULL; + if (!port) + return; + udbg_uart.pio_base = port; + udbg_uart_stride = stride; + udbg_uart_in = udbg_uart_in_pio; + udbg_uart_out = udbg_uart_out_pio; + udbg_use_uart(); } -#endif /* CONFIG_PPC_MAPLE */ -#ifdef CONFIG_PPC_PASEMI -void udbg_pas_real_flush(void) +static u8 udbg_uart_in_mmio(unsigned int reg) { - if (udbg_comport) { - while ((real_205_readb(&udbg_comport->lsr) & LSR_THRE) == 0) - /* wait for idle */; - } + return in_8(udbg_uart.mmio_base + (reg * udbg_uart_stride)); } -void udbg_pas_real_putc(char c) +static void udbg_uart_out_mmio(unsigned int reg, u8 data) { - if (udbg_comport) { - if (c == '\n') - udbg_pas_real_putc('\r'); - udbg_pas_real_flush(); - real_205_writeb(c, &udbg_comport->thr); eieio(); - } + out_8(udbg_uart.mmio_base + (reg * udbg_uart_stride), data); } -void udbg_init_pas_realmode(void) -{ - udbg_comport = (struct NS16550 __iomem *)0xfcff03f8UL; - udbg_putc = udbg_pas_real_putc; - udbg_flush = udbg_pas_real_flush; - udbg_getc = NULL; - udbg_getc_poll = NULL; +void udbg_uart_init_mmio(void __iomem *addr, unsigned int stride) +{ + if (!addr) + return; + udbg_uart.mmio_base = addr; + udbg_uart_stride = stride; + udbg_uart_in = udbg_uart_in_mmio; + udbg_uart_out = udbg_uart_out_mmio; + udbg_use_uart(); } -#endif /* CONFIG_PPC_MAPLE */ -#ifdef CONFIG_PPC_EARLY_DEBUG_44x -#include <platforms/44x/44x.h> +#ifdef CONFIG_PPC_MAPLE + +#define UDBG_UART_MAPLE_ADDR ((void __iomem *)0xf40003f8) -static void udbg_44x_as1_flush(void) +static u8 udbg_uart_in_maple(unsigned int reg) { - if (udbg_comport) { - while ((as1_readb(&udbg_comport->lsr) & LSR_THRE) == 0) - /* wait for idle */; - } + return real_readb(UDBG_UART_MAPLE_ADDR + reg); } -static void udbg_44x_as1_putc(char c) +static void udbg_uart_out_maple(unsigned int reg, u8 val) { - if (udbg_comport) { - if (c == '\n') - udbg_44x_as1_putc('\r'); - udbg_44x_as1_flush(); - as1_writeb(c, &udbg_comport->thr); eieio(); - } + real_writeb(val, UDBG_UART_MAPLE_ADDR + reg); } -static int udbg_44x_as1_getc(void) +void __init udbg_init_maple_realmode(void) { - if (udbg_comport) { - while ((as1_readb(&udbg_comport->lsr) & LSR_DR) == 0) - ; /* wait for char */ - return as1_readb(&udbg_comport->rbr); - } - return -1; + udbg_uart_in = udbg_uart_in_maple; + udbg_uart_out = udbg_uart_out_maple; + udbg_use_uart(); } -void __init udbg_init_44x_as1(void) -{ - udbg_comport = - (struct NS16550 __iomem *)PPC44x_EARLY_DEBUG_VIRTADDR; +#endif /* CONFIG_PPC_MAPLE */ - udbg_putc = udbg_44x_as1_putc; - udbg_flush = udbg_44x_as1_flush; - udbg_getc = udbg_44x_as1_getc; -} -#endif /* CONFIG_PPC_EARLY_DEBUG_44x */ +#ifdef CONFIG_PPC_PASEMI -#ifdef CONFIG_PPC_EARLY_DEBUG_40x -static void udbg_40x_real_flush(void) +#define UDBG_UART_PAS_ADDR ((void __iomem *)0xfcff03f8UL) + +static u8 udbg_uart_in_pas(unsigned int reg) { - if (udbg_comport) { - while ((real_readb(&udbg_comport->lsr) & LSR_THRE) == 0) - /* wait for idle */; - } + return real_205_readb(UDBG_UART_PAS_ADDR + reg); } -static void udbg_40x_real_putc(char c) +static void udbg_uart_out_pas(unsigned int reg, u8 val) { - if (udbg_comport) { - if (c == '\n') - udbg_40x_real_putc('\r'); - udbg_40x_real_flush(); - real_writeb(c, &udbg_comport->thr); eieio(); - } + real_205_writeb(val, UDBG_UART_PAS_ADDR + reg); } -static int udbg_40x_real_getc(void) +void __init udbg_init_pas_realmode(void) { - if (udbg_comport) { - while ((real_readb(&udbg_comport->lsr) & LSR_DR) == 0) - ; /* wait for char */ - return real_readb(&udbg_comport->rbr); - } - return -1; + udbg_uart_in = udbg_uart_in_pas; + udbg_uart_out = udbg_uart_out_pas; + udbg_use_uart(); } -void __init udbg_init_40x_realmode(void) -{ - udbg_comport = (struct NS16550 __iomem *) - CONFIG_PPC_EARLY_DEBUG_40x_PHYSADDR; +#endif /* CONFIG_PPC_PASEMI */ + +#ifdef CONFIG_PPC_EARLY_DEBUG_44x - udbg_putc = udbg_40x_real_putc; - udbg_flush = udbg_40x_real_flush; - udbg_getc = udbg_40x_real_getc; - udbg_getc_poll = NULL; +#include <platforms/44x/44x.h> + +static u8 udbg_uart_in_44x_as1(unsigned int reg) +{ + return as1_readb((void __iomem *)PPC44x_EARLY_DEBUG_VIRTADDR + reg); } -#endif /* CONFIG_PPC_EARLY_DEBUG_40x */ -#ifdef CONFIG_PPC_EARLY_DEBUG_WSP -static void udbg_wsp_flush(void) +static void udbg_uart_out_44x_as1(unsigned int reg, u8 val) { - if (udbg_comport) { - while ((readb(&udbg_comport->lsr) & LSR_THRE) == 0) - /* wait for idle */; - } + as1_writeb(val, (void __iomem *)PPC44x_EARLY_DEBUG_VIRTADDR + reg); } -static void udbg_wsp_putc(char c) +void __init udbg_init_44x_as1(void) { - if (udbg_comport) { - if (c == '\n') - udbg_wsp_putc('\r'); - udbg_wsp_flush(); - writeb(c, &udbg_comport->thr); eieio(); - } + udbg_uart_in = udbg_uart_in_44x_as1; + udbg_uart_out = udbg_uart_out_44x_as1; + udbg_use_uart(); } -static int udbg_wsp_getc(void) +#endif /* CONFIG_PPC_EARLY_DEBUG_44x */ + +#ifdef CONFIG_PPC_EARLY_DEBUG_40x + +static u8 udbg_uart_in_40x(unsigned int reg) { - if (udbg_comport) { - while ((readb(&udbg_comport->lsr) & LSR_DR) == 0) - ; /* wait for char */ - return readb(&udbg_comport->rbr); - } - return -1; + return real_readb((void __iomem *)CONFIG_PPC_EARLY_DEBUG_40x_PHYSADDR + + reg); } -static int udbg_wsp_getc_poll(void) +static void udbg_uart_out_40x(unsigned int reg, u8 val) { - if (udbg_comport) - if (readb(&udbg_comport->lsr) & LSR_DR) - return readb(&udbg_comport->rbr); - return -1; + real_writeb(val, (void __iomem *)CONFIG_PPC_EARLY_DEBUG_40x_PHYSADDR + + reg); } -void __init udbg_init_wsp(void) +void __init udbg_init_40x_realmode(void) { - udbg_comport = (struct NS16550 __iomem *)WSP_UART_VIRT; + udbg_uart_in = udbg_uart_in_40x; + udbg_uart_out = udbg_uart_out_40x; + udbg_use_uart(); +} - udbg_init_uart(udbg_comport, 57600, 50000000); +#endif /* CONFIG_PPC_EARLY_DEBUG_40x */ + + +#ifdef CONFIG_PPC_EARLY_DEBUG_WSP - udbg_putc = udbg_wsp_putc; - udbg_flush = udbg_wsp_flush; - udbg_getc = udbg_wsp_getc; - udbg_getc_poll = udbg_wsp_getc_poll; +void __init udbg_init_wsp(void) +{ + udbg_uart_init_mmio((void *)WSP_UART_VIRT, 1); + udbg_uart_setup(57600, 50000000); } + #endif /* CONFIG_PPC_EARLY_DEBUG_WSP */ diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index d4f463ac65b1..1d9c92621b36 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -711,7 +711,7 @@ static void __init vdso_setup_syscall_map(void) } #ifdef CONFIG_PPC64 -int __cpuinit vdso_getcpu_init(void) +int vdso_getcpu_init(void) { unsigned long cpu, node, val; diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index 27e2f623210b..6b1f2a6d5517 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -232,9 +232,9 @@ __do_get_tspec: lwz r6,(CFG_TB_ORIG_STAMP+4)(r9) /* Get a stable TB value */ -2: mftbu r3 - mftbl r4 - mftbu r0 +2: mfspr r3, SPRN_TBRU + mfspr r4, SPRN_TBRL + mfspr r0, SPRN_TBRU cmplw cr0,r3,r0 bne- 2b diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 536016d792ba..d38cc08b16c7 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1153,7 +1153,7 @@ EXPORT_SYMBOL(vio_h_cop_sync); static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) { - const unsigned char *dma_window; + const __be32 *dma_window; struct iommu_table *tbl; unsigned long offset, size; @@ -1312,8 +1312,7 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node) { struct vio_dev *viodev; struct device_node *parent_node; - const unsigned int *unit_address; - const unsigned int *pfo_resid = NULL; + const __be32 *prop; enum vio_dev_family family; const char *of_node_name = of_node->name ? of_node->name : "<unknown>"; @@ -1360,6 +1359,8 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node) /* we need the 'device_type' property, in order to match with drivers */ viodev->family = family; if (viodev->family == VDEVICE) { + unsigned int unit_address; + if (of_node->type != NULL) viodev->type = of_node->type; else { @@ -1368,24 +1369,24 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node) goto out; } - unit_address = of_get_property(of_node, "reg", NULL); - if (unit_address == NULL) { + prop = of_get_property(of_node, "reg", NULL); + if (prop == NULL) { pr_warn("%s: node %s missing 'reg'\n", __func__, of_node_name); goto out; } - dev_set_name(&viodev->dev, "%x", *unit_address); + unit_address = of_read_number(prop, 1); + dev_set_name(&viodev->dev, "%x", unit_address); viodev->irq = irq_of_parse_and_map(of_node, 0); - viodev->unit_address = *unit_address; + viodev->unit_address = unit_address; } else { /* PFO devices need their resource_id for submitting COP_OPs * This is an optional field for devices, but is required when * performing synchronous ops */ - pfo_resid = of_get_property(of_node, "ibm,resource-id", NULL); - if (pfo_resid != NULL) - viodev->resource_id = *pfo_resid; + prop = of_get_property(of_node, "ibm,resource-id", NULL); + if (prop != NULL) + viodev->resource_id = of_read_number(prop, 1); - unit_address = NULL; dev_set_name(&viodev->dev, "%s", of_node_name); viodev->type = of_node_name; viodev->irq = 0; @@ -1529,11 +1530,15 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, const char *cp; dn = dev->of_node; - if (!dn) - return -ENODEV; + if (!dn) { + strcat(buf, "\n"); + return strlen(buf); + } cp = of_get_property(dn, "compatible", NULL); - if (!cp) - return -ENODEV; + if (!cp) { + strcat(buf, "\n"); + return strlen(buf); + } return sprintf(buf, "vio:T%sS%s\n", vio_dev->type, cp); } @@ -1622,7 +1627,6 @@ static struct vio_dev *vio_find_name(const char *name) */ struct vio_dev *vio_find_node(struct device_node *vnode) { - const uint32_t *unit_address; char kobj_name[20]; struct device_node *vnode_parent; const char *dev_type; @@ -1638,10 +1642,13 @@ struct vio_dev *vio_find_node(struct device_node *vnode) /* construct the kobject name from the device node */ if (!strcmp(dev_type, "vdevice")) { - unit_address = of_get_property(vnode, "reg", NULL); - if (!unit_address) + const __be32 *prop; + + prop = of_get_property(vnode, "reg", NULL); + if (!prop) return NULL; - snprintf(kobj_name, sizeof(kobj_name), "%x", *unit_address); + snprintf(kobj_name, sizeof(kobj_name), "%x", + (uint32_t)of_read_number(prop, 1)); } else if (!strcmp(dev_type, "ibm,platform-facilities")) snprintf(kobj_name, sizeof(kobj_name), "%s", vnode->name); else diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 654e479802f2..f096e72262f4 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -38,9 +38,6 @@ jiffies = jiffies_64 + 4; #endif SECTIONS { - . = 0; - reloc_start = .; - . = KERNELBASE; /* |