summaryrefslogtreecommitdiff
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/Makefile9
-rw-r--r--arch/powerpc/kernel/align.c14
-rw-r--r--arch/powerpc/kernel/asm-offsets.c14
-rw-r--r--arch/powerpc/kernel/btext.c254
-rw-r--r--arch/powerpc/kernel/cacheinfo.c48
-rw-r--r--arch/powerpc/kernel/cpu_setup_fsl_booke.S2
-rw-r--r--arch/powerpc/kernel/cputable.c30
-rw-r--r--arch/powerpc/kernel/crash_dump.c10
-rw-r--r--arch/powerpc/kernel/eeh.c1068
-rw-r--r--arch/powerpc/kernel/eeh_cache.c310
-rw-r--r--arch/powerpc/kernel/eeh_dev.c112
-rw-r--r--arch/powerpc/kernel/eeh_driver.c732
-rw-r--r--arch/powerpc/kernel/eeh_event.c182
-rw-r--r--arch/powerpc/kernel/eeh_pe.c792
-rw-r--r--arch/powerpc/kernel/eeh_sysfs.c95
-rw-r--r--arch/powerpc/kernel/entry_64.S90
-rw-r--r--arch/powerpc/kernel/epapr_paravirt.c28
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S4
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S180
-rw-r--r--arch/powerpc/kernel/head_40x.S8
-rw-r--r--arch/powerpc/kernel/head_44x.S10
-rw-r--r--arch/powerpc/kernel/head_64.S1
-rw-r--r--arch/powerpc/kernel/head_8xx.S4
-rw-r--r--arch/powerpc/kernel/head_fsl_booke.S10
-rw-r--r--arch/powerpc/kernel/hw_breakpoint.c3
-rw-r--r--arch/powerpc/kernel/ibmebus.c22
-rw-r--r--arch/powerpc/kernel/idle.c4
-rw-r--r--arch/powerpc/kernel/io-workarounds.c30
-rw-r--r--arch/powerpc/kernel/io.c3
-rw-r--r--arch/powerpc/kernel/iommu.c327
-rw-r--r--arch/powerpc/kernel/irq.c106
-rw-r--r--arch/powerpc/kernel/kprobes.c20
-rw-r--r--arch/powerpc/kernel/kvm.c9
-rw-r--r--arch/powerpc/kernel/legacy_serial.c62
-rw-r--r--arch/powerpc/kernel/lparcfg.c712
-rw-r--r--arch/powerpc/kernel/misc_32.S28
-rw-r--r--arch/powerpc/kernel/misc_64.S60
-rw-r--r--arch/powerpc/kernel/nvram_64.c20
-rw-r--r--arch/powerpc/kernel/paca.c10
-rw-r--r--arch/powerpc/kernel/pci-common.c44
-rw-r--r--arch/powerpc/kernel/pci-hotplug.c110
-rw-r--r--arch/powerpc/kernel/pci_64.c4
-rw-r--r--arch/powerpc/kernel/pci_dn.c20
-rw-r--r--arch/powerpc/kernel/pci_of_scan.c80
-rw-r--r--arch/powerpc/kernel/ppc_ksyms.c3
-rw-r--r--arch/powerpc/kernel/proc_powerpc.c20
-rw-r--r--arch/powerpc/kernel/process.c26
-rw-r--r--arch/powerpc/kernel/prom.c175
-rw-r--r--arch/powerpc/kernel/prom_init.c298
-rw-r--r--arch/powerpc/kernel/prom_init_check.sh3
-rw-r--r--arch/powerpc/kernel/prom_parse.c17
-rw-r--r--arch/powerpc/kernel/ptrace.c30
-rw-r--r--arch/powerpc/kernel/reloc_32.S3
-rw-r--r--arch/powerpc/kernel/rtas.c70
-rw-r--r--arch/powerpc/kernel/setup-common.c13
-rw-r--r--arch/powerpc/kernel/setup_32.c4
-rw-r--r--arch/powerpc/kernel/setup_64.c42
-rw-r--r--arch/powerpc/kernel/signal_32.c79
-rw-r--r--arch/powerpc/kernel/signal_64.c26
-rw-r--r--arch/powerpc/kernel/smp.c169
-rw-r--r--arch/powerpc/kernel/softemu8xx.c199
-rw-r--r--arch/powerpc/kernel/swsusp_asm64.S45
-rw-r--r--arch/powerpc/kernel/swsusp_booke.S8
-rw-r--r--arch/powerpc/kernel/sysfs.c24
-rw-r--r--arch/powerpc/kernel/time.c21
-rw-r--r--arch/powerpc/kernel/tm.S111
-rw-r--r--arch/powerpc/kernel/traps.c182
-rw-r--r--arch/powerpc/kernel/udbg.c2
-rw-r--r--arch/powerpc/kernel/udbg_16550.c370
-rw-r--r--arch/powerpc/kernel/vdso.c2
-rw-r--r--arch/powerpc/kernel/vdso32/gettimeofday.S6
-rw-r--r--arch/powerpc/kernel/vio.c45
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S3
73 files changed, 5461 insertions, 2216 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index f960a7944553..445cb6e39d5b 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -55,9 +55,10 @@ obj-$(CONFIG_PPC_RTAS) += rtas.o rtas-rtc.o $(rtaspci-y-y)
obj-$(CONFIG_PPC_RTAS_DAEMON) += rtasd.o
obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o
obj-$(CONFIG_RTAS_PROC) += rtas-proc.o
-obj-$(CONFIG_LPARCFG) += lparcfg.o
obj-$(CONFIG_IBMVIO) += vio.o
obj-$(CONFIG_IBMEBUS) += ibmebus.o
+obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \
+ eeh_driver.o eeh_event.o eeh_sysfs.o
obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_FA_DUMP) += fadump.o
@@ -100,7 +101,7 @@ obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o
-pci64-$(CONFIG_PPC64) += pci_dn.o isa-bridge.o
+pci64-$(CONFIG_PPC64) += pci_dn.o pci-hotplug.o isa-bridge.o
obj-$(CONFIG_PCI) += pci_$(CONFIG_WORD_SIZE).o $(pci64-y) \
pci-common.o pci_of_scan.o
obj-$(CONFIG_PCI_MSI) += msi.o
@@ -115,9 +116,7 @@ obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
-obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o
-
-ifneq ($(CONFIG_PPC_INDIRECT_IO),y)
+ifneq ($(CONFIG_PPC_INDIRECT_PIO),y)
obj-y += iomap.o
endif
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index ee5b690a0bed..a27ccd5dc6b9 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -651,6 +651,10 @@ static int emulate_vsx(unsigned char __user *addr, unsigned int reg,
int sw = 0;
int i, j;
+ /* userland only */
+ if (unlikely(!user_mode(regs)))
+ return 0;
+
flush_vsx_to_thread(current);
if (reg < 32)
@@ -764,6 +768,16 @@ int fix_alignment(struct pt_regs *regs)
nb = aligninfo[instr].len;
flags = aligninfo[instr].flags;
+ /* ldbrx/stdbrx overlap lfs/stfs in the DSISR unfortunately */
+ if (IS_XFORM(instruction) && ((instruction >> 1) & 0x3ff) == 532) {
+ nb = 8;
+ flags = LD+SW;
+ } else if (IS_XFORM(instruction) &&
+ ((instruction >> 1) & 0x3ff) == 660) {
+ nb = 8;
+ flags = ST+SW;
+ }
+
/* Byteswap little endian loads and stores */
swiz = 0;
if (regs->msr & MSR_LE) {
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 6f16ffafa6f0..502c7a4e73f7 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -80,10 +80,11 @@ int main(void)
DEFINE(TASKTHREADPPR, offsetof(struct task_struct, thread.ppr));
#else
DEFINE(THREAD_INFO, offsetof(struct task_struct, stack));
+ DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16));
+ DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit));
#endif /* CONFIG_PPC64 */
DEFINE(KSP, offsetof(struct thread_struct, ksp));
- DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit));
DEFINE(PT_REGS, offsetof(struct thread_struct, regs));
#ifdef CONFIG_BOOKE
DEFINE(THREAD_NORMSAVES, offsetof(struct thread_struct, normsave[0]));
@@ -105,9 +106,6 @@ int main(void)
DEFINE(KSP_VSID, offsetof(struct thread_struct, ksp_vsid));
#else /* CONFIG_PPC64 */
DEFINE(PGDIR, offsetof(struct thread_struct, pgdir));
-#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
- DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, dbcr0));
-#endif
#ifdef CONFIG_SPE
DEFINE(THREAD_EVR0, offsetof(struct thread_struct, evr[0]));
DEFINE(THREAD_ACC, offsetof(struct thread_struct, acc));
@@ -115,6 +113,9 @@ int main(void)
DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe));
#endif /* CONFIG_SPE */
#endif /* CONFIG_PPC64 */
+#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
+ DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, dbcr0));
+#endif
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu));
#endif
@@ -132,13 +133,15 @@ int main(void)
DEFINE(THREAD_SIER, offsetof(struct thread_struct, sier));
DEFINE(THREAD_MMCR0, offsetof(struct thread_struct, mmcr0));
DEFINE(THREAD_MMCR2, offsetof(struct thread_struct, mmcr2));
- DEFINE(THREAD_MMCRA, offsetof(struct thread_struct, mmcra));
#endif
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
DEFINE(PACATMSCRATCH, offsetof(struct paca_struct, tm_scratch));
DEFINE(THREAD_TM_TFHAR, offsetof(struct thread_struct, tm_tfhar));
DEFINE(THREAD_TM_TEXASR, offsetof(struct thread_struct, tm_texasr));
DEFINE(THREAD_TM_TFIAR, offsetof(struct thread_struct, tm_tfiar));
+ DEFINE(THREAD_TM_TAR, offsetof(struct thread_struct, tm_tar));
+ DEFINE(THREAD_TM_PPR, offsetof(struct thread_struct, tm_ppr));
+ DEFINE(THREAD_TM_DSCR, offsetof(struct thread_struct, tm_dscr));
DEFINE(PT_CKPT_REGS, offsetof(struct thread_struct, ckpt_regs));
DEFINE(THREAD_TRANSACT_VR0, offsetof(struct thread_struct,
transact_vr[0]));
@@ -452,6 +455,7 @@ int main(void)
DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2));
DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3));
#endif
+ DEFINE(VCPU_SHARED_SPRG3, offsetof(struct kvm_vcpu_arch_shared, sprg3));
DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4));
DEFINE(VCPU_SHARED_SPRG5, offsetof(struct kvm_vcpu_arch_shared, sprg5));
DEFINE(VCPU_SHARED_SPRG6, offsetof(struct kvm_vcpu_arch_shared, sprg6));
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index ac8f52732fde..41c011cb6070 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -25,11 +25,6 @@
static void scrollscreen(void);
#endif
-static void draw_byte(unsigned char c, long locX, long locY);
-static void draw_byte_32(unsigned char *bits, unsigned int *base, int rb);
-static void draw_byte_16(unsigned char *bits, unsigned int *base, int rb);
-static void draw_byte_8(unsigned char *bits, unsigned int *base, int rb);
-
#define __force_data __attribute__((__section__(".data")))
static int g_loc_X __force_data;
@@ -52,6 +47,26 @@ static unsigned char vga_font[cmapsz];
int boot_text_mapped __force_data = 0;
int force_printk_to_btext = 0;
+extern void rmci_on(void);
+extern void rmci_off(void);
+
+static inline void rmci_maybe_on(void)
+{
+#if defined(CONFIG_PPC_EARLY_DEBUG_BOOTX) && defined(CONFIG_PPC64)
+ if (!(mfmsr() & MSR_DR))
+ rmci_on();
+#endif
+}
+
+static inline void rmci_maybe_off(void)
+{
+#if defined(CONFIG_PPC_EARLY_DEBUG_BOOTX) && defined(CONFIG_PPC64)
+ if (!(mfmsr() & MSR_DR))
+ rmci_off();
+#endif
+}
+
+
#ifdef CONFIG_PPC32
/* Calc BAT values for mapping the display and store them
* in disp_BAT. Those values are then used from head.S to map
@@ -134,7 +149,7 @@ void __init btext_unmap(void)
* changes.
*/
-static void map_boot_text(void)
+void btext_map(void)
{
unsigned long base, offset, size;
unsigned char *vbase;
@@ -209,7 +224,7 @@ int btext_initialize(struct device_node *np)
dispDeviceRect[2] = width;
dispDeviceRect[3] = height;
- map_boot_text();
+ btext_map();
return 0;
}
@@ -283,7 +298,7 @@ void btext_update_display(unsigned long phys, int width, int height,
iounmap(logicalDisplayBase);
boot_text_mapped = 0;
}
- map_boot_text();
+ btext_map();
g_loc_X = 0;
g_loc_Y = 0;
g_max_loc_X = width / 8;
@@ -298,6 +313,7 @@ void btext_clearscreen(void)
(dispDeviceDepth >> 3)) >> 2;
int i,j;
+ rmci_maybe_on();
for (i=0; i<(dispDeviceRect[3] - dispDeviceRect[1]); i++)
{
unsigned int *ptr = base;
@@ -305,6 +321,7 @@ void btext_clearscreen(void)
*(ptr++) = 0;
base += (dispDeviceRowBytes >> 2);
}
+ rmci_maybe_off();
}
void btext_flushscreen(void)
@@ -355,6 +372,8 @@ static void scrollscreen(void)
(dispDeviceDepth >> 3)) >> 2;
int i,j;
+ rmci_maybe_on();
+
for (i=0; i<(dispDeviceRect[3] - dispDeviceRect[1] - 16); i++)
{
unsigned int *src_ptr = src;
@@ -371,9 +390,116 @@ static void scrollscreen(void)
*(dst_ptr++) = 0;
dst += (dispDeviceRowBytes >> 2);
}
+
+ rmci_maybe_off();
}
#endif /* ndef NO_SCROLL */
+static unsigned int expand_bits_8[16] = {
+ 0x00000000,
+ 0x000000ff,
+ 0x0000ff00,
+ 0x0000ffff,
+ 0x00ff0000,
+ 0x00ff00ff,
+ 0x00ffff00,
+ 0x00ffffff,
+ 0xff000000,
+ 0xff0000ff,
+ 0xff00ff00,
+ 0xff00ffff,
+ 0xffff0000,
+ 0xffff00ff,
+ 0xffffff00,
+ 0xffffffff
+};
+
+static unsigned int expand_bits_16[4] = {
+ 0x00000000,
+ 0x0000ffff,
+ 0xffff0000,
+ 0xffffffff
+};
+
+
+static void draw_byte_32(unsigned char *font, unsigned int *base, int rb)
+{
+ int l, bits;
+ int fg = 0xFFFFFFFFUL;
+ int bg = 0x00000000UL;
+
+ for (l = 0; l < 16; ++l)
+ {
+ bits = *font++;
+ base[0] = (-(bits >> 7) & fg) ^ bg;
+ base[1] = (-((bits >> 6) & 1) & fg) ^ bg;
+ base[2] = (-((bits >> 5) & 1) & fg) ^ bg;
+ base[3] = (-((bits >> 4) & 1) & fg) ^ bg;
+ base[4] = (-((bits >> 3) & 1) & fg) ^ bg;
+ base[5] = (-((bits >> 2) & 1) & fg) ^ bg;
+ base[6] = (-((bits >> 1) & 1) & fg) ^ bg;
+ base[7] = (-(bits & 1) & fg) ^ bg;
+ base = (unsigned int *) ((char *)base + rb);
+ }
+}
+
+static inline void draw_byte_16(unsigned char *font, unsigned int *base, int rb)
+{
+ int l, bits;
+ int fg = 0xFFFFFFFFUL;
+ int bg = 0x00000000UL;
+ unsigned int *eb = (int *)expand_bits_16;
+
+ for (l = 0; l < 16; ++l)
+ {
+ bits = *font++;
+ base[0] = (eb[bits >> 6] & fg) ^ bg;
+ base[1] = (eb[(bits >> 4) & 3] & fg) ^ bg;
+ base[2] = (eb[(bits >> 2) & 3] & fg) ^ bg;
+ base[3] = (eb[bits & 3] & fg) ^ bg;
+ base = (unsigned int *) ((char *)base + rb);
+ }
+}
+
+static inline void draw_byte_8(unsigned char *font, unsigned int *base, int rb)
+{
+ int l, bits;
+ int fg = 0x0F0F0F0FUL;
+ int bg = 0x00000000UL;
+ unsigned int *eb = (int *)expand_bits_8;
+
+ for (l = 0; l < 16; ++l)
+ {
+ bits = *font++;
+ base[0] = (eb[bits >> 4] & fg) ^ bg;
+ base[1] = (eb[bits & 0xf] & fg) ^ bg;
+ base = (unsigned int *) ((char *)base + rb);
+ }
+}
+
+static noinline void draw_byte(unsigned char c, long locX, long locY)
+{
+ unsigned char *base = calc_base(locX << 3, locY << 4);
+ unsigned char *font = &vga_font[((unsigned int)c) * 16];
+ int rb = dispDeviceRowBytes;
+
+ rmci_maybe_on();
+ switch(dispDeviceDepth) {
+ case 24:
+ case 32:
+ draw_byte_32(font, (unsigned int *)base, rb);
+ break;
+ case 15:
+ case 16:
+ draw_byte_16(font, (unsigned int *)base, rb);
+ break;
+ case 8:
+ draw_byte_8(font, (unsigned int *)base, rb);
+ break;
+ }
+ rmci_maybe_off();
+}
+
void btext_drawchar(char c)
{
int cline = 0;
@@ -465,107 +591,12 @@ void btext_drawhex(unsigned long v)
btext_drawchar(' ');
}
-static void draw_byte(unsigned char c, long locX, long locY)
-{
- unsigned char *base = calc_base(locX << 3, locY << 4);
- unsigned char *font = &vga_font[((unsigned int)c) * 16];
- int rb = dispDeviceRowBytes;
-
- switch(dispDeviceDepth) {
- case 24:
- case 32:
- draw_byte_32(font, (unsigned int *)base, rb);
- break;
- case 15:
- case 16:
- draw_byte_16(font, (unsigned int *)base, rb);
- break;
- case 8:
- draw_byte_8(font, (unsigned int *)base, rb);
- break;
- }
-}
-
-static unsigned int expand_bits_8[16] = {
- 0x00000000,
- 0x000000ff,
- 0x0000ff00,
- 0x0000ffff,
- 0x00ff0000,
- 0x00ff00ff,
- 0x00ffff00,
- 0x00ffffff,
- 0xff000000,
- 0xff0000ff,
- 0xff00ff00,
- 0xff00ffff,
- 0xffff0000,
- 0xffff00ff,
- 0xffffff00,
- 0xffffffff
-};
-
-static unsigned int expand_bits_16[4] = {
- 0x00000000,
- 0x0000ffff,
- 0xffff0000,
- 0xffffffff
-};
-
-
-static void draw_byte_32(unsigned char *font, unsigned int *base, int rb)
-{
- int l, bits;
- int fg = 0xFFFFFFFFUL;
- int bg = 0x00000000UL;
-
- for (l = 0; l < 16; ++l)
- {
- bits = *font++;
- base[0] = (-(bits >> 7) & fg) ^ bg;
- base[1] = (-((bits >> 6) & 1) & fg) ^ bg;
- base[2] = (-((bits >> 5) & 1) & fg) ^ bg;
- base[3] = (-((bits >> 4) & 1) & fg) ^ bg;
- base[4] = (-((bits >> 3) & 1) & fg) ^ bg;
- base[5] = (-((bits >> 2) & 1) & fg) ^ bg;
- base[6] = (-((bits >> 1) & 1) & fg) ^ bg;
- base[7] = (-(bits & 1) & fg) ^ bg;
- base = (unsigned int *) ((char *)base + rb);
- }
-}
-
-static void draw_byte_16(unsigned char *font, unsigned int *base, int rb)
-{
- int l, bits;
- int fg = 0xFFFFFFFFUL;
- int bg = 0x00000000UL;
- unsigned int *eb = (int *)expand_bits_16;
-
- for (l = 0; l < 16; ++l)
- {
- bits = *font++;
- base[0] = (eb[bits >> 6] & fg) ^ bg;
- base[1] = (eb[(bits >> 4) & 3] & fg) ^ bg;
- base[2] = (eb[(bits >> 2) & 3] & fg) ^ bg;
- base[3] = (eb[bits & 3] & fg) ^ bg;
- base = (unsigned int *) ((char *)base + rb);
- }
-}
-
-static void draw_byte_8(unsigned char *font, unsigned int *base, int rb)
+void __init udbg_init_btext(void)
{
- int l, bits;
- int fg = 0x0F0F0F0FUL;
- int bg = 0x00000000UL;
- unsigned int *eb = (int *)expand_bits_8;
-
- for (l = 0; l < 16; ++l)
- {
- bits = *font++;
- base[0] = (eb[bits >> 4] & fg) ^ bg;
- base[1] = (eb[bits & 0xf] & fg) ^ bg;
- base = (unsigned int *) ((char *)base + rb);
- }
+ /* If btext is enabled, we might have a BAT setup for early display,
+ * thus we do enable some very basic udbg output
+ */
+ udbg_putc = btext_drawchar;
}
static unsigned char vga_font[cmapsz] = {
@@ -913,10 +944,3 @@ static unsigned char vga_font[cmapsz] = {
0x00, 0x00, 0x00, 0x00,
};
-void __init udbg_init_btext(void)
-{
- /* If btext is enabled, we might have a BAT setup for early display,
- * thus we do enable some very basic udbg output
- */
- udbg_putc = btext_drawchar;
-}
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index 92c6b008dd2b..654932727873 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -131,7 +131,8 @@ static const char *cache_type_string(const struct cache *cache)
return cache_type_info[cache->type].name;
}
-static void __cpuinit cache_init(struct cache *cache, int type, int level, struct device_node *ofnode)
+static void cache_init(struct cache *cache, int type, int level,
+ struct device_node *ofnode)
{
cache->type = type;
cache->level = level;
@@ -140,7 +141,7 @@ static void __cpuinit cache_init(struct cache *cache, int type, int level, struc
list_add(&cache->list, &cache_list);
}
-static struct cache *__cpuinit new_cache(int type, int level, struct device_node *ofnode)
+static struct cache *new_cache(int type, int level, struct device_node *ofnode)
{
struct cache *cache;
@@ -195,7 +196,7 @@ static void cache_cpu_set(struct cache *cache, int cpu)
static int cache_size(const struct cache *cache, unsigned int *ret)
{
const char *propname;
- const u32 *cache_size;
+ const __be32 *cache_size;
propname = cache_type_info[cache->type].size_prop;
@@ -203,7 +204,7 @@ static int cache_size(const struct cache *cache, unsigned int *ret)
if (!cache_size)
return -ENODEV;
- *ret = *cache_size;
+ *ret = of_read_number(cache_size, 1);
return 0;
}
@@ -221,7 +222,7 @@ static int cache_size_kb(const struct cache *cache, unsigned int *ret)
/* not cache_line_size() because that's a macro in include/linux/cache.h */
static int cache_get_line_size(const struct cache *cache, unsigned int *ret)
{
- const u32 *line_size;
+ const __be32 *line_size;
int i, lim;
lim = ARRAY_SIZE(cache_type_info[cache->type].line_size_props);
@@ -238,14 +239,14 @@ static int cache_get_line_size(const struct cache *cache, unsigned int *ret)
if (!line_size)
return -ENODEV;
- *ret = *line_size;
+ *ret = of_read_number(line_size, 1);
return 0;
}
static int cache_nr_sets(const struct cache *cache, unsigned int *ret)
{
const char *propname;
- const u32 *nr_sets;
+ const __be32 *nr_sets;
propname = cache_type_info[cache->type].nr_sets_prop;
@@ -253,7 +254,7 @@ static int cache_nr_sets(const struct cache *cache, unsigned int *ret)
if (!nr_sets)
return -ENODEV;
- *ret = *nr_sets;
+ *ret = of_read_number(nr_sets, 1);
return 0;
}
@@ -324,7 +325,8 @@ static bool cache_node_is_unified(const struct device_node *np)
return of_get_property(np, "cache-unified", NULL);
}
-static struct cache *__cpuinit cache_do_one_devnode_unified(struct device_node *node, int level)
+static struct cache *cache_do_one_devnode_unified(struct device_node *node,
+ int level)
{
struct cache *cache;
@@ -335,7 +337,8 @@ static struct cache *__cpuinit cache_do_one_devnode_unified(struct device_node *
return cache;
}
-static struct cache *__cpuinit cache_do_one_devnode_split(struct device_node *node, int level)
+static struct cache *cache_do_one_devnode_split(struct device_node *node,
+ int level)
{
struct cache *dcache, *icache;
@@ -357,7 +360,7 @@ err:
return NULL;
}
-static struct cache *__cpuinit cache_do_one_devnode(struct device_node *node, int level)
+static struct cache *cache_do_one_devnode(struct device_node *node, int level)
{
struct cache *cache;
@@ -369,7 +372,8 @@ static struct cache *__cpuinit cache_do_one_devnode(struct device_node *node, in
return cache;
}
-static struct cache *__cpuinit cache_lookup_or_instantiate(struct device_node *node, int level)
+static struct cache *cache_lookup_or_instantiate(struct device_node *node,
+ int level)
{
struct cache *cache;
@@ -385,7 +389,7 @@ static struct cache *__cpuinit cache_lookup_or_instantiate(struct device_node *n
return cache;
}
-static void __cpuinit link_cache_lists(struct cache *smaller, struct cache *bigger)
+static void link_cache_lists(struct cache *smaller, struct cache *bigger)
{
while (smaller->next_local) {
if (smaller->next_local == bigger)
@@ -396,13 +400,13 @@ static void __cpuinit link_cache_lists(struct cache *smaller, struct cache *bigg
smaller->next_local = bigger;
}
-static void __cpuinit do_subsidiary_caches_debugcheck(struct cache *cache)
+static void do_subsidiary_caches_debugcheck(struct cache *cache)
{
WARN_ON_ONCE(cache->level != 1);
WARN_ON_ONCE(strcmp(cache->ofnode->type, "cpu"));
}
-static void __cpuinit do_subsidiary_caches(struct cache *cache)
+static void do_subsidiary_caches(struct cache *cache)
{
struct device_node *subcache_node;
int level = cache->level;
@@ -423,7 +427,7 @@ static void __cpuinit do_subsidiary_caches(struct cache *cache)
}
}
-static struct cache *__cpuinit cache_chain_instantiate(unsigned int cpu_id)
+static struct cache *cache_chain_instantiate(unsigned int cpu_id)
{
struct device_node *cpu_node;
struct cache *cpu_cache = NULL;
@@ -448,7 +452,7 @@ out:
return cpu_cache;
}
-static struct cache_dir *__cpuinit cacheinfo_create_cache_dir(unsigned int cpu_id)
+static struct cache_dir *cacheinfo_create_cache_dir(unsigned int cpu_id)
{
struct cache_dir *cache_dir;
struct device *dev;
@@ -653,7 +657,7 @@ static struct kobj_type cache_index_type = {
.default_attrs = cache_index_default_attrs,
};
-static void __cpuinit cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
+static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
{
const char *cache_name;
const char *cache_type;
@@ -696,7 +700,8 @@ static void __cpuinit cacheinfo_create_index_opt_attrs(struct cache_index_dir *d
kfree(buf);
}
-static void __cpuinit cacheinfo_create_index_dir(struct cache *cache, int index, struct cache_dir *cache_dir)
+static void cacheinfo_create_index_dir(struct cache *cache, int index,
+ struct cache_dir *cache_dir)
{
struct cache_index_dir *index_dir;
int rc;
@@ -722,7 +727,8 @@ err:
kfree(index_dir);
}
-static void __cpuinit cacheinfo_sysfs_populate(unsigned int cpu_id, struct cache *cache_list)
+static void cacheinfo_sysfs_populate(unsigned int cpu_id,
+ struct cache *cache_list)
{
struct cache_dir *cache_dir;
struct cache *cache;
@@ -740,7 +746,7 @@ static void __cpuinit cacheinfo_sysfs_populate(unsigned int cpu_id, struct cache
}
}
-void __cpuinit cacheinfo_cpu_online(unsigned int cpu_id)
+void cacheinfo_cpu_online(unsigned int cpu_id)
{
struct cache *cache;
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index 0b9af015bedc..bfb18c7290b7 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -75,7 +75,7 @@ _GLOBAL(__setup_cpu_e500v2)
bl __e500_icache_setup
bl __e500_dcache_setup
bl __setup_e500_ivors
-#ifdef CONFIG_FSL_RIO
+#if defined(CONFIG_FSL_RIO) || defined(CONFIG_FSL_PCI)
/* Ensure that RFXE is set */
mfspr r3,SPRN_HID1
oris r3,r3,HID1_RFXE@h
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 1f0937d7d4b5..597d954e5860 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -452,8 +452,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
.mmu_features = MMU_FTRS_POWER8,
.icache_bsize = 128,
.dcache_bsize = 128,
- .oprofile_type = PPC_OPROFILE_POWER4,
- .oprofile_cpu_type = 0,
+ .oprofile_type = PPC_OPROFILE_INVALID,
+ .oprofile_cpu_type = "ppc64/ibm-compat-v1",
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
.platform = "power8",
@@ -494,9 +494,27 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_restore = __restore_cpu_power7,
.platform = "power7+",
},
- { /* Power8 */
+ { /* Power8E */
.pvr_mask = 0xffff0000,
.pvr_value = 0x004b0000,
+ .cpu_name = "POWER8E (raw)",
+ .cpu_features = CPU_FTRS_POWER8,
+ .cpu_user_features = COMMON_USER_POWER8,
+ .cpu_user_features2 = COMMON_USER2_POWER8,
+ .mmu_features = MMU_FTRS_POWER8,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power8",
+ .oprofile_type = PPC_OPROFILE_INVALID,
+ .cpu_setup = __setup_cpu_power8,
+ .cpu_restore = __restore_cpu_power8,
+ .platform = "power8",
+ },
+ { /* Power8 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x004d0000,
.cpu_name = "POWER8 (raw)",
.cpu_features = CPU_FTRS_POWER8,
.cpu_user_features = COMMON_USER_POWER8,
@@ -506,8 +524,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
.dcache_bsize = 128,
.num_pmcs = 6,
.pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = 0,
- .oprofile_type = PPC_OPROFILE_POWER4,
+ .oprofile_cpu_type = "ppc64/power8",
+ .oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
.platform = "power8",
@@ -2087,7 +2105,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
MMU_FTR_USE_TLBILX,
.icache_bsize = 64,
.dcache_bsize = 64,
- .num_pmcs = 4,
+ .num_pmcs = 6,
.oprofile_cpu_type = "ppc/e6500",
.oprofile_type = PPC_OPROFILE_FSL_EMB,
.cpu_setup = __setup_cpu_e6500,
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index 9ec3fe174cba..779a78c26435 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -69,16 +69,6 @@ void __init setup_kdump_trampoline(void)
}
#endif /* CONFIG_NONSTATIC_KERNEL */
-static int __init parse_savemaxmem(char *p)
-{
- if (p)
- saved_max_pfn = (memparse(p, &p) >> PAGE_SHIFT) - 1;
-
- return 1;
-}
-__setup("savemaxmem=", parse_savemaxmem);
-
-
static size_t copy_oldmem_vaddr(void *vaddr, char *buf, size_t csize,
unsigned long offset, int userbuf)
{
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
new file mode 100644
index 000000000000..55593ee2d5aa
--- /dev/null
+++ b/arch/powerpc/kernel/eeh.c
@@ -0,0 +1,1068 @@
+/*
+ * Copyright IBM Corporation 2001, 2005, 2006
+ * Copyright Dave Engebretsen & Todd Inglett 2001
+ * Copyright Linas Vepstas 2005, 2006
+ * Copyright 2001-2012 IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/proc_fs.h>
+#include <linux/rbtree.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/export.h>
+#include <linux/of.h>
+
+#include <linux/atomic.h>
+#include <asm/eeh.h>
+#include <asm/eeh_event.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+#include <asm/rtas.h>
+
+
+/** Overview:
+ * EEH, or "Extended Error Handling" is a PCI bridge technology for
+ * dealing with PCI bus errors that can't be dealt with within the
+ * usual PCI framework, except by check-stopping the CPU. Systems
+ * that are designed for high-availability/reliability cannot afford
+ * to crash due to a "mere" PCI error, thus the need for EEH.
+ * An EEH-capable bridge operates by converting a detected error
+ * into a "slot freeze", taking the PCI adapter off-line, making
+ * the slot behave, from the OS'es point of view, as if the slot
+ * were "empty": all reads return 0xff's and all writes are silently
+ * ignored. EEH slot isolation events can be triggered by parity
+ * errors on the address or data busses (e.g. during posted writes),
+ * which in turn might be caused by low voltage on the bus, dust,
+ * vibration, humidity, radioactivity or plain-old failed hardware.
+ *
+ * Note, however, that one of the leading causes of EEH slot
+ * freeze events are buggy device drivers, buggy device microcode,
+ * or buggy device hardware. This is because any attempt by the
+ * device to bus-master data to a memory address that is not
+ * assigned to the device will trigger a slot freeze. (The idea
+ * is to prevent devices-gone-wild from corrupting system memory).
+ * Buggy hardware/drivers will have a miserable time co-existing
+ * with EEH.
+ *
+ * Ideally, a PCI device driver, when suspecting that an isolation
+ * event has occurred (e.g. by reading 0xff's), will then ask EEH
+ * whether this is the case, and then take appropriate steps to
+ * reset the PCI slot, the PCI device, and then resume operations.
+ * However, until that day, the checking is done here, with the
+ * eeh_check_failure() routine embedded in the MMIO macros. If
+ * the slot is found to be isolated, an "EEH Event" is synthesized
+ * and sent out for processing.
+ */
+
+/* If a device driver keeps reading an MMIO register in an interrupt
+ * handler after a slot isolation event, it might be broken.
+ * This sets the threshold for how many read attempts we allow
+ * before printing an error message.
+ */
+#define EEH_MAX_FAILS 2100000
+
+/* Time to wait for a PCI slot to report status, in milliseconds */
+#define PCI_BUS_RESET_WAIT_MSEC (60*1000)
+
+/* Platform dependent EEH operations */
+struct eeh_ops *eeh_ops = NULL;
+
+int eeh_subsystem_enabled;
+EXPORT_SYMBOL(eeh_subsystem_enabled);
+
+/*
+ * EEH probe mode support. The intention is to support multiple
+ * platforms for EEH. Some platforms like pSeries do PCI emunation
+ * based on device tree. However, other platforms like powernv probe
+ * PCI devices from hardware. The flag is used to distinguish that.
+ * In addition, struct eeh_ops::probe would be invoked for particular
+ * OF node or PCI device so that the corresponding PE would be created
+ * there.
+ */
+int eeh_probe_mode;
+
+/* Lock to avoid races due to multiple reports of an error */
+DEFINE_RAW_SPINLOCK(confirm_error_lock);
+
+/* Buffer for reporting pci register dumps. Its here in BSS, and
+ * not dynamically alloced, so that it ends up in RMO where RTAS
+ * can access it.
+ */
+#define EEH_PCI_REGS_LOG_LEN 4096
+static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
+
+/*
+ * The struct is used to maintain the EEH global statistic
+ * information. Besides, the EEH global statistics will be
+ * exported to user space through procfs
+ */
+struct eeh_stats {
+ u64 no_device; /* PCI device not found */
+ u64 no_dn; /* OF node not found */
+ u64 no_cfg_addr; /* Config address not found */
+ u64 ignored_check; /* EEH check skipped */
+ u64 total_mmio_ffs; /* Total EEH checks */
+ u64 false_positives; /* Unnecessary EEH checks */
+ u64 slot_resets; /* PE reset */
+};
+
+static struct eeh_stats eeh_stats;
+
+#define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)
+
+/**
+ * eeh_gather_pci_data - Copy assorted PCI config space registers to buff
+ * @edev: device to report data for
+ * @buf: point to buffer in which to log
+ * @len: amount of room in buffer
+ *
+ * This routine captures assorted PCI configuration space data,
+ * and puts them into a buffer for RTAS error logging.
+ */
+static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len)
+{
+ struct device_node *dn = eeh_dev_to_of_node(edev);
+ struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+ u32 cfg;
+ int cap, i;
+ int n = 0;
+
+ n += scnprintf(buf+n, len-n, "%s\n", dn->full_name);
+ printk(KERN_WARNING "EEH: of node=%s\n", dn->full_name);
+
+ eeh_ops->read_config(dn, PCI_VENDOR_ID, 4, &cfg);
+ n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
+ printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg);
+
+ eeh_ops->read_config(dn, PCI_COMMAND, 4, &cfg);
+ n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
+ printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg);
+
+ if (!dev) {
+ printk(KERN_WARNING "EEH: no PCI device for this of node\n");
+ return n;
+ }
+
+ /* Gather bridge-specific registers */
+ if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
+ eeh_ops->read_config(dn, PCI_SEC_STATUS, 2, &cfg);
+ n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
+ printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg);
+
+ eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &cfg);
+ n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
+ printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg);
+ }
+
+ /* Dump out the PCI-X command and status regs */
+ cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
+ if (cap) {
+ eeh_ops->read_config(dn, cap, 4, &cfg);
+ n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
+ printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg);
+
+ eeh_ops->read_config(dn, cap+4, 4, &cfg);
+ n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
+ printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg);
+ }
+
+ /* If PCI-E capable, dump PCI-E cap 10, and the AER */
+ cap = pci_find_capability(dev, PCI_CAP_ID_EXP);
+ if (cap) {
+ n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
+ printk(KERN_WARNING
+ "EEH: PCI-E capabilities and status follow:\n");
+
+ for (i=0; i<=8; i++) {
+ eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
+ n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
+ printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg);
+ }
+
+ cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+ if (cap) {
+ n += scnprintf(buf+n, len-n, "pci-e AER:\n");
+ printk(KERN_WARNING
+ "EEH: PCI-E AER capability register set follows:\n");
+
+ for (i=0; i<14; i++) {
+ eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
+ n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
+ printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg);
+ }
+ }
+ }
+
+ return n;
+}
+
+/**
+ * eeh_slot_error_detail - Generate combined log including driver log and error log
+ * @pe: EEH PE
+ * @severity: temporary or permanent error log
+ *
+ * This routine should be called to generate the combined log, which
+ * is comprised of driver log and error log. The driver log is figured
+ * out from the config space of the corresponding PCI device, while
+ * the error log is fetched through platform dependent function call.
+ */
+void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
+{
+ size_t loglen = 0;
+ struct eeh_dev *edev, *tmp;
+ bool valid_cfg_log = true;
+
+ /*
+ * When the PHB is fenced or dead, it's pointless to collect
+ * the data from PCI config space because it should return
+ * 0xFF's. For ER, we still retrieve the data from the PCI
+ * config space.
+ */
+ if (eeh_probe_mode_dev() &&
+ (pe->type & EEH_PE_PHB) &&
+ (pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD)))
+ valid_cfg_log = false;
+
+ if (valid_cfg_log) {
+ eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
+ eeh_ops->configure_bridge(pe);
+ eeh_pe_restore_bars(pe);
+
+ pci_regs_buf[0] = 0;
+ eeh_pe_for_each_dev(pe, edev, tmp) {
+ loglen += eeh_gather_pci_data(edev, pci_regs_buf + loglen,
+ EEH_PCI_REGS_LOG_LEN - loglen);
+ }
+ }
+
+ eeh_ops->get_log(pe, severity, pci_regs_buf, loglen);
+}
+
+/**
+ * eeh_token_to_phys - Convert EEH address token to phys address
+ * @token: I/O token, should be address in the form 0xA....
+ *
+ * This routine should be called to convert virtual I/O address
+ * to physical one.
+ */
+static inline unsigned long eeh_token_to_phys(unsigned long token)
+{
+ pte_t *ptep;
+ unsigned long pa;
+ int hugepage_shift;
+
+ /*
+ * We won't find hugepages here, iomem
+ */
+ ptep = find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift);
+ if (!ptep)
+ return token;
+ WARN_ON(hugepage_shift);
+ pa = pte_pfn(*ptep) << PAGE_SHIFT;
+
+ return pa | (token & (PAGE_SIZE-1));
+}
+
+/*
+ * On PowerNV platform, we might already have fenced PHB there.
+ * For that case, it's meaningless to recover frozen PE. Intead,
+ * We have to handle fenced PHB firstly.
+ */
+static int eeh_phb_check_failure(struct eeh_pe *pe)
+{
+ struct eeh_pe *phb_pe;
+ unsigned long flags;
+ int ret;
+
+ if (!eeh_probe_mode_dev())
+ return -EPERM;
+
+ /* Find the PHB PE */
+ phb_pe = eeh_phb_pe_get(pe->phb);
+ if (!phb_pe) {
+ pr_warning("%s Can't find PE for PHB#%d\n",
+ __func__, pe->phb->global_number);
+ return -EEXIST;
+ }
+
+ /* If the PHB has been in problematic state */
+ eeh_serialize_lock(&flags);
+ if (phb_pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD)) {
+ ret = 0;
+ goto out;
+ }
+
+ /* Check PHB state */
+ ret = eeh_ops->get_state(phb_pe, NULL);
+ if ((ret < 0) ||
+ (ret == EEH_STATE_NOT_SUPPORT) ||
+ (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
+ (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
+ ret = 0;
+ goto out;
+ }
+
+ /* Isolate the PHB and send event */
+ eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
+ eeh_serialize_unlock(flags);
+ eeh_send_failure_event(phb_pe);
+
+ pr_err("EEH: PHB#%x failure detected\n",
+ phb_pe->phb->global_number);
+ dump_stack();
+
+ return 1;
+out:
+ eeh_serialize_unlock(flags);
+ return ret;
+}
+
+/**
+ * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze
+ * @edev: eeh device
+ *
+ * Check for an EEH failure for the given device node. Call this
+ * routine if the result of a read was all 0xff's and you want to
+ * find out if this is due to an EEH slot freeze. This routine
+ * will query firmware for the EEH status.
+ *
+ * Returns 0 if there has not been an EEH error; otherwise returns
+ * a non-zero value and queues up a slot isolation event notification.
+ *
+ * It is safe to call this routine in an interrupt context.
+ */
+int eeh_dev_check_failure(struct eeh_dev *edev)
+{
+ int ret;
+ unsigned long flags;
+ struct device_node *dn;
+ struct pci_dev *dev;
+ struct eeh_pe *pe;
+ int rc = 0;
+ const char *location;
+
+ eeh_stats.total_mmio_ffs++;
+
+ if (!eeh_subsystem_enabled)
+ return 0;
+
+ if (!edev) {
+ eeh_stats.no_dn++;
+ return 0;
+ }
+ dn = eeh_dev_to_of_node(edev);
+ dev = eeh_dev_to_pci_dev(edev);
+ pe = edev->pe;
+
+ /* Access to IO BARs might get this far and still not want checking. */
+ if (!pe) {
+ eeh_stats.ignored_check++;
+ pr_debug("EEH: Ignored check for %s %s\n",
+ eeh_pci_name(dev), dn->full_name);
+ return 0;
+ }
+
+ if (!pe->addr && !pe->config_addr) {
+ eeh_stats.no_cfg_addr++;
+ return 0;
+ }
+
+ /*
+ * On PowerNV platform, we might already have fenced PHB
+ * there and we need take care of that firstly.
+ */
+ ret = eeh_phb_check_failure(pe);
+ if (ret > 0)
+ return ret;
+
+ /* If we already have a pending isolation event for this
+ * slot, we know it's bad already, we don't need to check.
+ * Do this checking under a lock; as multiple PCI devices
+ * in one slot might report errors simultaneously, and we
+ * only want one error recovery routine running.
+ */
+ eeh_serialize_lock(&flags);
+ rc = 1;
+ if (pe->state & EEH_PE_ISOLATED) {
+ pe->check_count++;
+ if (pe->check_count % EEH_MAX_FAILS == 0) {
+ location = of_get_property(dn, "ibm,loc-code", NULL);
+ printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
+ "location=%s driver=%s pci addr=%s\n",
+ pe->check_count, location,
+ eeh_driver_name(dev), eeh_pci_name(dev));
+ printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
+ eeh_driver_name(dev));
+ dump_stack();
+ }
+ goto dn_unlock;
+ }
+
+ /*
+ * Now test for an EEH failure. This is VERY expensive.
+ * Note that the eeh_config_addr may be a parent device
+ * in the case of a device behind a bridge, or it may be
+ * function zero of a multi-function device.
+ * In any case they must share a common PHB.
+ */
+ ret = eeh_ops->get_state(pe, NULL);
+
+ /* Note that config-io to empty slots may fail;
+ * they are empty when they don't have children.
+ * We will punt with the following conditions: Failure to get
+ * PE's state, EEH not support and Permanently unavailable
+ * state, PE is in good state.
+ */
+ if ((ret < 0) ||
+ (ret == EEH_STATE_NOT_SUPPORT) ||
+ (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
+ (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
+ eeh_stats.false_positives++;
+ pe->false_positives++;
+ rc = 0;
+ goto dn_unlock;
+ }
+
+ eeh_stats.slot_resets++;
+
+ /* Avoid repeated reports of this failure, including problems
+ * with other functions on this device, and functions under
+ * bridges.
+ */
+ eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+ eeh_serialize_unlock(flags);
+
+ eeh_send_failure_event(pe);
+
+ /* Most EEH events are due to device driver bugs. Having
+ * a stack trace will help the device-driver authors figure
+ * out what happened. So print that out.
+ */
+ pr_err("EEH: Frozen PE#%x detected on PHB#%x\n",
+ pe->addr, pe->phb->global_number);
+ dump_stack();
+
+ return 1;
+
+dn_unlock:
+ eeh_serialize_unlock(flags);
+ return rc;
+}
+
+EXPORT_SYMBOL_GPL(eeh_dev_check_failure);
+
+/**
+ * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
+ * @token: I/O token, should be address in the form 0xA....
+ * @val: value, should be all 1's (XXX why do we need this arg??)
+ *
+ * Check for an EEH failure at the given token address. Call this
+ * routine if the result of a read was all 0xff's and you want to
+ * find out if this is due to an EEH slot freeze event. This routine
+ * will query firmware for the EEH status.
+ *
+ * Note this routine is safe to call in an interrupt context.
+ */
+unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
+{
+ unsigned long addr;
+ struct eeh_dev *edev;
+
+ /* Finding the phys addr + pci device; this is pretty quick. */
+ addr = eeh_token_to_phys((unsigned long __force) token);
+ edev = eeh_addr_cache_get_dev(addr);
+ if (!edev) {
+ eeh_stats.no_device++;
+ return val;
+ }
+
+ eeh_dev_check_failure(edev);
+ return val;
+}
+
+EXPORT_SYMBOL(eeh_check_failure);
+
+
+/**
+ * eeh_pci_enable - Enable MMIO or DMA transfers for this slot
+ * @pe: EEH PE
+ *
+ * This routine should be called to reenable frozen MMIO or DMA
+ * so that it would work correctly again. It's useful while doing
+ * recovery or log collection on the indicated device.
+ */
+int eeh_pci_enable(struct eeh_pe *pe, int function)
+{
+ int rc;
+
+ rc = eeh_ops->set_option(pe, function);
+ if (rc)
+ pr_warning("%s: Unexpected state change %d on PHB#%d-PE#%x, err=%d\n",
+ __func__, function, pe->phb->global_number, pe->addr, rc);
+
+ rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
+ if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) &&
+ (function == EEH_OPT_THAW_MMIO))
+ return 0;
+
+ return rc;
+}
+
+/**
+ * pcibios_set_pcie_slot_reset - Set PCI-E reset state
+ * @dev: pci device struct
+ * @state: reset state to enter
+ *
+ * Return value:
+ * 0 if success
+ */
+int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
+{
+ struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
+ struct eeh_pe *pe = edev->pe;
+
+ if (!pe) {
+ pr_err("%s: No PE found on PCI device %s\n",
+ __func__, pci_name(dev));
+ return -EINVAL;
+ }
+
+ switch (state) {
+ case pcie_deassert_reset:
+ eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
+ break;
+ case pcie_hot_reset:
+ eeh_ops->reset(pe, EEH_RESET_HOT);
+ break;
+ case pcie_warm_reset:
+ eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
+ break;
+ default:
+ return -EINVAL;
+ };
+
+ return 0;
+}
+
+/**
+ * eeh_set_pe_freset - Check the required reset for the indicated device
+ * @data: EEH device
+ * @flag: return value
+ *
+ * Each device might have its preferred reset type: fundamental or
+ * hot reset. The routine is used to collected the information for
+ * the indicated device and its children so that the bunch of the
+ * devices could be reset properly.
+ */
+static void *eeh_set_dev_freset(void *data, void *flag)
+{
+ struct pci_dev *dev;
+ unsigned int *freset = (unsigned int *)flag;
+ struct eeh_dev *edev = (struct eeh_dev *)data;
+
+ dev = eeh_dev_to_pci_dev(edev);
+ if (dev)
+ *freset |= dev->needs_freset;
+
+ return NULL;
+}
+
+/**
+ * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second
+ * @pe: EEH PE
+ *
+ * Assert the PCI #RST line for 1/4 second.
+ */
+static void eeh_reset_pe_once(struct eeh_pe *pe)
+{
+ unsigned int freset = 0;
+
+ /* Determine type of EEH reset required for
+ * Partitionable Endpoint, a hot-reset (1)
+ * or a fundamental reset (3).
+ * A fundamental reset required by any device under
+ * Partitionable Endpoint trumps hot-reset.
+ */
+ eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset);
+
+ if (freset)
+ eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
+ else
+ eeh_ops->reset(pe, EEH_RESET_HOT);
+
+ /* The PCI bus requires that the reset be held high for at least
+ * a 100 milliseconds. We wait a bit longer 'just in case'.
+ */
+#define PCI_BUS_RST_HOLD_TIME_MSEC 250
+ msleep(PCI_BUS_RST_HOLD_TIME_MSEC);
+
+ /* We might get hit with another EEH freeze as soon as the
+ * pci slot reset line is dropped. Make sure we don't miss
+ * these, and clear the flag now.
+ */
+ eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
+
+ eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
+
+ /* After a PCI slot has been reset, the PCI Express spec requires
+ * a 1.5 second idle time for the bus to stabilize, before starting
+ * up traffic.
+ */
+#define PCI_BUS_SETTLE_TIME_MSEC 1800
+ msleep(PCI_BUS_SETTLE_TIME_MSEC);
+}
+
+/**
+ * eeh_reset_pe - Reset the indicated PE
+ * @pe: EEH PE
+ *
+ * This routine should be called to reset indicated device, including
+ * PE. A PE might include multiple PCI devices and sometimes PCI bridges
+ * might be involved as well.
+ */
+int eeh_reset_pe(struct eeh_pe *pe)
+{
+ int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
+ int i, rc;
+
+ /* Take three shots at resetting the bus */
+ for (i=0; i<3; i++) {
+ eeh_reset_pe_once(pe);
+
+ rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
+ if ((rc & flags) == flags)
+ return 0;
+
+ if (rc < 0) {
+ pr_err("%s: Unrecoverable slot failure on PHB#%d-PE#%x",
+ __func__, pe->phb->global_number, pe->addr);
+ return -1;
+ }
+ pr_err("EEH: bus reset %d failed on PHB#%d-PE#%x, rc=%d\n",
+ i+1, pe->phb->global_number, pe->addr, rc);
+ }
+
+ return -1;
+}
+
+/**
+ * eeh_save_bars - Save device bars
+ * @edev: PCI device associated EEH device
+ *
+ * Save the values of the device bars. Unlike the restore
+ * routine, this routine is *not* recursive. This is because
+ * PCI devices are added individually; but, for the restore,
+ * an entire slot is reset at a time.
+ */
+void eeh_save_bars(struct eeh_dev *edev)
+{
+ int i;
+ struct device_node *dn;
+
+ if (!edev)
+ return;
+ dn = eeh_dev_to_of_node(edev);
+
+ for (i = 0; i < 16; i++)
+ eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]);
+}
+
+/**
+ * eeh_ops_register - Register platform dependent EEH operations
+ * @ops: platform dependent EEH operations
+ *
+ * Register the platform dependent EEH operation callback
+ * functions. The platform should call this function before
+ * any other EEH operations.
+ */
+int __init eeh_ops_register(struct eeh_ops *ops)
+{
+ if (!ops->name) {
+ pr_warning("%s: Invalid EEH ops name for %p\n",
+ __func__, ops);
+ return -EINVAL;
+ }
+
+ if (eeh_ops && eeh_ops != ops) {
+ pr_warning("%s: EEH ops of platform %s already existing (%s)\n",
+ __func__, eeh_ops->name, ops->name);
+ return -EEXIST;
+ }
+
+ eeh_ops = ops;
+
+ return 0;
+}
+
+/**
+ * eeh_ops_unregister - Unreigster platform dependent EEH operations
+ * @name: name of EEH platform operations
+ *
+ * Unregister the platform dependent EEH operation callback
+ * functions.
+ */
+int __exit eeh_ops_unregister(const char *name)
+{
+ if (!name || !strlen(name)) {
+ pr_warning("%s: Invalid EEH ops name\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ if (eeh_ops && !strcmp(eeh_ops->name, name)) {
+ eeh_ops = NULL;
+ return 0;
+ }
+
+ return -EEXIST;
+}
+
+/**
+ * eeh_init - EEH initialization
+ *
+ * Initialize EEH by trying to enable it for all of the adapters in the system.
+ * As a side effect we can determine here if eeh is supported at all.
+ * Note that we leave EEH on so failed config cycles won't cause a machine
+ * check. If a user turns off EEH for a particular adapter they are really
+ * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't
+ * grant access to a slot if EEH isn't enabled, and so we always enable
+ * EEH for all slots/all devices.
+ *
+ * The eeh-force-off option disables EEH checking globally, for all slots.
+ * Even if force-off is set, the EEH hardware is still enabled, so that
+ * newer systems can boot.
+ */
+int eeh_init(void)
+{
+ struct pci_controller *hose, *tmp;
+ struct device_node *phb;
+ static int cnt = 0;
+ int ret = 0;
+
+ /*
+ * We have to delay the initialization on PowerNV after
+ * the PCI hierarchy tree has been built because the PEs
+ * are figured out based on PCI devices instead of device
+ * tree nodes
+ */
+ if (machine_is(powernv) && cnt++ <= 0)
+ return ret;
+
+ /* call platform initialization function */
+ if (!eeh_ops) {
+ pr_warning("%s: Platform EEH operation not found\n",
+ __func__);
+ return -EEXIST;
+ } else if ((ret = eeh_ops->init())) {
+ pr_warning("%s: Failed to call platform init function (%d)\n",
+ __func__, ret);
+ return ret;
+ }
+
+ /* Initialize EEH event */
+ ret = eeh_event_init();
+ if (ret)
+ return ret;
+
+ /* Enable EEH for all adapters */
+ if (eeh_probe_mode_devtree()) {
+ list_for_each_entry_safe(hose, tmp,
+ &hose_list, list_node) {
+ phb = hose->dn;
+ traverse_pci_devices(phb, eeh_ops->of_probe, NULL);
+ }
+ } else if (eeh_probe_mode_dev()) {
+ list_for_each_entry_safe(hose, tmp,
+ &hose_list, list_node)
+ pci_walk_bus(hose->bus, eeh_ops->dev_probe, NULL);
+ } else {
+ pr_warning("%s: Invalid probe mode %d\n",
+ __func__, eeh_probe_mode);
+ return -EINVAL;
+ }
+
+ /*
+ * Call platform post-initialization. Actually, It's good chance
+ * to inform platform that EEH is ready to supply service if the
+ * I/O cache stuff has been built up.
+ */
+ if (eeh_ops->post_init) {
+ ret = eeh_ops->post_init();
+ if (ret)
+ return ret;
+ }
+
+ if (eeh_subsystem_enabled)
+ pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
+ else
+ pr_warning("EEH: No capable adapters found\n");
+
+ return ret;
+}
+
+core_initcall_sync(eeh_init);
+
+/**
+ * eeh_add_device_early - Enable EEH for the indicated device_node
+ * @dn: device node for which to set up EEH
+ *
+ * This routine must be used to perform EEH initialization for PCI
+ * devices that were added after system boot (e.g. hotplug, dlpar).
+ * This routine must be called before any i/o is performed to the
+ * adapter (inluding any config-space i/o).
+ * Whether this actually enables EEH or not for this device depends
+ * on the CEC architecture, type of the device, on earlier boot
+ * command-line arguments & etc.
+ */
+void eeh_add_device_early(struct device_node *dn)
+{
+ struct pci_controller *phb;
+
+ /*
+ * If we're doing EEH probe based on PCI device, we
+ * would delay the probe until late stage because
+ * the PCI device isn't available this moment.
+ */
+ if (!eeh_probe_mode_devtree())
+ return;
+
+ if (!of_node_to_eeh_dev(dn))
+ return;
+ phb = of_node_to_eeh_dev(dn)->phb;
+
+ /* USB Bus children of PCI devices will not have BUID's */
+ if (NULL == phb || 0 == phb->buid)
+ return;
+
+ eeh_ops->of_probe(dn, NULL);
+}
+
+/**
+ * eeh_add_device_tree_early - Enable EEH for the indicated device
+ * @dn: device node
+ *
+ * This routine must be used to perform EEH initialization for the
+ * indicated PCI device that was added after system boot (e.g.
+ * hotplug, dlpar).
+ */
+void eeh_add_device_tree_early(struct device_node *dn)
+{
+ struct device_node *sib;
+
+ for_each_child_of_node(dn, sib)
+ eeh_add_device_tree_early(sib);
+ eeh_add_device_early(dn);
+}
+EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
+
+/**
+ * eeh_add_device_late - Perform EEH initialization for the indicated pci device
+ * @dev: pci device for which to set up EEH
+ *
+ * This routine must be used to complete EEH initialization for PCI
+ * devices that were added after system boot (e.g. hotplug, dlpar).
+ */
+void eeh_add_device_late(struct pci_dev *dev)
+{
+ struct device_node *dn;
+ struct eeh_dev *edev;
+
+ if (!dev || !eeh_subsystem_enabled)
+ return;
+
+ pr_debug("EEH: Adding device %s\n", pci_name(dev));
+
+ dn = pci_device_to_OF_node(dev);
+ edev = of_node_to_eeh_dev(dn);
+ if (edev->pdev == dev) {
+ pr_debug("EEH: Already referenced !\n");
+ return;
+ }
+
+ /*
+ * The EEH cache might not be removed correctly because of
+ * unbalanced kref to the device during unplug time, which
+ * relies on pcibios_release_device(). So we have to remove
+ * that here explicitly.
+ */
+ if (edev->pdev) {
+ eeh_rmv_from_parent_pe(edev);
+ eeh_addr_cache_rmv_dev(edev->pdev);
+ eeh_sysfs_remove_device(edev->pdev);
+ edev->mode &= ~EEH_DEV_SYSFS;
+
+ edev->pdev = NULL;
+ dev->dev.archdata.edev = NULL;
+ }
+
+ edev->pdev = dev;
+ dev->dev.archdata.edev = edev;
+
+ /*
+ * We have to do the EEH probe here because the PCI device
+ * hasn't been created yet in the early stage.
+ */
+ if (eeh_probe_mode_dev())
+ eeh_ops->dev_probe(dev, NULL);
+
+ eeh_addr_cache_insert_dev(dev);
+}
+
+/**
+ * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus
+ * @bus: PCI bus
+ *
+ * This routine must be used to perform EEH initialization for PCI
+ * devices which are attached to the indicated PCI bus. The PCI bus
+ * is added after system boot through hotplug or dlpar.
+ */
+void eeh_add_device_tree_late(struct pci_bus *bus)
+{
+ struct pci_dev *dev;
+
+ list_for_each_entry(dev, &bus->devices, bus_list) {
+ eeh_add_device_late(dev);
+ if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
+ struct pci_bus *subbus = dev->subordinate;
+ if (subbus)
+ eeh_add_device_tree_late(subbus);
+ }
+ }
+}
+EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);
+
+/**
+ * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus
+ * @bus: PCI bus
+ *
+ * This routine must be used to add EEH sysfs files for PCI
+ * devices which are attached to the indicated PCI bus. The PCI bus
+ * is added after system boot through hotplug or dlpar.
+ */
+void eeh_add_sysfs_files(struct pci_bus *bus)
+{
+ struct pci_dev *dev;
+
+ list_for_each_entry(dev, &bus->devices, bus_list) {
+ eeh_sysfs_add_device(dev);
+ if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
+ struct pci_bus *subbus = dev->subordinate;
+ if (subbus)
+ eeh_add_sysfs_files(subbus);
+ }
+ }
+}
+EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
+
+/**
+ * eeh_remove_device - Undo EEH setup for the indicated pci device
+ * @dev: pci device to be removed
+ *
+ * This routine should be called when a device is removed from
+ * a running system (e.g. by hotplug or dlpar). It unregisters
+ * the PCI device from the EEH subsystem. I/O errors affecting
+ * this device will no longer be detected after this call; thus,
+ * i/o errors affecting this slot may leave this device unusable.
+ */
+void eeh_remove_device(struct pci_dev *dev)
+{
+ struct eeh_dev *edev;
+
+ if (!dev || !eeh_subsystem_enabled)
+ return;
+ edev = pci_dev_to_eeh_dev(dev);
+
+ /* Unregister the device with the EEH/PCI address search system */
+ pr_debug("EEH: Removing device %s\n", pci_name(dev));
+
+ if (!edev || !edev->pdev || !edev->pe) {
+ pr_debug("EEH: Not referenced !\n");
+ return;
+ }
+
+ /*
+ * During the hotplug for EEH error recovery, we need the EEH
+ * device attached to the parent PE in order for BAR restore
+ * a bit later. So we keep it for BAR restore and remove it
+ * from the parent PE during the BAR resotre.
+ */
+ edev->pdev = NULL;
+ dev->dev.archdata.edev = NULL;
+ if (!(edev->pe->state & EEH_PE_KEEP))
+ eeh_rmv_from_parent_pe(edev);
+ else
+ edev->mode |= EEH_DEV_DISCONNECTED;
+
+ eeh_addr_cache_rmv_dev(dev);
+ eeh_sysfs_remove_device(dev);
+ edev->mode &= ~EEH_DEV_SYSFS;
+}
+
+static int proc_eeh_show(struct seq_file *m, void *v)
+{
+ if (0 == eeh_subsystem_enabled) {
+ seq_printf(m, "EEH Subsystem is globally disabled\n");
+ seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
+ } else {
+ seq_printf(m, "EEH Subsystem is enabled\n");
+ seq_printf(m,
+ "no device=%llu\n"
+ "no device node=%llu\n"
+ "no config address=%llu\n"
+ "check not wanted=%llu\n"
+ "eeh_total_mmio_ffs=%llu\n"
+ "eeh_false_positives=%llu\n"
+ "eeh_slot_resets=%llu\n",
+ eeh_stats.no_device,
+ eeh_stats.no_dn,
+ eeh_stats.no_cfg_addr,
+ eeh_stats.ignored_check,
+ eeh_stats.total_mmio_ffs,
+ eeh_stats.false_positives,
+ eeh_stats.slot_resets);
+ }
+
+ return 0;
+}
+
+static int proc_eeh_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, proc_eeh_show, NULL);
+}
+
+static const struct file_operations proc_eeh_operations = {
+ .open = proc_eeh_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int __init eeh_init_proc(void)
+{
+ if (machine_is(pseries) || machine_is(powernv))
+ proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
+ return 0;
+}
+__initcall(eeh_init_proc);
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
new file mode 100644
index 000000000000..e8c9fd546a5c
--- /dev/null
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -0,0 +1,310 @@
+/*
+ * PCI address cache; allows the lookup of PCI devices based on I/O address
+ *
+ * Copyright IBM Corporation 2004
+ * Copyright Linas Vepstas <linas@austin.ibm.com> 2004
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/rbtree.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+
+
+/**
+ * The pci address cache subsystem. This subsystem places
+ * PCI device address resources into a red-black tree, sorted
+ * according to the address range, so that given only an i/o
+ * address, the corresponding PCI device can be **quickly**
+ * found. It is safe to perform an address lookup in an interrupt
+ * context; this ability is an important feature.
+ *
+ * Currently, the only customer of this code is the EEH subsystem;
+ * thus, this code has been somewhat tailored to suit EEH better.
+ * In particular, the cache does *not* hold the addresses of devices
+ * for which EEH is not enabled.
+ *
+ * (Implementation Note: The RB tree seems to be better/faster
+ * than any hash algo I could think of for this problem, even
+ * with the penalty of slow pointer chases for d-cache misses).
+ */
+struct pci_io_addr_range {
+ struct rb_node rb_node;
+ unsigned long addr_lo;
+ unsigned long addr_hi;
+ struct eeh_dev *edev;
+ struct pci_dev *pcidev;
+ unsigned int flags;
+};
+
+static struct pci_io_addr_cache {
+ struct rb_root rb_root;
+ spinlock_t piar_lock;
+} pci_io_addr_cache_root;
+
+static inline struct eeh_dev *__eeh_addr_cache_get_device(unsigned long addr)
+{
+ struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node;
+
+ while (n) {
+ struct pci_io_addr_range *piar;
+ piar = rb_entry(n, struct pci_io_addr_range, rb_node);
+
+ if (addr < piar->addr_lo)
+ n = n->rb_left;
+ else if (addr > piar->addr_hi)
+ n = n->rb_right;
+ else
+ return piar->edev;
+ }
+
+ return NULL;
+}
+
+/**
+ * eeh_addr_cache_get_dev - Get device, given only address
+ * @addr: mmio (PIO) phys address or i/o port number
+ *
+ * Given an mmio phys address, or a port number, find a pci device
+ * that implements this address. Be sure to pci_dev_put the device
+ * when finished. I/O port numbers are assumed to be offset
+ * from zero (that is, they do *not* have pci_io_addr added in).
+ * It is safe to call this function within an interrupt.
+ */
+struct eeh_dev *eeh_addr_cache_get_dev(unsigned long addr)
+{
+ struct eeh_dev *edev;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
+ edev = __eeh_addr_cache_get_device(addr);
+ spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
+ return edev;
+}
+
+#ifdef DEBUG
+/*
+ * Handy-dandy debug print routine, does nothing more
+ * than print out the contents of our addr cache.
+ */
+static void eeh_addr_cache_print(struct pci_io_addr_cache *cache)
+{
+ struct rb_node *n;
+ int cnt = 0;
+
+ n = rb_first(&cache->rb_root);
+ while (n) {
+ struct pci_io_addr_range *piar;
+ piar = rb_entry(n, struct pci_io_addr_range, rb_node);
+ pr_debug("PCI: %s addr range %d [%lx-%lx]: %s\n",
+ (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt,
+ piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev));
+ cnt++;
+ n = rb_next(n);
+ }
+}
+#endif
+
+/* Insert address range into the rb tree. */
+static struct pci_io_addr_range *
+eeh_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
+ unsigned long ahi, unsigned int flags)
+{
+ struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node;
+ struct rb_node *parent = NULL;
+ struct pci_io_addr_range *piar;
+
+ /* Walk tree, find a place to insert into tree */
+ while (*p) {
+ parent = *p;
+ piar = rb_entry(parent, struct pci_io_addr_range, rb_node);
+ if (ahi < piar->addr_lo) {
+ p = &parent->rb_left;
+ } else if (alo > piar->addr_hi) {
+ p = &parent->rb_right;
+ } else {
+ if (dev != piar->pcidev ||
+ alo != piar->addr_lo || ahi != piar->addr_hi) {
+ pr_warning("PIAR: overlapping address range\n");
+ }
+ return piar;
+ }
+ }
+ piar = kzalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC);
+ if (!piar)
+ return NULL;
+
+ piar->addr_lo = alo;
+ piar->addr_hi = ahi;
+ piar->edev = pci_dev_to_eeh_dev(dev);
+ piar->pcidev = dev;
+ piar->flags = flags;
+
+#ifdef DEBUG
+ pr_debug("PIAR: insert range=[%lx:%lx] dev=%s\n",
+ alo, ahi, pci_name(dev));
+#endif
+
+ rb_link_node(&piar->rb_node, parent, p);
+ rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);
+
+ return piar;
+}
+
+static void __eeh_addr_cache_insert_dev(struct pci_dev *dev)
+{
+ struct device_node *dn;
+ struct eeh_dev *edev;
+ int i;
+
+ dn = pci_device_to_OF_node(dev);
+ if (!dn) {
+ pr_warning("PCI: no pci dn found for dev=%s\n", pci_name(dev));
+ return;
+ }
+
+ edev = of_node_to_eeh_dev(dn);
+ if (!edev) {
+ pr_warning("PCI: no EEH dev found for dn=%s\n",
+ dn->full_name);
+ return;
+ }
+
+ /* Skip any devices for which EEH is not enabled. */
+ if (!eeh_probe_mode_dev() && !edev->pe) {
+#ifdef DEBUG
+ pr_info("PCI: skip building address cache for=%s - %s\n",
+ pci_name(dev), dn->full_name);
+#endif
+ return;
+ }
+
+ /* Walk resources on this device, poke them into the tree */
+ for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
+ unsigned long start = pci_resource_start(dev,i);
+ unsigned long end = pci_resource_end(dev,i);
+ unsigned int flags = pci_resource_flags(dev,i);
+
+ /* We are interested only bus addresses, not dma or other stuff */
+ if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM)))
+ continue;
+ if (start == 0 || ~start == 0 || end == 0 || ~end == 0)
+ continue;
+ eeh_addr_cache_insert(dev, start, end, flags);
+ }
+}
+
+/**
+ * eeh_addr_cache_insert_dev - Add a device to the address cache
+ * @dev: PCI device whose I/O addresses we are interested in.
+ *
+ * In order to support the fast lookup of devices based on addresses,
+ * we maintain a cache of devices that can be quickly searched.
+ * This routine adds a device to that cache.
+ */
+void eeh_addr_cache_insert_dev(struct pci_dev *dev)
+{
+ unsigned long flags;
+
+ /* Ignore PCI bridges */
+ if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE)
+ return;
+
+ spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
+ __eeh_addr_cache_insert_dev(dev);
+ spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
+}
+
+static inline void __eeh_addr_cache_rmv_dev(struct pci_dev *dev)
+{
+ struct rb_node *n;
+
+restart:
+ n = rb_first(&pci_io_addr_cache_root.rb_root);
+ while (n) {
+ struct pci_io_addr_range *piar;
+ piar = rb_entry(n, struct pci_io_addr_range, rb_node);
+
+ if (piar->pcidev == dev) {
+ rb_erase(n, &pci_io_addr_cache_root.rb_root);
+ kfree(piar);
+ goto restart;
+ }
+ n = rb_next(n);
+ }
+}
+
+/**
+ * eeh_addr_cache_rmv_dev - remove pci device from addr cache
+ * @dev: device to remove
+ *
+ * Remove a device from the addr-cache tree.
+ * This is potentially expensive, since it will walk
+ * the tree multiple times (once per resource).
+ * But so what; device removal doesn't need to be that fast.
+ */
+void eeh_addr_cache_rmv_dev(struct pci_dev *dev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
+ __eeh_addr_cache_rmv_dev(dev);
+ spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
+}
+
+/**
+ * eeh_addr_cache_build - Build a cache of I/O addresses
+ *
+ * Build a cache of pci i/o addresses. This cache will be used to
+ * find the pci device that corresponds to a given address.
+ * This routine scans all pci busses to build the cache.
+ * Must be run late in boot process, after the pci controllers
+ * have been scanned for devices (after all device resources are known).
+ */
+void eeh_addr_cache_build(void)
+{
+ struct device_node *dn;
+ struct eeh_dev *edev;
+ struct pci_dev *dev = NULL;
+
+ spin_lock_init(&pci_io_addr_cache_root.piar_lock);
+
+ for_each_pci_dev(dev) {
+ dn = pci_device_to_OF_node(dev);
+ if (!dn)
+ continue;
+
+ edev = of_node_to_eeh_dev(dn);
+ if (!edev)
+ continue;
+
+ dev->dev.archdata.edev = edev;
+ edev->pdev = dev;
+
+ eeh_addr_cache_insert_dev(dev);
+ eeh_sysfs_add_device(dev);
+ }
+
+#ifdef DEBUG
+ /* Verify tree built up above, echo back the list of addrs. */
+ eeh_addr_cache_print(&pci_io_addr_cache_root);
+#endif
+}
diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
new file mode 100644
index 000000000000..1efa28f5fc54
--- /dev/null
+++ b/arch/powerpc/kernel/eeh_dev.c
@@ -0,0 +1,112 @@
+/*
+ * The file intends to implement dynamic creation of EEH device, which will
+ * be bound with OF node and PCI device simutaneously. The EEH devices would
+ * be foundamental information for EEH core components to work proerly. Besides,
+ * We have to support multiple situations where dynamic creation of EEH device
+ * is required:
+ *
+ * 1) Before PCI emunation starts, we need create EEH devices according to the
+ * PCI sensitive OF nodes.
+ * 2) When PCI emunation is done, we need do the binding between PCI device and
+ * the associated EEH device.
+ * 3) DR (Dynamic Reconfiguration) would create PCI sensitive OF node. EEH device
+ * will be created while PCI sensitive OF node is detected from DR.
+ * 4) PCI hotplug needs redoing the binding between PCI device and EEH device. If
+ * PHB is newly inserted, we also need create EEH devices accordingly.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/export.h>
+#include <linux/gfp.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+
+/**
+ * eeh_dev_init - Create EEH device according to OF node
+ * @dn: device node
+ * @data: PHB
+ *
+ * It will create EEH device according to the given OF node. The function
+ * might be called by PCI emunation, DR, PHB hotplug.
+ */
+void *eeh_dev_init(struct device_node *dn, void *data)
+{
+ struct pci_controller *phb = data;
+ struct eeh_dev *edev;
+
+ /* Allocate EEH device */
+ edev = kzalloc(sizeof(*edev), GFP_KERNEL);
+ if (!edev) {
+ pr_warning("%s: out of memory\n", __func__);
+ return NULL;
+ }
+
+ /* Associate EEH device with OF node */
+ PCI_DN(dn)->edev = edev;
+ edev->dn = dn;
+ edev->phb = phb;
+ INIT_LIST_HEAD(&edev->list);
+
+ return NULL;
+}
+
+/**
+ * eeh_dev_phb_init_dynamic - Create EEH devices for devices included in PHB
+ * @phb: PHB
+ *
+ * Scan the PHB OF node and its child association, then create the
+ * EEH devices accordingly
+ */
+void eeh_dev_phb_init_dynamic(struct pci_controller *phb)
+{
+ struct device_node *dn = phb->dn;
+
+ /* EEH PE for PHB */
+ eeh_phb_pe_create(phb);
+
+ /* EEH device for PHB */
+ eeh_dev_init(dn, phb);
+
+ /* EEH devices for children OF nodes */
+ traverse_pci_devices(dn, eeh_dev_init, phb);
+}
+
+/**
+ * eeh_dev_phb_init - Create EEH devices for devices included in existing PHBs
+ *
+ * Scan all the existing PHBs and create EEH devices for their OF
+ * nodes and their children OF nodes
+ */
+static int __init eeh_dev_phb_init(void)
+{
+ struct pci_controller *phb, *tmp;
+
+ list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
+ eeh_dev_phb_init_dynamic(phb);
+
+ pr_info("EEH: devices created\n");
+
+ return 0;
+}
+
+core_initcall(eeh_dev_phb_init);
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
new file mode 100644
index 000000000000..36bed5a12750
--- /dev/null
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -0,0 +1,732 @@
+/*
+ * PCI Error Recovery Driver for RPA-compliant PPC64 platform.
+ * Copyright IBM Corp. 2004 2005
+ * Copyright Linas Vepstas <linas@linas.org> 2004, 2005
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
+ */
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <asm/eeh.h>
+#include <asm/eeh_event.h>
+#include <asm/ppc-pci.h>
+#include <asm/pci-bridge.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+
+/**
+ * eeh_pcid_name - Retrieve name of PCI device driver
+ * @pdev: PCI device
+ *
+ * This routine is used to retrieve the name of PCI device driver
+ * if that's valid.
+ */
+static inline const char *eeh_pcid_name(struct pci_dev *pdev)
+{
+ if (pdev && pdev->dev.driver)
+ return pdev->dev.driver->name;
+ return "";
+}
+
+/**
+ * eeh_pcid_get - Get the PCI device driver
+ * @pdev: PCI device
+ *
+ * The function is used to retrieve the PCI device driver for
+ * the indicated PCI device. Besides, we will increase the reference
+ * of the PCI device driver to prevent that being unloaded on
+ * the fly. Otherwise, kernel crash would be seen.
+ */
+static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev)
+{
+ if (!pdev || !pdev->driver)
+ return NULL;
+
+ if (!try_module_get(pdev->driver->driver.owner))
+ return NULL;
+
+ return pdev->driver;
+}
+
+/**
+ * eeh_pcid_put - Dereference on the PCI device driver
+ * @pdev: PCI device
+ *
+ * The function is called to do dereference on the PCI device
+ * driver of the indicated PCI device.
+ */
+static inline void eeh_pcid_put(struct pci_dev *pdev)
+{
+ if (!pdev || !pdev->driver)
+ return;
+
+ module_put(pdev->driver->driver.owner);
+}
+
+#if 0
+static void print_device_node_tree(struct pci_dn *pdn, int dent)
+{
+ int i;
+ struct device_node *pc;
+
+ if (!pdn)
+ return;
+ for (i = 0; i < dent; i++)
+ printk(" ");
+ printk("dn=%s mode=%x \tcfg_addr=%x pe_addr=%x \tfull=%s\n",
+ pdn->node->name, pdn->eeh_mode, pdn->eeh_config_addr,
+ pdn->eeh_pe_config_addr, pdn->node->full_name);
+ dent += 3;
+ pc = pdn->node->child;
+ while (pc) {
+ print_device_node_tree(PCI_DN(pc), dent);
+ pc = pc->sibling;
+ }
+}
+#endif
+
+/**
+ * eeh_disable_irq - Disable interrupt for the recovering device
+ * @dev: PCI device
+ *
+ * This routine must be called when reporting temporary or permanent
+ * error to the particular PCI device to disable interrupt of that
+ * device. If the device has enabled MSI or MSI-X interrupt, we needn't
+ * do real work because EEH should freeze DMA transfers for those PCI
+ * devices encountering EEH errors, which includes MSI or MSI-X.
+ */
+static void eeh_disable_irq(struct pci_dev *dev)
+{
+ struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
+
+ /* Don't disable MSI and MSI-X interrupts. They are
+ * effectively disabled by the DMA Stopped state
+ * when an EEH error occurs.
+ */
+ if (dev->msi_enabled || dev->msix_enabled)
+ return;
+
+ if (!irq_has_action(dev->irq))
+ return;
+
+ edev->mode |= EEH_DEV_IRQ_DISABLED;
+ disable_irq_nosync(dev->irq);
+}
+
+/**
+ * eeh_enable_irq - Enable interrupt for the recovering device
+ * @dev: PCI device
+ *
+ * This routine must be called to enable interrupt while failed
+ * device could be resumed.
+ */
+static void eeh_enable_irq(struct pci_dev *dev)
+{
+ struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
+ struct irq_desc *desc;
+
+ if ((edev->mode) & EEH_DEV_IRQ_DISABLED) {
+ edev->mode &= ~EEH_DEV_IRQ_DISABLED;
+
+ desc = irq_to_desc(dev->irq);
+ if (desc && desc->depth > 0)
+ enable_irq(dev->irq);
+ }
+}
+
+/**
+ * eeh_report_error - Report pci error to each device driver
+ * @data: eeh device
+ * @userdata: return value
+ *
+ * Report an EEH error to each device driver, collect up and
+ * merge the device driver responses. Cumulative response
+ * passed back in "userdata".
+ */
+static void *eeh_report_error(void *data, void *userdata)
+{
+ struct eeh_dev *edev = (struct eeh_dev *)data;
+ struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+ enum pci_ers_result rc, *res = userdata;
+ struct pci_driver *driver;
+
+ /* We might not have the associated PCI device,
+ * then we should continue for next one.
+ */
+ if (!dev) return NULL;
+ dev->error_state = pci_channel_io_frozen;
+
+ driver = eeh_pcid_get(dev);
+ if (!driver) return NULL;
+
+ eeh_disable_irq(dev);
+
+ if (!driver->err_handler ||
+ !driver->err_handler->error_detected) {
+ eeh_pcid_put(dev);
+ return NULL;
+ }
+
+ rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
+
+ /* A driver that needs a reset trumps all others */
+ if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
+ if (*res == PCI_ERS_RESULT_NONE) *res = rc;
+
+ eeh_pcid_put(dev);
+ return NULL;
+}
+
+/**
+ * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled
+ * @data: eeh device
+ * @userdata: return value
+ *
+ * Tells each device driver that IO ports, MMIO and config space I/O
+ * are now enabled. Collects up and merges the device driver responses.
+ * Cumulative response passed back in "userdata".
+ */
+static void *eeh_report_mmio_enabled(void *data, void *userdata)
+{
+ struct eeh_dev *edev = (struct eeh_dev *)data;
+ struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+ enum pci_ers_result rc, *res = userdata;
+ struct pci_driver *driver;
+
+ driver = eeh_pcid_get(dev);
+ if (!driver) return NULL;
+
+ if (!driver->err_handler ||
+ !driver->err_handler->mmio_enabled) {
+ eeh_pcid_put(dev);
+ return NULL;
+ }
+
+ rc = driver->err_handler->mmio_enabled(dev);
+
+ /* A driver that needs a reset trumps all others */
+ if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
+ if (*res == PCI_ERS_RESULT_NONE) *res = rc;
+
+ eeh_pcid_put(dev);
+ return NULL;
+}
+
+/**
+ * eeh_report_reset - Tell device that slot has been reset
+ * @data: eeh device
+ * @userdata: return value
+ *
+ * This routine must be called while EEH tries to reset particular
+ * PCI device so that the associated PCI device driver could take
+ * some actions, usually to save data the driver needs so that the
+ * driver can work again while the device is recovered.
+ */
+static void *eeh_report_reset(void *data, void *userdata)
+{
+ struct eeh_dev *edev = (struct eeh_dev *)data;
+ struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+ enum pci_ers_result rc, *res = userdata;
+ struct pci_driver *driver;
+
+ if (!dev) return NULL;
+ dev->error_state = pci_channel_io_normal;
+
+ driver = eeh_pcid_get(dev);
+ if (!driver) return NULL;
+
+ eeh_enable_irq(dev);
+
+ if (!driver->err_handler ||
+ !driver->err_handler->slot_reset) {
+ eeh_pcid_put(dev);
+ return NULL;
+ }
+
+ rc = driver->err_handler->slot_reset(dev);
+ if ((*res == PCI_ERS_RESULT_NONE) ||
+ (*res == PCI_ERS_RESULT_RECOVERED)) *res = rc;
+ if (*res == PCI_ERS_RESULT_DISCONNECT &&
+ rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
+
+ eeh_pcid_put(dev);
+ return NULL;
+}
+
+/**
+ * eeh_report_resume - Tell device to resume normal operations
+ * @data: eeh device
+ * @userdata: return value
+ *
+ * This routine must be called to notify the device driver that it
+ * could resume so that the device driver can do some initialization
+ * to make the recovered device work again.
+ */
+static void *eeh_report_resume(void *data, void *userdata)
+{
+ struct eeh_dev *edev = (struct eeh_dev *)data;
+ struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+ struct pci_driver *driver;
+
+ if (!dev) return NULL;
+ dev->error_state = pci_channel_io_normal;
+
+ driver = eeh_pcid_get(dev);
+ if (!driver) return NULL;
+
+ eeh_enable_irq(dev);
+
+ if (!driver->err_handler ||
+ !driver->err_handler->resume) {
+ eeh_pcid_put(dev);
+ return NULL;
+ }
+
+ driver->err_handler->resume(dev);
+
+ eeh_pcid_put(dev);
+ return NULL;
+}
+
+/**
+ * eeh_report_failure - Tell device driver that device is dead.
+ * @data: eeh device
+ * @userdata: return value
+ *
+ * This informs the device driver that the device is permanently
+ * dead, and that no further recovery attempts will be made on it.
+ */
+static void *eeh_report_failure(void *data, void *userdata)
+{
+ struct eeh_dev *edev = (struct eeh_dev *)data;
+ struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+ struct pci_driver *driver;
+
+ if (!dev) return NULL;
+ dev->error_state = pci_channel_io_perm_failure;
+
+ driver = eeh_pcid_get(dev);
+ if (!driver) return NULL;
+
+ eeh_disable_irq(dev);
+
+ if (!driver->err_handler ||
+ !driver->err_handler->error_detected) {
+ eeh_pcid_put(dev);
+ return NULL;
+ }
+
+ driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
+
+ eeh_pcid_put(dev);
+ return NULL;
+}
+
+static void *eeh_rmv_device(void *data, void *userdata)
+{
+ struct pci_driver *driver;
+ struct eeh_dev *edev = (struct eeh_dev *)data;
+ struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+ int *removed = (int *)userdata;
+
+ /*
+ * Actually, we should remove the PCI bridges as well.
+ * However, that's lots of complexity to do that,
+ * particularly some of devices under the bridge might
+ * support EEH. So we just care about PCI devices for
+ * simplicity here.
+ */
+ if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE))
+ return NULL;
+ driver = eeh_pcid_get(dev);
+ if (driver && driver->err_handler)
+ return NULL;
+
+ /* Remove it from PCI subsystem */
+ pr_debug("EEH: Removing %s without EEH sensitive driver\n",
+ pci_name(dev));
+ edev->bus = dev->bus;
+ edev->mode |= EEH_DEV_DISCONNECTED;
+ (*removed)++;
+
+ pci_stop_and_remove_bus_device(dev);
+
+ return NULL;
+}
+
+static void *eeh_pe_detach_dev(void *data, void *userdata)
+{
+ struct eeh_pe *pe = (struct eeh_pe *)data;
+ struct eeh_dev *edev, *tmp;
+
+ eeh_pe_for_each_dev(pe, edev, tmp) {
+ if (!(edev->mode & EEH_DEV_DISCONNECTED))
+ continue;
+
+ edev->mode &= ~(EEH_DEV_DISCONNECTED | EEH_DEV_IRQ_DISABLED);
+ eeh_rmv_from_parent_pe(edev);
+ }
+
+ return NULL;
+}
+
+/**
+ * eeh_reset_device - Perform actual reset of a pci slot
+ * @pe: EEH PE
+ * @bus: PCI bus corresponding to the isolcated slot
+ *
+ * This routine must be called to do reset on the indicated PE.
+ * During the reset, udev might be invoked because those affected
+ * PCI devices will be removed and then added.
+ */
+static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
+{
+ struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
+ struct timeval tstamp;
+ int cnt, rc, removed = 0;
+
+ /* pcibios will clear the counter; save the value */
+ cnt = pe->freeze_count;
+ tstamp = pe->tstamp;
+
+ /*
+ * We don't remove the corresponding PE instances because
+ * we need the information afterwords. The attached EEH
+ * devices are expected to be attached soon when calling
+ * into pcibios_add_pci_devices().
+ */
+ eeh_pe_state_mark(pe, EEH_PE_KEEP);
+ if (bus)
+ pcibios_remove_pci_devices(bus);
+ else if (frozen_bus)
+ eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed);
+
+ /* Reset the pci controller. (Asserts RST#; resets config space).
+ * Reconfigure bridges and devices. Don't try to bring the system
+ * up if the reset failed for some reason.
+ */
+ rc = eeh_reset_pe(pe);
+ if (rc)
+ return rc;
+
+ /* Restore PE */
+ eeh_ops->configure_bridge(pe);
+ eeh_pe_restore_bars(pe);
+
+ /* Give the system 5 seconds to finish running the user-space
+ * hotplug shutdown scripts, e.g. ifdown for ethernet. Yes,
+ * this is a hack, but if we don't do this, and try to bring
+ * the device up before the scripts have taken it down,
+ * potentially weird things happen.
+ */
+ if (bus) {
+ pr_info("EEH: Sleep 5s ahead of complete hotplug\n");
+ ssleep(5);
+
+ /*
+ * The EEH device is still connected with its parent
+ * PE. We should disconnect it so the binding can be
+ * rebuilt when adding PCI devices.
+ */
+ eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
+ pcibios_add_pci_devices(bus);
+ } else if (frozen_bus && removed) {
+ pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
+ ssleep(5);
+
+ eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
+ pcibios_add_pci_devices(frozen_bus);
+ }
+ eeh_pe_state_clear(pe, EEH_PE_KEEP);
+
+ pe->tstamp = tstamp;
+ pe->freeze_count = cnt;
+
+ return 0;
+}
+
+/* The longest amount of time to wait for a pci device
+ * to come back on line, in seconds.
+ */
+#define MAX_WAIT_FOR_RECOVERY 150
+
+static void eeh_handle_normal_event(struct eeh_pe *pe)
+{
+ struct pci_bus *frozen_bus;
+ int rc = 0;
+ enum pci_ers_result result = PCI_ERS_RESULT_NONE;
+
+ frozen_bus = eeh_pe_bus_get(pe);
+ if (!frozen_bus) {
+ pr_err("%s: Cannot find PCI bus for PHB#%d-PE#%x\n",
+ __func__, pe->phb->global_number, pe->addr);
+ return;
+ }
+
+ eeh_pe_update_time_stamp(pe);
+ pe->freeze_count++;
+ if (pe->freeze_count > EEH_MAX_ALLOWED_FREEZES)
+ goto excess_failures;
+ pr_warning("EEH: This PCI device has failed %d times in the last hour\n",
+ pe->freeze_count);
+
+ /* Walk the various device drivers attached to this slot through
+ * a reset sequence, giving each an opportunity to do what it needs
+ * to accomplish the reset. Each child gets a report of the
+ * status ... if any child can't handle the reset, then the entire
+ * slot is dlpar removed and added.
+ */
+ pr_info("EEH: Notify device drivers to shutdown\n");
+ eeh_pe_dev_traverse(pe, eeh_report_error, &result);
+
+ /* Get the current PCI slot state. This can take a long time,
+ * sometimes over 3 seconds for certain systems.
+ */
+ rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
+ if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
+ pr_warning("EEH: Permanent failure\n");
+ goto hard_fail;
+ }
+
+ /* Since rtas may enable MMIO when posting the error log,
+ * don't post the error log until after all dev drivers
+ * have been informed.
+ */
+ pr_info("EEH: Collect temporary log\n");
+ eeh_slot_error_detail(pe, EEH_LOG_TEMP);
+
+ /* If all device drivers were EEH-unaware, then shut
+ * down all of the device drivers, and hope they
+ * go down willingly, without panicing the system.
+ */
+ if (result == PCI_ERS_RESULT_NONE) {
+ pr_info("EEH: Reset with hotplug activity\n");
+ rc = eeh_reset_device(pe, frozen_bus);
+ if (rc) {
+ pr_warning("%s: Unable to reset, err=%d\n",
+ __func__, rc);
+ goto hard_fail;
+ }
+ }
+
+ /* If all devices reported they can proceed, then re-enable MMIO */
+ if (result == PCI_ERS_RESULT_CAN_RECOVER) {
+ pr_info("EEH: Enable I/O for affected devices\n");
+ rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
+
+ if (rc < 0)
+ goto hard_fail;
+ if (rc) {
+ result = PCI_ERS_RESULT_NEED_RESET;
+ } else {
+ pr_info("EEH: Notify device drivers to resume I/O\n");
+ result = PCI_ERS_RESULT_NONE;
+ eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result);
+ }
+ }
+
+ /* If all devices reported they can proceed, then re-enable DMA */
+ if (result == PCI_ERS_RESULT_CAN_RECOVER) {
+ pr_info("EEH: Enabled DMA for affected devices\n");
+ rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
+
+ if (rc < 0)
+ goto hard_fail;
+ if (rc)
+ result = PCI_ERS_RESULT_NEED_RESET;
+ else
+ result = PCI_ERS_RESULT_RECOVERED;
+ }
+
+ /* If any device has a hard failure, then shut off everything. */
+ if (result == PCI_ERS_RESULT_DISCONNECT) {
+ pr_warning("EEH: Device driver gave up\n");
+ goto hard_fail;
+ }
+
+ /* If any device called out for a reset, then reset the slot */
+ if (result == PCI_ERS_RESULT_NEED_RESET) {
+ pr_info("EEH: Reset without hotplug activity\n");
+ rc = eeh_reset_device(pe, NULL);
+ if (rc) {
+ pr_warning("%s: Cannot reset, err=%d\n",
+ __func__, rc);
+ goto hard_fail;
+ }
+
+ pr_info("EEH: Notify device drivers "
+ "the completion of reset\n");
+ result = PCI_ERS_RESULT_NONE;
+ eeh_pe_dev_traverse(pe, eeh_report_reset, &result);
+ }
+
+ /* All devices should claim they have recovered by now. */
+ if ((result != PCI_ERS_RESULT_RECOVERED) &&
+ (result != PCI_ERS_RESULT_NONE)) {
+ pr_warning("EEH: Not recovered\n");
+ goto hard_fail;
+ }
+
+ /* Tell all device drivers that they can resume operations */
+ pr_info("EEH: Notify device driver to resume\n");
+ eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
+
+ return;
+
+excess_failures:
+ /*
+ * About 90% of all real-life EEH failures in the field
+ * are due to poorly seated PCI cards. Only 10% or so are
+ * due to actual, failed cards.
+ */
+ pr_err("EEH: PHB#%d-PE#%x has failed %d times in the\n"
+ "last hour and has been permanently disabled.\n"
+ "Please try reseating or replacing it.\n",
+ pe->phb->global_number, pe->addr,
+ pe->freeze_count);
+ goto perm_error;
+
+hard_fail:
+ pr_err("EEH: Unable to recover from failure from PHB#%d-PE#%x.\n"
+ "Please try reseating or replacing it\n",
+ pe->phb->global_number, pe->addr);
+
+perm_error:
+ eeh_slot_error_detail(pe, EEH_LOG_PERM);
+
+ /* Notify all devices that they're about to go down. */
+ eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
+
+ /* Shut down the device drivers for good. */
+ if (frozen_bus)
+ pcibios_remove_pci_devices(frozen_bus);
+}
+
+static void eeh_handle_special_event(void)
+{
+ struct eeh_pe *pe, *phb_pe;
+ struct pci_bus *bus;
+ struct pci_controller *hose, *tmp;
+ unsigned long flags;
+ int rc = 0;
+
+ /*
+ * The return value from next_error() has been classified as follows.
+ * It might be good to enumerate them. However, next_error() is only
+ * supported by PowerNV platform for now. So it would be fine to use
+ * integer directly:
+ *
+ * 4 - Dead IOC 3 - Dead PHB
+ * 2 - Fenced PHB 1 - Frozen PE
+ * 0 - No error found
+ *
+ */
+ rc = eeh_ops->next_error(&pe);
+ if (rc <= 0)
+ return;
+
+ switch (rc) {
+ case 4:
+ /* Mark all PHBs in dead state */
+ eeh_serialize_lock(&flags);
+ list_for_each_entry_safe(hose, tmp,
+ &hose_list, list_node) {
+ phb_pe = eeh_phb_pe_get(hose);
+ if (!phb_pe) continue;
+
+ eeh_pe_state_mark(phb_pe,
+ EEH_PE_ISOLATED | EEH_PE_PHB_DEAD);
+ }
+ eeh_serialize_unlock(flags);
+
+ /* Purge all events */
+ eeh_remove_event(NULL);
+ break;
+ case 3:
+ case 2:
+ case 1:
+ /* Mark the PE in fenced state */
+ eeh_serialize_lock(&flags);
+ if (rc == 3)
+ eeh_pe_state_mark(pe,
+ EEH_PE_ISOLATED | EEH_PE_PHB_DEAD);
+ else
+ eeh_pe_state_mark(pe,
+ EEH_PE_ISOLATED | EEH_PE_RECOVERING);
+ eeh_serialize_unlock(flags);
+
+ /* Purge all events of the PHB */
+ eeh_remove_event(pe);
+ break;
+ default:
+ pr_err("%s: Invalid value %d from next_error()\n",
+ __func__, rc);
+ return;
+ }
+
+ /*
+ * For fenced PHB and frozen PE, it's handled as normal
+ * event. We have to remove the affected PHBs for dead
+ * PHB and IOC
+ */
+ if (rc == 2 || rc == 1)
+ eeh_handle_normal_event(pe);
+ else {
+ list_for_each_entry_safe(hose, tmp,
+ &hose_list, list_node) {
+ phb_pe = eeh_phb_pe_get(hose);
+ if (!phb_pe || !(phb_pe->state & EEH_PE_PHB_DEAD))
+ continue;
+
+ bus = eeh_pe_bus_get(phb_pe);
+ /* Notify all devices that they're about to go down. */
+ eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
+ pcibios_remove_pci_devices(bus);
+ }
+ }
+}
+
+/**
+ * eeh_handle_event - Reset a PCI device after hard lockup.
+ * @pe: EEH PE
+ *
+ * While PHB detects address or data parity errors on particular PCI
+ * slot, the associated PE will be frozen. Besides, DMA's occurring
+ * to wild addresses (which usually happen due to bugs in device
+ * drivers or in PCI adapter firmware) can cause EEH error. #SERR,
+ * #PERR or other misc PCI-related errors also can trigger EEH errors.
+ *
+ * Recovery process consists of unplugging the device driver (which
+ * generated hotplug events to userspace), then issuing a PCI #RST to
+ * the device, then reconfiguring the PCI config space for all bridges
+ * & devices under this slot, and then finally restarting the device
+ * drivers (which cause a second set of hotplug events to go out to
+ * userspace).
+ */
+void eeh_handle_event(struct eeh_pe *pe)
+{
+ if (pe)
+ eeh_handle_normal_event(pe);
+ else
+ eeh_handle_special_event();
+}
diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
new file mode 100644
index 000000000000..d27c5afc90ae
--- /dev/null
+++ b/arch/powerpc/kernel/eeh_event.c
@@ -0,0 +1,182 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Copyright (c) 2005 Linas Vepstas <linas@linas.org>
+ */
+
+#include <linux/delay.h>
+#include <linux/list.h>
+#include <linux/sched.h>
+#include <linux/semaphore.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/kthread.h>
+#include <asm/eeh_event.h>
+#include <asm/ppc-pci.h>
+
+/** Overview:
+ * EEH error states may be detected within exception handlers;
+ * however, the recovery processing needs to occur asynchronously
+ * in a normal kernel context and not an interrupt context.
+ * This pair of routines creates an event and queues it onto a
+ * work-queue, where a worker thread can drive recovery.
+ */
+
+static DEFINE_SPINLOCK(eeh_eventlist_lock);
+static struct semaphore eeh_eventlist_sem;
+LIST_HEAD(eeh_eventlist);
+
+/**
+ * eeh_event_handler - Dispatch EEH events.
+ * @dummy - unused
+ *
+ * The detection of a frozen slot can occur inside an interrupt,
+ * where it can be hard to do anything about it. The goal of this
+ * routine is to pull these detection events out of the context
+ * of the interrupt handler, and re-dispatch them for processing
+ * at a later time in a normal context.
+ */
+static int eeh_event_handler(void * dummy)
+{
+ unsigned long flags;
+ struct eeh_event *event;
+ struct eeh_pe *pe;
+
+ while (!kthread_should_stop()) {
+ if (down_interruptible(&eeh_eventlist_sem))
+ break;
+
+ /* Fetch EEH event from the queue */
+ spin_lock_irqsave(&eeh_eventlist_lock, flags);
+ event = NULL;
+ if (!list_empty(&eeh_eventlist)) {
+ event = list_entry(eeh_eventlist.next,
+ struct eeh_event, list);
+ list_del(&event->list);
+ }
+ spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
+ if (!event)
+ continue;
+
+ /* We might have event without binding PE */
+ pe = event->pe;
+ if (pe) {
+ eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+ pr_info("EEH: Detected PCI bus error on PHB#%d-PE#%x\n",
+ pe->phb->global_number, pe->addr);
+ eeh_handle_event(pe);
+ eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+ } else {
+ eeh_handle_event(NULL);
+ }
+
+ kfree(event);
+ }
+
+ return 0;
+}
+
+/**
+ * eeh_event_init - Start kernel thread to handle EEH events
+ *
+ * This routine is called to start the kernel thread for processing
+ * EEH event.
+ */
+int eeh_event_init(void)
+{
+ struct task_struct *t;
+ int ret = 0;
+
+ /* Initialize semaphore */
+ sema_init(&eeh_eventlist_sem, 0);
+
+ t = kthread_run(eeh_event_handler, NULL, "eehd");
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
+ pr_err("%s: Failed to start EEH daemon (%d)\n",
+ __func__, ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * eeh_send_failure_event - Generate a PCI error event
+ * @pe: EEH PE
+ *
+ * This routine can be called within an interrupt context;
+ * the actual event will be delivered in a normal context
+ * (from a workqueue).
+ */
+int eeh_send_failure_event(struct eeh_pe *pe)
+{
+ unsigned long flags;
+ struct eeh_event *event;
+
+ event = kzalloc(sizeof(*event), GFP_ATOMIC);
+ if (!event) {
+ pr_err("EEH: out of memory, event not handled\n");
+ return -ENOMEM;
+ }
+ event->pe = pe;
+
+ /* We may or may not be called in an interrupt context */
+ spin_lock_irqsave(&eeh_eventlist_lock, flags);
+ list_add(&event->list, &eeh_eventlist);
+ spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
+
+ /* For EEH deamon to knick in */
+ up(&eeh_eventlist_sem);
+
+ return 0;
+}
+
+/**
+ * eeh_remove_event - Remove EEH event from the queue
+ * @pe: Event binding to the PE
+ *
+ * On PowerNV platform, we might have subsequent coming events
+ * is part of the former one. For that case, those subsequent
+ * coming events are totally duplicated and unnecessary, thus
+ * they should be removed.
+ */
+void eeh_remove_event(struct eeh_pe *pe)
+{
+ unsigned long flags;
+ struct eeh_event *event, *tmp;
+
+ spin_lock_irqsave(&eeh_eventlist_lock, flags);
+ list_for_each_entry_safe(event, tmp, &eeh_eventlist, list) {
+ /*
+ * If we don't have valid PE passed in, that means
+ * we already have event corresponding to dead IOC
+ * and all events should be purged.
+ */
+ if (!pe) {
+ list_del(&event->list);
+ kfree(event);
+ } else if (pe->type & EEH_PE_PHB) {
+ if (event->pe && event->pe->phb == pe->phb) {
+ list_del(&event->list);
+ kfree(event);
+ }
+ } else if (event->pe == pe) {
+ list_del(&event->list);
+ kfree(event);
+ }
+ }
+ spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
+}
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
new file mode 100644
index 000000000000..f9450537e335
--- /dev/null
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -0,0 +1,792 @@
+/*
+ * The file intends to implement PE based on the information from
+ * platforms. Basically, there have 3 types of PEs: PHB/Bus/Device.
+ * All the PEs should be organized as hierarchy tree. The first level
+ * of the tree will be associated to existing PHBs since the particular
+ * PE is only meaningful in one PHB domain.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/gfp.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+
+static LIST_HEAD(eeh_phb_pe);
+
+/**
+ * eeh_pe_alloc - Allocate PE
+ * @phb: PCI controller
+ * @type: PE type
+ *
+ * Allocate PE instance dynamically.
+ */
+static struct eeh_pe *eeh_pe_alloc(struct pci_controller *phb, int type)
+{
+ struct eeh_pe *pe;
+
+ /* Allocate PHB PE */
+ pe = kzalloc(sizeof(struct eeh_pe), GFP_KERNEL);
+ if (!pe) return NULL;
+
+ /* Initialize PHB PE */
+ pe->type = type;
+ pe->phb = phb;
+ INIT_LIST_HEAD(&pe->child_list);
+ INIT_LIST_HEAD(&pe->child);
+ INIT_LIST_HEAD(&pe->edevs);
+
+ return pe;
+}
+
+/**
+ * eeh_phb_pe_create - Create PHB PE
+ * @phb: PCI controller
+ *
+ * The function should be called while the PHB is detected during
+ * system boot or PCI hotplug in order to create PHB PE.
+ */
+int eeh_phb_pe_create(struct pci_controller *phb)
+{
+ struct eeh_pe *pe;
+
+ /* Allocate PHB PE */
+ pe = eeh_pe_alloc(phb, EEH_PE_PHB);
+ if (!pe) {
+ pr_err("%s: out of memory!\n", __func__);
+ return -ENOMEM;
+ }
+
+ /* Put it into the list */
+ list_add_tail(&pe->child, &eeh_phb_pe);
+
+ pr_debug("EEH: Add PE for PHB#%d\n", phb->global_number);
+
+ return 0;
+}
+
+/**
+ * eeh_phb_pe_get - Retrieve PHB PE based on the given PHB
+ * @phb: PCI controller
+ *
+ * The overall PEs form hierarchy tree. The first layer of the
+ * hierarchy tree is composed of PHB PEs. The function is used
+ * to retrieve the corresponding PHB PE according to the given PHB.
+ */
+struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb)
+{
+ struct eeh_pe *pe;
+
+ list_for_each_entry(pe, &eeh_phb_pe, child) {
+ /*
+ * Actually, we needn't check the type since
+ * the PE for PHB has been determined when that
+ * was created.
+ */
+ if ((pe->type & EEH_PE_PHB) && pe->phb == phb)
+ return pe;
+ }
+
+ return NULL;
+}
+
+/**
+ * eeh_pe_next - Retrieve the next PE in the tree
+ * @pe: current PE
+ * @root: root PE
+ *
+ * The function is used to retrieve the next PE in the
+ * hierarchy PE tree.
+ */
+static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe,
+ struct eeh_pe *root)
+{
+ struct list_head *next = pe->child_list.next;
+
+ if (next == &pe->child_list) {
+ while (1) {
+ if (pe == root)
+ return NULL;
+ next = pe->child.next;
+ if (next != &pe->parent->child_list)
+ break;
+ pe = pe->parent;
+ }
+ }
+
+ return list_entry(next, struct eeh_pe, child);
+}
+
+/**
+ * eeh_pe_traverse - Traverse PEs in the specified PHB
+ * @root: root PE
+ * @fn: callback
+ * @flag: extra parameter to callback
+ *
+ * The function is used to traverse the specified PE and its
+ * child PEs. The traversing is to be terminated once the
+ * callback returns something other than NULL, or no more PEs
+ * to be traversed.
+ */
+void *eeh_pe_traverse(struct eeh_pe *root,
+ eeh_traverse_func fn, void *flag)
+{
+ struct eeh_pe *pe;
+ void *ret;
+
+ for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
+ ret = fn(pe, flag);
+ if (ret) return ret;
+ }
+
+ return NULL;
+}
+
+/**
+ * eeh_pe_dev_traverse - Traverse the devices from the PE
+ * @root: EEH PE
+ * @fn: function callback
+ * @flag: extra parameter to callback
+ *
+ * The function is used to traverse the devices of the specified
+ * PE and its child PEs.
+ */
+void *eeh_pe_dev_traverse(struct eeh_pe *root,
+ eeh_traverse_func fn, void *flag)
+{
+ struct eeh_pe *pe;
+ struct eeh_dev *edev, *tmp;
+ void *ret;
+
+ if (!root) {
+ pr_warning("%s: Invalid PE %p\n", __func__, root);
+ return NULL;
+ }
+
+ /* Traverse root PE */
+ for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
+ eeh_pe_for_each_dev(pe, edev, tmp) {
+ ret = fn(edev, flag);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return NULL;
+}
+
+/**
+ * __eeh_pe_get - Check the PE address
+ * @data: EEH PE
+ * @flag: EEH device
+ *
+ * For one particular PE, it can be identified by PE address
+ * or tranditional BDF address. BDF address is composed of
+ * Bus/Device/Function number. The extra data referred by flag
+ * indicates which type of address should be used.
+ */
+static void *__eeh_pe_get(void *data, void *flag)
+{
+ struct eeh_pe *pe = (struct eeh_pe *)data;
+ struct eeh_dev *edev = (struct eeh_dev *)flag;
+
+ /* Unexpected PHB PE */
+ if (pe->type & EEH_PE_PHB)
+ return NULL;
+
+ /* We prefer PE address */
+ if (edev->pe_config_addr &&
+ (edev->pe_config_addr == pe->addr))
+ return pe;
+
+ /* Try BDF address */
+ if (edev->config_addr &&
+ (edev->config_addr == pe->config_addr))
+ return pe;
+
+ return NULL;
+}
+
+/**
+ * eeh_pe_get - Search PE based on the given address
+ * @edev: EEH device
+ *
+ * Search the corresponding PE based on the specified address which
+ * is included in the eeh device. The function is used to check if
+ * the associated PE has been created against the PE address. It's
+ * notable that the PE address has 2 format: traditional PE address
+ * which is composed of PCI bus/device/function number, or unified
+ * PE address.
+ */
+struct eeh_pe *eeh_pe_get(struct eeh_dev *edev)
+{
+ struct eeh_pe *root = eeh_phb_pe_get(edev->phb);
+ struct eeh_pe *pe;
+
+ pe = eeh_pe_traverse(root, __eeh_pe_get, edev);
+
+ return pe;
+}
+
+/**
+ * eeh_pe_get_parent - Retrieve the parent PE
+ * @edev: EEH device
+ *
+ * The whole PEs existing in the system are organized as hierarchy
+ * tree. The function is used to retrieve the parent PE according
+ * to the parent EEH device.
+ */
+static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev)
+{
+ struct device_node *dn;
+ struct eeh_dev *parent;
+
+ /*
+ * It might have the case for the indirect parent
+ * EEH device already having associated PE, but
+ * the direct parent EEH device doesn't have yet.
+ */
+ dn = edev->dn->parent;
+ while (dn) {
+ /* We're poking out of PCI territory */
+ if (!PCI_DN(dn)) return NULL;
+
+ parent = of_node_to_eeh_dev(dn);
+ /* We're poking out of PCI territory */
+ if (!parent) return NULL;
+
+ if (parent->pe)
+ return parent->pe;
+
+ dn = dn->parent;
+ }
+
+ return NULL;
+}
+
+/**
+ * eeh_add_to_parent_pe - Add EEH device to parent PE
+ * @edev: EEH device
+ *
+ * Add EEH device to the parent PE. If the parent PE already
+ * exists, the PE type will be changed to EEH_PE_BUS. Otherwise,
+ * we have to create new PE to hold the EEH device and the new
+ * PE will be linked to its parent PE as well.
+ */
+int eeh_add_to_parent_pe(struct eeh_dev *edev)
+{
+ struct eeh_pe *pe, *parent;
+
+ /*
+ * Search the PE has been existing or not according
+ * to the PE address. If that has been existing, the
+ * PE should be composed of PCI bus and its subordinate
+ * components.
+ */
+ pe = eeh_pe_get(edev);
+ if (pe && !(pe->type & EEH_PE_INVALID)) {
+ if (!edev->pe_config_addr) {
+ pr_err("%s: PE with addr 0x%x already exists\n",
+ __func__, edev->config_addr);
+ return -EEXIST;
+ }
+
+ /* Mark the PE as type of PCI bus */
+ pe->type = EEH_PE_BUS;
+ edev->pe = pe;
+
+ /* Put the edev to PE */
+ list_add_tail(&edev->list, &pe->edevs);
+ pr_debug("EEH: Add %s to Bus PE#%x\n",
+ edev->dn->full_name, pe->addr);
+
+ return 0;
+ } else if (pe && (pe->type & EEH_PE_INVALID)) {
+ list_add_tail(&edev->list, &pe->edevs);
+ edev->pe = pe;
+ /*
+ * We're running to here because of PCI hotplug caused by
+ * EEH recovery. We need clear EEH_PE_INVALID until the top.
+ */
+ parent = pe;
+ while (parent) {
+ if (!(parent->type & EEH_PE_INVALID))
+ break;
+ parent->type &= ~(EEH_PE_INVALID | EEH_PE_KEEP);
+ parent = parent->parent;
+ }
+ pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
+ edev->dn->full_name, pe->addr, pe->parent->addr);
+
+ return 0;
+ }
+
+ /* Create a new EEH PE */
+ pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE);
+ if (!pe) {
+ pr_err("%s: out of memory!\n", __func__);
+ return -ENOMEM;
+ }
+ pe->addr = edev->pe_config_addr;
+ pe->config_addr = edev->config_addr;
+
+ /*
+ * While doing PE reset, we probably hot-reset the
+ * upstream bridge. However, the PCI devices including
+ * the associated EEH devices might be removed when EEH
+ * core is doing recovery. So that won't safe to retrieve
+ * the bridge through downstream EEH device. We have to
+ * trace the parent PCI bus, then the upstream bridge.
+ */
+ if (eeh_probe_mode_dev())
+ pe->bus = eeh_dev_to_pci_dev(edev)->bus;
+
+ /*
+ * Put the new EEH PE into hierarchy tree. If the parent
+ * can't be found, the newly created PE will be attached
+ * to PHB directly. Otherwise, we have to associate the
+ * PE with its parent.
+ */
+ parent = eeh_pe_get_parent(edev);
+ if (!parent) {
+ parent = eeh_phb_pe_get(edev->phb);
+ if (!parent) {
+ pr_err("%s: No PHB PE is found (PHB Domain=%d)\n",
+ __func__, edev->phb->global_number);
+ edev->pe = NULL;
+ kfree(pe);
+ return -EEXIST;
+ }
+ }
+ pe->parent = parent;
+
+ /*
+ * Put the newly created PE into the child list and
+ * link the EEH device accordingly.
+ */
+ list_add_tail(&pe->child, &parent->child_list);
+ list_add_tail(&edev->list, &pe->edevs);
+ edev->pe = pe;
+ pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
+ edev->dn->full_name, pe->addr, pe->parent->addr);
+
+ return 0;
+}
+
+/**
+ * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE
+ * @edev: EEH device
+ *
+ * The PE hierarchy tree might be changed when doing PCI hotplug.
+ * Also, the PCI devices or buses could be removed from the system
+ * during EEH recovery. So we have to call the function remove the
+ * corresponding PE accordingly if necessary.
+ */
+int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
+{
+ struct eeh_pe *pe, *parent, *child;
+ int cnt;
+
+ if (!edev->pe) {
+ pr_debug("%s: No PE found for EEH device %s\n",
+ __func__, edev->dn->full_name);
+ return -EEXIST;
+ }
+
+ /* Remove the EEH device */
+ pe = edev->pe;
+ edev->pe = NULL;
+ list_del(&edev->list);
+
+ /*
+ * Check if the parent PE includes any EEH devices.
+ * If not, we should delete that. Also, we should
+ * delete the parent PE if it doesn't have associated
+ * child PEs and EEH devices.
+ */
+ while (1) {
+ parent = pe->parent;
+ if (pe->type & EEH_PE_PHB)
+ break;
+
+ if (!(pe->state & EEH_PE_KEEP)) {
+ if (list_empty(&pe->edevs) &&
+ list_empty(&pe->child_list)) {
+ list_del(&pe->child);
+ kfree(pe);
+ } else {
+ break;
+ }
+ } else {
+ if (list_empty(&pe->edevs)) {
+ cnt = 0;
+ list_for_each_entry(child, &pe->child_list, child) {
+ if (!(child->type & EEH_PE_INVALID)) {
+ cnt++;
+ break;
+ }
+ }
+
+ if (!cnt)
+ pe->type |= EEH_PE_INVALID;
+ else
+ break;
+ }
+ }
+
+ pe = parent;
+ }
+
+ return 0;
+}
+
+/**
+ * eeh_pe_update_time_stamp - Update PE's frozen time stamp
+ * @pe: EEH PE
+ *
+ * We have time stamp for each PE to trace its time of getting
+ * frozen in last hour. The function should be called to update
+ * the time stamp on first error of the specific PE. On the other
+ * handle, we needn't account for errors happened in last hour.
+ */
+void eeh_pe_update_time_stamp(struct eeh_pe *pe)
+{
+ struct timeval tstamp;
+
+ if (!pe) return;
+
+ if (pe->freeze_count <= 0) {
+ pe->freeze_count = 0;
+ do_gettimeofday(&pe->tstamp);
+ } else {
+ do_gettimeofday(&tstamp);
+ if (tstamp.tv_sec - pe->tstamp.tv_sec > 3600) {
+ pe->tstamp = tstamp;
+ pe->freeze_count = 0;
+ }
+ }
+}
+
+/**
+ * __eeh_pe_state_mark - Mark the state for the PE
+ * @data: EEH PE
+ * @flag: state
+ *
+ * The function is used to mark the indicated state for the given
+ * PE. Also, the associated PCI devices will be put into IO frozen
+ * state as well.
+ */
+static void *__eeh_pe_state_mark(void *data, void *flag)
+{
+ struct eeh_pe *pe = (struct eeh_pe *)data;
+ int state = *((int *)flag);
+ struct eeh_dev *edev, *tmp;
+ struct pci_dev *pdev;
+
+ /*
+ * Mark the PE with the indicated state. Also,
+ * the associated PCI device will be put into
+ * I/O frozen state to avoid I/O accesses from
+ * the PCI device driver.
+ */
+ pe->state |= state;
+ eeh_pe_for_each_dev(pe, edev, tmp) {
+ pdev = eeh_dev_to_pci_dev(edev);
+ if (pdev)
+ pdev->error_state = pci_channel_io_frozen;
+ }
+
+ return NULL;
+}
+
+/**
+ * eeh_pe_state_mark - Mark specified state for PE and its associated device
+ * @pe: EEH PE
+ *
+ * EEH error affects the current PE and its child PEs. The function
+ * is used to mark appropriate state for the affected PEs and the
+ * associated devices.
+ */
+void eeh_pe_state_mark(struct eeh_pe *pe, int state)
+{
+ eeh_pe_traverse(pe, __eeh_pe_state_mark, &state);
+}
+
+/**
+ * __eeh_pe_state_clear - Clear state for the PE
+ * @data: EEH PE
+ * @flag: state
+ *
+ * The function is used to clear the indicated state from the
+ * given PE. Besides, we also clear the check count of the PE
+ * as well.
+ */
+static void *__eeh_pe_state_clear(void *data, void *flag)
+{
+ struct eeh_pe *pe = (struct eeh_pe *)data;
+ int state = *((int *)flag);
+
+ pe->state &= ~state;
+ pe->check_count = 0;
+
+ return NULL;
+}
+
+/**
+ * eeh_pe_state_clear - Clear state for the PE and its children
+ * @pe: PE
+ * @state: state to be cleared
+ *
+ * When the PE and its children has been recovered from error,
+ * we need clear the error state for that. The function is used
+ * for the purpose.
+ */
+void eeh_pe_state_clear(struct eeh_pe *pe, int state)
+{
+ eeh_pe_traverse(pe, __eeh_pe_state_clear, &state);
+}
+
+/*
+ * Some PCI bridges (e.g. PLX bridges) have primary/secondary
+ * buses assigned explicitly by firmware, and we probably have
+ * lost that after reset. So we have to delay the check until
+ * the PCI-CFG registers have been restored for the parent
+ * bridge.
+ *
+ * Don't use normal PCI-CFG accessors, which probably has been
+ * blocked on normal path during the stage. So we need utilize
+ * eeh operations, which is always permitted.
+ */
+static void eeh_bridge_check_link(struct eeh_dev *edev,
+ struct device_node *dn)
+{
+ int cap;
+ uint32_t val;
+ int timeout = 0;
+
+ /*
+ * We only check root port and downstream ports of
+ * PCIe switches
+ */
+ if (!(edev->mode & (EEH_DEV_ROOT_PORT | EEH_DEV_DS_PORT)))
+ return;
+
+ pr_debug("%s: Check PCIe link for %04x:%02x:%02x.%01x ...\n",
+ __func__, edev->phb->global_number,
+ edev->config_addr >> 8,
+ PCI_SLOT(edev->config_addr & 0xFF),
+ PCI_FUNC(edev->config_addr & 0xFF));
+
+ /* Check slot status */
+ cap = edev->pcie_cap;
+ eeh_ops->read_config(dn, cap + PCI_EXP_SLTSTA, 2, &val);
+ if (!(val & PCI_EXP_SLTSTA_PDS)) {
+ pr_debug(" No card in the slot (0x%04x) !\n", val);
+ return;
+ }
+
+ /* Check power status if we have the capability */
+ eeh_ops->read_config(dn, cap + PCI_EXP_SLTCAP, 2, &val);
+ if (val & PCI_EXP_SLTCAP_PCP) {
+ eeh_ops->read_config(dn, cap + PCI_EXP_SLTCTL, 2, &val);
+ if (val & PCI_EXP_SLTCTL_PCC) {
+ pr_debug(" In power-off state, power it on ...\n");
+ val &= ~(PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PIC);
+ val |= (0x0100 & PCI_EXP_SLTCTL_PIC);
+ eeh_ops->write_config(dn, cap + PCI_EXP_SLTCTL, 2, val);
+ msleep(2 * 1000);
+ }
+ }
+
+ /* Enable link */
+ eeh_ops->read_config(dn, cap + PCI_EXP_LNKCTL, 2, &val);
+ val &= ~PCI_EXP_LNKCTL_LD;
+ eeh_ops->write_config(dn, cap + PCI_EXP_LNKCTL, 2, val);
+
+ /* Check link */
+ eeh_ops->read_config(dn, cap + PCI_EXP_LNKCAP, 4, &val);
+ if (!(val & PCI_EXP_LNKCAP_DLLLARC)) {
+ pr_debug(" No link reporting capability (0x%08x) \n", val);
+ msleep(1000);
+ return;
+ }
+
+ /* Wait the link is up until timeout (5s) */
+ timeout = 0;
+ while (timeout < 5000) {
+ msleep(20);
+ timeout += 20;
+
+ eeh_ops->read_config(dn, cap + PCI_EXP_LNKSTA, 2, &val);
+ if (val & PCI_EXP_LNKSTA_DLLLA)
+ break;
+ }
+
+ if (val & PCI_EXP_LNKSTA_DLLLA)
+ pr_debug(" Link up (%s)\n",
+ (val & PCI_EXP_LNKSTA_CLS_2_5GB) ? "2.5GB" : "5GB");
+ else
+ pr_debug(" Link not ready (0x%04x)\n", val);
+}
+
+#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
+#define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)])
+
+static void eeh_restore_bridge_bars(struct eeh_dev *edev,
+ struct device_node *dn)
+{
+ int i;
+
+ /*
+ * Device BARs: 0x10 - 0x18
+ * Bus numbers and windows: 0x18 - 0x30
+ */
+ for (i = 4; i < 13; i++)
+ eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]);
+ /* Rom: 0x38 */
+ eeh_ops->write_config(dn, 14*4, 4, edev->config_space[14]);
+
+ /* Cache line & Latency timer: 0xC 0xD */
+ eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1,
+ SAVED_BYTE(PCI_CACHE_LINE_SIZE));
+ eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1,
+ SAVED_BYTE(PCI_LATENCY_TIMER));
+ /* Max latency, min grant, interrupt ping and line: 0x3C */
+ eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]);
+
+ /* PCI Command: 0x4 */
+ eeh_ops->write_config(dn, PCI_COMMAND, 4, edev->config_space[1]);
+
+ /* Check the PCIe link is ready */
+ eeh_bridge_check_link(edev, dn);
+}
+
+static void eeh_restore_device_bars(struct eeh_dev *edev,
+ struct device_node *dn)
+{
+ int i;
+ u32 cmd;
+
+ for (i = 4; i < 10; i++)
+ eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]);
+ /* 12 == Expansion ROM Address */
+ eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]);
+
+ eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1,
+ SAVED_BYTE(PCI_CACHE_LINE_SIZE));
+ eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1,
+ SAVED_BYTE(PCI_LATENCY_TIMER));
+
+ /* max latency, min grant, interrupt pin and line */
+ eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]);
+
+ /*
+ * Restore PERR & SERR bits, some devices require it,
+ * don't touch the other command bits
+ */
+ eeh_ops->read_config(dn, PCI_COMMAND, 4, &cmd);
+ if (edev->config_space[1] & PCI_COMMAND_PARITY)
+ cmd |= PCI_COMMAND_PARITY;
+ else
+ cmd &= ~PCI_COMMAND_PARITY;
+ if (edev->config_space[1] & PCI_COMMAND_SERR)
+ cmd |= PCI_COMMAND_SERR;
+ else
+ cmd &= ~PCI_COMMAND_SERR;
+ eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd);
+}
+
+/**
+ * eeh_restore_one_device_bars - Restore the Base Address Registers for one device
+ * @data: EEH device
+ * @flag: Unused
+ *
+ * Loads the PCI configuration space base address registers,
+ * the expansion ROM base address, the latency timer, and etc.
+ * from the saved values in the device node.
+ */
+static void *eeh_restore_one_device_bars(void *data, void *flag)
+{
+ struct eeh_dev *edev = (struct eeh_dev *)data;
+ struct device_node *dn = eeh_dev_to_of_node(edev);
+
+ /* Do special restore for bridges */
+ if (edev->mode & EEH_DEV_BRIDGE)
+ eeh_restore_bridge_bars(edev, dn);
+ else
+ eeh_restore_device_bars(edev, dn);
+
+ return NULL;
+}
+
+/**
+ * eeh_pe_restore_bars - Restore the PCI config space info
+ * @pe: EEH PE
+ *
+ * This routine performs a recursive walk to the children
+ * of this device as well.
+ */
+void eeh_pe_restore_bars(struct eeh_pe *pe)
+{
+ /*
+ * We needn't take the EEH lock since eeh_pe_dev_traverse()
+ * will take that.
+ */
+ eeh_pe_dev_traverse(pe, eeh_restore_one_device_bars, NULL);
+}
+
+/**
+ * eeh_pe_bus_get - Retrieve PCI bus according to the given PE
+ * @pe: EEH PE
+ *
+ * Retrieve the PCI bus according to the given PE. Basically,
+ * there're 3 types of PEs: PHB/Bus/Device. For PHB PE, the
+ * primary PCI bus will be retrieved. The parent bus will be
+ * returned for BUS PE. However, we don't have associated PCI
+ * bus for DEVICE PE.
+ */
+struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
+{
+ struct pci_bus *bus = NULL;
+ struct eeh_dev *edev;
+ struct pci_dev *pdev;
+
+ if (pe->type & EEH_PE_PHB) {
+ bus = pe->phb->bus;
+ } else if (pe->type & EEH_PE_BUS ||
+ pe->type & EEH_PE_DEVICE) {
+ if (pe->bus) {
+ bus = pe->bus;
+ goto out;
+ }
+
+ edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
+ pdev = eeh_dev_to_pci_dev(edev);
+ if (pdev)
+ bus = pdev->bus;
+ }
+
+out:
+ return bus;
+}
diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
new file mode 100644
index 000000000000..5d753d4f2c75
--- /dev/null
+++ b/arch/powerpc/kernel/eeh_sysfs.c
@@ -0,0 +1,95 @@
+/*
+ * Sysfs entries for PCI Error Recovery for PAPR-compliant platform.
+ * Copyright IBM Corporation 2007
+ * Copyright Linas Vepstas <linas@austin.ibm.com> 2007
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
+ */
+#include <linux/pci.h>
+#include <linux/stat.h>
+#include <asm/ppc-pci.h>
+#include <asm/pci-bridge.h>
+
+/**
+ * EEH_SHOW_ATTR -- Create sysfs entry for eeh statistic
+ * @_name: name of file in sysfs directory
+ * @_memb: name of member in struct pci_dn to access
+ * @_format: printf format for display
+ *
+ * All of the attributes look very similar, so just
+ * auto-gen a cut-n-paste routine to display them.
+ */
+#define EEH_SHOW_ATTR(_name,_memb,_format) \
+static ssize_t eeh_show_##_name(struct device *dev, \
+ struct device_attribute *attr, char *buf) \
+{ \
+ struct pci_dev *pdev = to_pci_dev(dev); \
+ struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); \
+ \
+ if (!edev) \
+ return 0; \
+ \
+ return sprintf(buf, _format "\n", edev->_memb); \
+} \
+static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL);
+
+EEH_SHOW_ATTR(eeh_mode, mode, "0x%x");
+EEH_SHOW_ATTR(eeh_config_addr, config_addr, "0x%x");
+EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x");
+
+void eeh_sysfs_add_device(struct pci_dev *pdev)
+{
+ struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+ int rc=0;
+
+ if (edev && (edev->mode & EEH_DEV_SYSFS))
+ return;
+
+ rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode);
+ rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr);
+ rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
+
+ if (rc)
+ printk(KERN_WARNING "EEH: Unable to create sysfs entries\n");
+ else if (edev)
+ edev->mode |= EEH_DEV_SYSFS;
+}
+
+void eeh_sysfs_remove_device(struct pci_dev *pdev)
+{
+ struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+
+ /*
+ * The parent directory might have been removed. We needn't
+ * continue for that case.
+ */
+ if (!pdev->dev.kobj.sd) {
+ if (edev)
+ edev->mode &= ~EEH_DEV_SYSFS;
+ return;
+ }
+
+ device_remove_file(&pdev->dev, &dev_attr_eeh_mode);
+ device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr);
+ device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
+
+ if (edev)
+ edev->mode &= ~EEH_DEV_SYSFS;
+}
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 246b11c4fe7e..c04cdf70d487 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -102,7 +102,8 @@ BEGIN_FW_FTR_SECTION
/* if from user, see if there are any DTL entries to process */
ld r10,PACALPPACAPTR(r13) /* get ptr to VPA */
ld r11,PACA_DTL_RIDX(r13) /* get log read index */
- ld r10,LPPACA_DTLIDX(r10) /* get log write index */
+ addi r10,r10,LPPACA_DTLIDX
+ LDX_BE r10,0,r10 /* get log write index */
cmpd cr1,r11,r10
beq+ cr1,33f
bl .accumulate_stolen_time
@@ -449,15 +450,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_DSCR)
#ifdef CONFIG_PPC_BOOK3S_64
BEGIN_FTR_SECTION
- /*
- * Back up the TAR across context switches. Note that the TAR is not
- * available for use in the kernel. (To provide this, the TAR should
- * be backed up/restored on exception entry/exit instead, and be in
- * pt_regs. FIXME, this should be in pt_regs anyway (for debug).)
- */
- mfspr r0,SPRN_TAR
- std r0,THREAD_TAR(r3)
-
/* Event based branch registers */
mfspr r0, SPRN_BESCR
std r0, THREAD_BESCR(r3)
@@ -465,20 +457,6 @@ BEGIN_FTR_SECTION
std r0, THREAD_EBBHR(r3)
mfspr r0, SPRN_EBBRR
std r0, THREAD_EBBRR(r3)
-
- /* PMU registers made user read/(write) by EBB */
- mfspr r0, SPRN_SIAR
- std r0, THREAD_SIAR(r3)
- mfspr r0, SPRN_SDAR
- std r0, THREAD_SDAR(r3)
- mfspr r0, SPRN_SIER
- std r0, THREAD_SIER(r3)
- mfspr r0, SPRN_MMCR0
- std r0, THREAD_MMCR0(r3)
- mfspr r0, SPRN_MMCR2
- std r0, THREAD_MMCR2(r3)
- mfspr r0, SPRN_MMCRA
- std r0, THREAD_MMCRA(r3)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
#endif
@@ -545,9 +523,11 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
*/
ld r9,PACA_SLBSHADOWPTR(r13)
li r12,0
- std r12,SLBSHADOW_STACKESID(r9) /* Clear ESID */
- std r7,SLBSHADOW_STACKVSID(r9) /* Save VSID */
- std r0,SLBSHADOW_STACKESID(r9) /* Save ESID */
+ std r12,SLBSHADOW_STACKESID(r9) /* Clear ESID */
+ li r12,SLBSHADOW_STACKVSID
+ STDX_BE r7,r12,r9 /* Save VSID */
+ li r12,SLBSHADOW_STACKESID
+ STDX_BE r0,r12,r9 /* Save ESID */
/* No need to check for MMU_FTR_NO_SLBIE_B here, since when
* we have 1TB segments, the only CPUs known to have the errata
@@ -581,20 +561,6 @@ BEGIN_FTR_SECTION
ld r0, THREAD_EBBRR(r4)
mtspr SPRN_EBBRR, r0
- /* PMU registers made user read/(write) by EBB */
- ld r0, THREAD_SIAR(r4)
- mtspr SPRN_SIAR, r0
- ld r0, THREAD_SDAR(r4)
- mtspr SPRN_SDAR, r0
- ld r0, THREAD_SIER(r4)
- mtspr SPRN_SIER, r0
- ld r0, THREAD_MMCR0(r4)
- mtspr SPRN_MMCR0, r0
- ld r0, THREAD_MMCR2(r4)
- mtspr SPRN_MMCR2, r0
- ld r0, THREAD_MMCRA(r4)
- mtspr SPRN_MMCRA, r0
-
ld r0,THREAD_TAR(r4)
mtspr SPRN_TAR,r0
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
@@ -614,7 +580,13 @@ BEGIN_FTR_SECTION
cmpwi r6,0
bne 1f
ld r0,0(r7)
-1: cmpd r0,r25
+1:
+BEGIN_FTR_SECTION_NESTED(70)
+ mfspr r8, SPRN_FSCR
+ rldimi r8, r6, FSCR_DSCR_LG, (63 - FSCR_DSCR_LG)
+ mtspr SPRN_FSCR, r8
+END_FTR_SECTION_NESTED(CPU_FTR_ARCH_207S, CPU_FTR_ARCH_207S, 70)
+ cmpd r0,r25
beq 2f
mtspr SPRN_DSCR,r0
2:
@@ -657,21 +629,43 @@ _GLOBAL(ret_from_except_lite)
CURRENT_THREAD_INFO(r9, r1)
ld r3,_MSR(r1)
+#ifdef CONFIG_PPC_BOOK3E
+ ld r10,PACACURRENT(r13)
+#endif /* CONFIG_PPC_BOOK3E */
ld r4,TI_FLAGS(r9)
andi. r3,r3,MSR_PR
beq resume_kernel
+#ifdef CONFIG_PPC_BOOK3E
+ lwz r3,(THREAD+THREAD_DBCR0)(r10)
+#endif /* CONFIG_PPC_BOOK3E */
/* Check current_thread_info()->flags */
andi. r0,r4,_TIF_USER_WORK_MASK
+#ifdef CONFIG_PPC_BOOK3E
+ bne 1f
+ /*
+ * Check to see if the dbcr0 register is set up to debug.
+ * Use the internal debug mode bit to do this.
+ */
+ andis. r0,r3,DBCR0_IDM@h
beq restore
-
- andi. r0,r4,_TIF_NEED_RESCHED
- beq 1f
+ mfmsr r0
+ rlwinm r0,r0,0,~MSR_DE /* Clear MSR.DE */
+ mtmsr r0
+ mtspr SPRN_DBCR0,r3
+ li r10, -1
+ mtspr SPRN_DBSR,r10
+ b restore
+#else
+ beq restore
+#endif
+1: andi. r0,r4,_TIF_NEED_RESCHED
+ beq 2f
bl .restore_interrupts
SCHEDULE_USER
b .ret_from_except_lite
-1: bl .save_nvgprs
+2: bl .save_nvgprs
bl .restore_interrupts
addi r3,r1,STACK_FRAME_OVERHEAD
bl .do_notify_resume
@@ -727,9 +721,9 @@ resume_kernel:
/*
* Here we are preempting the current task. We want to make
- * sure we are soft-disabled first
+ * sure we are soft-disabled first and reconcile irq state.
*/
- SOFT_DISABLE_INTS(r3,r4)
+ RECONCILE_IRQ_STATE(r3,r4)
1: bl .preempt_schedule_irq
/* Re-test flags and eventually loop */
diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c
index d44a571e45a7..6300c13bbde4 100644
--- a/arch/powerpc/kernel/epapr_paravirt.c
+++ b/arch/powerpc/kernel/epapr_paravirt.c
@@ -30,22 +30,20 @@ extern u32 epapr_ev_idle_start[];
bool epapr_paravirt_enabled;
-static int __init epapr_paravirt_init(void)
+static int __init early_init_dt_scan_epapr(unsigned long node,
+ const char *uname,
+ int depth, void *data)
{
- struct device_node *hyper_node;
const u32 *insts;
- int len, i;
+ unsigned long len;
+ int i;
- hyper_node = of_find_node_by_path("/hypervisor");
- if (!hyper_node)
- return -ENODEV;
-
- insts = of_get_property(hyper_node, "hcall-instructions", &len);
+ insts = of_get_flat_dt_prop(node, "hcall-instructions", &len);
if (!insts)
- return -ENODEV;
+ return 0;
if (len % 4 || len > (4 * 4))
- return -ENODEV;
+ return -1;
for (i = 0; i < (len / 4); i++) {
patch_instruction(epapr_hypercall_start + i, insts[i]);
@@ -55,13 +53,19 @@ static int __init epapr_paravirt_init(void)
}
#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
- if (of_get_property(hyper_node, "has-idle", NULL))
+ if (of_get_flat_dt_prop(node, "has-idle", NULL))
ppc_md.power_save = epapr_ev_idle;
#endif
epapr_paravirt_enabled = true;
+ return 1;
+}
+
+int __init epapr_paravirt_early_init(void)
+{
+ of_scan_flat_dt(early_init_dt_scan_epapr, NULL);
+
return 0;
}
-early_initcall(epapr_paravirt_init);
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 645170a07ada..2d067049db27 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -198,9 +198,9 @@ exc_##n##_common: \
/* This second version is meant for exceptions that don't immediately
* hard-enable. We set a bit in paca->irq_happened to ensure that
* a subsequent call to arch_local_irq_restore() will properly
- * hard-enable and avoid the fast-path
+ * hard-enable and avoid the fast-path, and then reconcile irq state.
*/
-#define INTS_DISABLE SOFT_DISABLE_INTS(r3,r4)
+#define INTS_DISABLE RECONCILE_IRQ_STATE(r3,r4)
/* This is called by exceptions that used INTS_KEEP (that did not touch
* irq indicators in the PACA). This will restore MSR:EE to it's previous
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index e6eba1bf61ad..3a9ed6ac224b 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -293,27 +293,31 @@ system_call_pSeries:
* out of line to handle them
*/
. = 0xe00
-hv_exception_trampoline:
+hv_data_storage_trampoline:
SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0(PACA_EXGEN)
b h_data_storage_hv
. = 0xe20
+hv_instr_storage_trampoline:
SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0(PACA_EXGEN)
b h_instr_storage_hv
. = 0xe40
+emulation_assist_trampoline:
SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0(PACA_EXGEN)
b emulation_assist_hv
. = 0xe60
+hv_exception_trampoline:
SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0(PACA_EXGEN)
b hmi_exception_hv
. = 0xe80
+hv_doorbell_trampoline:
SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0(PACA_EXGEN)
b h_doorbell_hv
@@ -323,28 +327,35 @@ hv_exception_trampoline:
* prolog code of the PerformanceMonitor one. A little
* trickery is thus necessary
*/
-performance_monitor_pSeries_1:
. = 0xf00
+performance_monitor_pseries_trampoline:
SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0(PACA_EXGEN)
b performance_monitor_pSeries
-altivec_unavailable_pSeries_1:
. = 0xf20
+altivec_unavailable_pseries_trampoline:
SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0(PACA_EXGEN)
b altivec_unavailable_pSeries
-vsx_unavailable_pSeries_1:
. = 0xf40
+vsx_unavailable_pseries_trampoline:
SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0(PACA_EXGEN)
b vsx_unavailable_pSeries
. = 0xf60
+facility_unavailable_trampoline:
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_0(PACA_EXGEN)
+ b facility_unavailable_pSeries
+
+ . = 0xf80
+hv_facility_unavailable_trampoline:
SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0(PACA_EXGEN)
- b tm_unavailable_pSeries
+ b facility_unavailable_hv
#ifdef CONFIG_CBE_RAS
STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error)
@@ -360,11 +371,7 @@ denorm_exception_hv:
HMT_MEDIUM_PPR_DISCARD
mtspr SPRN_SPRG_HSCRATCH0,r13
EXCEPTION_PROLOG_0(PACA_EXGEN)
- std r11,PACA_EXGEN+EX_R11(r13)
- std r12,PACA_EXGEN+EX_R12(r13)
- mfspr r9,SPRN_SPRG_HSCRATCH0
- std r9,PACA_EXGEN+EX_R13(r13)
- mfcr r9
+ EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x1500)
#ifdef CONFIG_PPC_DENORMALISATION
mfspr r10,SPRN_HSRR1
@@ -374,6 +381,7 @@ denorm_exception_hv:
bne+ denorm_assist
#endif
+ KVMTEST(0x1500)
EXCEPTION_PROLOG_PSERIES_1(denorm_common, EXC_HV)
KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x1500)
@@ -454,38 +462,14 @@ BEGIN_FTR_SECTION
xori r10,r10,(MSR_FE0|MSR_FE1)
mtmsrd r10
sync
- fmr 0,0
- fmr 1,1
- fmr 2,2
- fmr 3,3
- fmr 4,4
- fmr 5,5
- fmr 6,6
- fmr 7,7
- fmr 8,8
- fmr 9,9
- fmr 10,10
- fmr 11,11
- fmr 12,12
- fmr 13,13
- fmr 14,14
- fmr 15,15
- fmr 16,16
- fmr 17,17
- fmr 18,18
- fmr 19,19
- fmr 20,20
- fmr 21,21
- fmr 22,22
- fmr 23,23
- fmr 24,24
- fmr 25,25
- fmr 26,26
- fmr 27,27
- fmr 28,28
- fmr 29,29
- fmr 30,30
- fmr 31,31
+
+#define FMR2(n) fmr (n), (n) ; fmr n+1, n+1
+#define FMR4(n) FMR2(n) ; FMR2(n+2)
+#define FMR8(n) FMR4(n) ; FMR4(n+4)
+#define FMR16(n) FMR8(n) ; FMR8(n+8)
+#define FMR32(n) FMR16(n) ; FMR16(n+16)
+ FMR32(0)
+
FTR_SECTION_ELSE
/*
* To denormalise we need to move a copy of the register to itself.
@@ -495,43 +479,33 @@ FTR_SECTION_ELSE
oris r10,r10,MSR_VSX@h
mtmsrd r10
sync
- XVCPSGNDP(0,0,0)
- XVCPSGNDP(1,1,1)
- XVCPSGNDP(2,2,2)
- XVCPSGNDP(3,3,3)
- XVCPSGNDP(4,4,4)
- XVCPSGNDP(5,5,5)
- XVCPSGNDP(6,6,6)
- XVCPSGNDP(7,7,7)
- XVCPSGNDP(8,8,8)
- XVCPSGNDP(9,9,9)
- XVCPSGNDP(10,10,10)
- XVCPSGNDP(11,11,11)
- XVCPSGNDP(12,12,12)
- XVCPSGNDP(13,13,13)
- XVCPSGNDP(14,14,14)
- XVCPSGNDP(15,15,15)
- XVCPSGNDP(16,16,16)
- XVCPSGNDP(17,17,17)
- XVCPSGNDP(18,18,18)
- XVCPSGNDP(19,19,19)
- XVCPSGNDP(20,20,20)
- XVCPSGNDP(21,21,21)
- XVCPSGNDP(22,22,22)
- XVCPSGNDP(23,23,23)
- XVCPSGNDP(24,24,24)
- XVCPSGNDP(25,25,25)
- XVCPSGNDP(26,26,26)
- XVCPSGNDP(27,27,27)
- XVCPSGNDP(28,28,28)
- XVCPSGNDP(29,29,29)
- XVCPSGNDP(30,30,30)
- XVCPSGNDP(31,31,31)
+
+#define XVCPSGNDP2(n) XVCPSGNDP(n,n,n) ; XVCPSGNDP(n+1,n+1,n+1)
+#define XVCPSGNDP4(n) XVCPSGNDP2(n) ; XVCPSGNDP2(n+2)
+#define XVCPSGNDP8(n) XVCPSGNDP4(n) ; XVCPSGNDP4(n+4)
+#define XVCPSGNDP16(n) XVCPSGNDP8(n) ; XVCPSGNDP8(n+8)
+#define XVCPSGNDP32(n) XVCPSGNDP16(n) ; XVCPSGNDP16(n+16)
+ XVCPSGNDP32(0)
+
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_206)
+
+BEGIN_FTR_SECTION
+ b denorm_done
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+/*
+ * To denormalise we need to move a copy of the register to itself.
+ * For POWER8 we need to do that for all 64 VSX registers
+ */
+ XVCPSGNDP32(32)
+denorm_done:
mtspr SPRN_HSRR0,r11
mtcrf 0x80,r9
ld r9,PACA_EXGEN+EX_R9(r13)
RESTORE_PPR_PACA(PACA_EXGEN, r10)
+BEGIN_FTR_SECTION
+ ld r10,PACA_EXGEN+EX_CFAR(r13)
+ mtspr SPRN_CFAR,r10
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
ld r10,PACA_EXGEN+EX_R10(r13)
ld r11,PACA_EXGEN+EX_R11(r13)
ld r12,PACA_EXGEN+EX_R12(r13)
@@ -560,8 +534,10 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_206)
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf20)
STD_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable)
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40)
- STD_EXCEPTION_PSERIES_OOL(0xf60, tm_unavailable)
+ STD_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable)
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf60)
+ STD_EXCEPTION_HV_OOL(0xf82, facility_unavailable)
+ KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xf82)
/*
* An interrupt came in while soft-disabled. We set paca->irq_happened, then:
@@ -721,7 +697,7 @@ machine_check_common:
STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception)
STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception)
STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception)
- STD_EXCEPTION_COMMON(0xe40, emulation_assist, .program_check_exception)
+ STD_EXCEPTION_COMMON(0xe40, emulation_assist, .emulation_assist_interrupt)
STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception)
#ifdef CONFIG_PPC_DOORBELL
STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, .doorbell_exception)
@@ -831,53 +807,55 @@ system_call_relon_pSeries:
STD_RELON_EXCEPTION_PSERIES(0x4d00, 0xd00, single_step)
. = 0x4e00
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b h_data_storage_relon_hv
+ b . /* Can't happen, see v2.07 Book III-S section 6.5 */
. = 0x4e20
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b h_instr_storage_relon_hv
+ b . /* Can't happen, see v2.07 Book III-S section 6.5 */
. = 0x4e40
+emulation_assist_relon_trampoline:
SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0(PACA_EXGEN)
b emulation_assist_relon_hv
. = 0x4e60
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b hmi_exception_relon_hv
+ b . /* Can't happen, see v2.07 Book III-S section 6.5 */
. = 0x4e80
+h_doorbell_relon_trampoline:
SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0(PACA_EXGEN)
b h_doorbell_relon_hv
-performance_monitor_relon_pSeries_1:
. = 0x4f00
+performance_monitor_relon_pseries_trampoline:
SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0(PACA_EXGEN)
b performance_monitor_relon_pSeries
-altivec_unavailable_relon_pSeries_1:
. = 0x4f20
+altivec_unavailable_relon_pseries_trampoline:
SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0(PACA_EXGEN)
b altivec_unavailable_relon_pSeries
-vsx_unavailable_relon_pSeries_1:
. = 0x4f40
+vsx_unavailable_relon_pseries_trampoline:
SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0(PACA_EXGEN)
b vsx_unavailable_relon_pSeries
-tm_unavailable_relon_pSeries_1:
. = 0x4f60
+facility_unavailable_relon_trampoline:
SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0(PACA_EXGEN)
- b tm_unavailable_relon_pSeries
+ b facility_unavailable_relon_pSeries
+
+ . = 0x4f80
+hv_facility_unavailable_relon_trampoline:
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_0(PACA_EXGEN)
+ b hv_facility_unavailable_relon_hv
STD_RELON_EXCEPTION_PSERIES(0x5300, 0x1300, instruction_breakpoint)
#ifdef CONFIG_PPC_DENORMALISATION
@@ -1203,36 +1181,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
bl .vsx_unavailable_exception
b .ret_from_except
- .align 7
- .globl tm_unavailable_common
-tm_unavailable_common:
- EXCEPTION_PROLOG_COMMON(0xf60, PACA_EXGEN)
- bl .save_nvgprs
- DISABLE_INTS
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl .tm_unavailable_exception
- b .ret_from_except
+ STD_EXCEPTION_COMMON(0xf60, facility_unavailable, .facility_unavailable_exception)
+ STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, .facility_unavailable_exception)
.align 7
.globl __end_handlers
__end_handlers:
/* Equivalents to the above handlers for relocation-on interrupt vectors */
- STD_RELON_EXCEPTION_HV_OOL(0xe00, h_data_storage)
- KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe00)
- STD_RELON_EXCEPTION_HV_OOL(0xe20, h_instr_storage)
- KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe20)
STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist)
- KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe40)
- STD_RELON_EXCEPTION_HV_OOL(0xe60, hmi_exception)
- KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe60)
MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell)
- KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe80)
STD_RELON_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor)
STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable)
STD_RELON_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable)
- STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, tm_unavailable)
+ STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable)
+ STD_RELON_EXCEPTION_HV_OOL(0xf80, hv_facility_unavailable)
#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
/*
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 8a9b6f59822d..67ee0d6c1070 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -822,14 +822,6 @@ finish_tlb_load:
rfi /* Should sync shadow TLBs */
b . /* prevent prefetch past rfi */
-/* extern void giveup_fpu(struct task_struct *prev)
- *
- * The PowerPC 4xx family of processors do not have an FPU, so this just
- * returns.
- */
-_ENTRY(giveup_fpu)
- blr
-
/* This is where the main kernel code starts.
*/
start_here:
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index 97e2671cde7f..c334f53453f7 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -784,16 +784,6 @@ _GLOBAL(__fixup_440A_mcheck)
sync
blr
-/*
- * extern void giveup_fpu(struct task_struct *prev)
- *
- * The 44x core does not have an FPU.
- */
-#ifndef CONFIG_PPC_FPU
-_GLOBAL(giveup_fpu)
- blr
-#endif
-
_GLOBAL(set_context)
#ifdef CONFIG_BDI_SWITCH
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index b61363d557b5..3d11d8038dee 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -703,6 +703,7 @@ _GLOBAL(relative_toc)
mtlr r0
blr
+.balign 8
p_toc: .llong __toc_start + 0x8000 - 0b
/*
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index b2a5860accfb..1b92a97b1b04 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -691,10 +691,6 @@ modified_instr:
b 151b
#endif
- .globl giveup_fpu
-giveup_fpu:
- blr
-
/*
* This is where the main kernel code starts.
*/
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index d10a7cacccd2..289afaffbbb5 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -948,16 +948,6 @@ _GLOBAL(giveup_spe)
#endif /* CONFIG_SPE */
/*
- * extern void giveup_fpu(struct task_struct *prev)
- *
- * Not all FSL Book-E cores have an FPU
- */
-#ifndef CONFIG_PPC_FPU
-_GLOBAL(giveup_fpu)
- blr
-#endif
-
-/*
* extern void abort(void)
*
* At present, this routine just applies a system reset.
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index a949bdfc9623..f0b47d1a6b0e 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -176,7 +176,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
length_max = 512 ; /* 64 doublewords */
/* DAWR region can't cross 512 boundary */
if ((bp->attr.bp_addr >> 10) !=
- ((bp->attr.bp_addr + bp->attr.bp_len) >> 10))
+ ((bp->attr.bp_addr + bp->attr.bp_len - 1) >> 10))
return -EINVAL;
}
if (info->len >
@@ -250,6 +250,7 @@ int __kprobes hw_breakpoint_handler(struct die_args *args)
* we still need to single-step the instruction, but we don't
* generate an event.
*/
+ info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ;
if (!((bp->attr.bp_addr <= dar) &&
(dar - bp->attr.bp_addr < bp->attr.bp_len)))
info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c
index 8220baa46faf..16a7c2326d48 100644
--- a/arch/powerpc/kernel/ibmebus.c
+++ b/arch/powerpc/kernel/ibmebus.c
@@ -205,7 +205,7 @@ static int ibmebus_create_devices(const struct of_device_id *matches)
return ret;
}
-int ibmebus_register_driver(struct of_platform_driver *drv)
+int ibmebus_register_driver(struct platform_driver *drv)
{
/* If the driver uses devices that ibmebus doesn't know, add them */
ibmebus_create_devices(drv->driver.of_match_table);
@@ -215,7 +215,7 @@ int ibmebus_register_driver(struct of_platform_driver *drv)
}
EXPORT_SYMBOL(ibmebus_register_driver);
-void ibmebus_unregister_driver(struct of_platform_driver *drv)
+void ibmebus_unregister_driver(struct platform_driver *drv)
{
driver_unregister(&drv->driver);
}
@@ -338,11 +338,10 @@ static int ibmebus_bus_bus_match(struct device *dev, struct device_driver *drv)
static int ibmebus_bus_device_probe(struct device *dev)
{
int error = -ENODEV;
- struct of_platform_driver *drv;
+ struct platform_driver *drv;
struct platform_device *of_dev;
- const struct of_device_id *match;
- drv = to_of_platform_driver(dev->driver);
+ drv = to_platform_driver(dev->driver);
of_dev = to_platform_device(dev);
if (!drv->probe)
@@ -350,9 +349,8 @@ static int ibmebus_bus_device_probe(struct device *dev)
of_dev_get(of_dev);
- match = of_match_device(drv->driver.of_match_table, dev);
- if (match)
- error = drv->probe(of_dev, match);
+ if (of_driver_match_device(dev, dev->driver))
+ error = drv->probe(of_dev);
if (error)
of_dev_put(of_dev);
@@ -362,7 +360,7 @@ static int ibmebus_bus_device_probe(struct device *dev)
static int ibmebus_bus_device_remove(struct device *dev)
{
struct platform_device *of_dev = to_platform_device(dev);
- struct of_platform_driver *drv = to_of_platform_driver(dev->driver);
+ struct platform_driver *drv = to_platform_driver(dev->driver);
if (dev->driver && drv->remove)
drv->remove(of_dev);
@@ -372,7 +370,7 @@ static int ibmebus_bus_device_remove(struct device *dev)
static void ibmebus_bus_device_shutdown(struct device *dev)
{
struct platform_device *of_dev = to_platform_device(dev);
- struct of_platform_driver *drv = to_of_platform_driver(dev->driver);
+ struct platform_driver *drv = to_platform_driver(dev->driver);
if (dev->driver && drv->shutdown)
drv->shutdown(of_dev);
@@ -419,7 +417,7 @@ struct device_attribute ibmebus_bus_device_attrs[] = {
static int ibmebus_bus_legacy_suspend(struct device *dev, pm_message_t mesg)
{
struct platform_device *of_dev = to_platform_device(dev);
- struct of_platform_driver *drv = to_of_platform_driver(dev->driver);
+ struct platform_driver *drv = to_platform_driver(dev->driver);
int ret = 0;
if (dev->driver && drv->suspend)
@@ -430,7 +428,7 @@ static int ibmebus_bus_legacy_suspend(struct device *dev, pm_message_t mesg)
static int ibmebus_bus_legacy_resume(struct device *dev)
{
struct platform_device *of_dev = to_platform_device(dev);
- struct of_platform_driver *drv = to_of_platform_driver(dev->driver);
+ struct platform_driver *drv = to_platform_driver(dev->driver);
int ret = 0;
if (dev->driver && drv->resume)
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index 939ea7ef0dc8..d7216c9abda1 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -85,7 +85,7 @@ int powersave_nap;
/*
* Register the sysctl to set/clear powersave_nap.
*/
-static ctl_table powersave_nap_ctl_table[]={
+static struct ctl_table powersave_nap_ctl_table[] = {
{
.procname = "powersave-nap",
.data = &powersave_nap,
@@ -95,7 +95,7 @@ static ctl_table powersave_nap_ctl_table[]={
},
{}
};
-static ctl_table powersave_nap_sysctl_root[] = {
+static struct ctl_table powersave_nap_sysctl_root[] = {
{
.procname = "kernel",
.mode = 0555,
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
index 50e90b7e7139..24b968f8e4d8 100644
--- a/arch/powerpc/kernel/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -53,8 +53,10 @@ static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr)
return NULL;
}
+#ifdef CONFIG_PPC_INDIRECT_MMIO
struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
{
+ unsigned hugepage_shift;
struct iowa_bus *bus;
int token;
@@ -70,11 +72,17 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END)
return NULL;
- ptep = find_linux_pte(init_mm.pgd, vaddr);
+ ptep = find_linux_pte_or_hugepte(init_mm.pgd, vaddr,
+ &hugepage_shift);
if (ptep == NULL)
paddr = 0;
- else
+ else {
+ /*
+ * we don't have hugepages backing iomem
+ */
+ WARN_ON(hugepage_shift);
paddr = pte_pfn(*ptep) << PAGE_SHIFT;
+ }
bus = iowa_pci_find(vaddr, paddr);
if (bus == NULL)
@@ -83,13 +91,25 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
return bus;
}
+#else /* CONFIG_PPC_INDIRECT_MMIO */
+struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
+{
+ return NULL;
+}
+#endif /* !CONFIG_PPC_INDIRECT_MMIO */
+#ifdef CONFIG_PPC_INDIRECT_PIO
struct iowa_bus *iowa_pio_find_bus(unsigned long port)
{
unsigned long vaddr = (unsigned long)pci_io_base + port;
return iowa_pci_find(vaddr, 0);
}
-
+#else
+struct iowa_bus *iowa_pio_find_bus(unsigned long port)
+{
+ return NULL;
+}
+#endif
#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) \
static ret iowa_##name at \
@@ -130,6 +150,7 @@ static const struct ppc_pci_io iowa_pci_io = {
};
+#ifdef CONFIG_PPC_INDIRECT_MMIO
static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
unsigned long flags, void *caller)
{
@@ -144,6 +165,9 @@ static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
}
return res;
}
+#else /* CONFIG_PPC_INDIRECT_MMIO */
+#define iowa_ioremap NULL
+#endif /* !CONFIG_PPC_INDIRECT_MMIO */
/* Enable IO workaround */
static void io_workaround_init(void)
diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c
index 886381f32c3d..2a2b4aeab80f 100644
--- a/arch/powerpc/kernel/io.c
+++ b/arch/powerpc/kernel/io.c
@@ -25,6 +25,9 @@
#include <asm/firmware.h>
#include <asm/bug.h>
+/* See definition in io.h */
+bool isa_io_special;
+
void _insb(const volatile u8 __iomem *port, void *buf, long count)
{
u8 *tbuf = buf;
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index c0d0dbddfba1..572bb5b95f35 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -36,6 +36,8 @@
#include <linux/hash.h>
#include <linux/fault-inject.h>
#include <linux/pci.h>
+#include <linux/iommu.h>
+#include <linux/sched.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/iommu.h>
@@ -44,6 +46,7 @@
#include <asm/kdump.h>
#include <asm/fadump.h>
#include <asm/vio.h>
+#include <asm/tce.h>
#define DBG(...)
@@ -102,7 +105,7 @@ static int __init fail_iommu_debugfs(void)
struct dentry *dir = fault_create_debugfs_attr("fail_iommu",
NULL, &fail_iommu);
- return PTR_RET(dir);
+ return PTR_ERR_OR_ZERO(dir);
}
late_initcall(fail_iommu_debugfs);
@@ -658,7 +661,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
/* number of bytes needed for the bitmap */
sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
- page = alloc_pages_node(nid, GFP_ATOMIC, get_order(sz));
+ page = alloc_pages_node(nid, GFP_KERNEL, get_order(sz));
if (!page)
panic("iommu_init_table: Can't allocate %ld bytes\n", sz);
tbl->it_map = page_address(page);
@@ -724,6 +727,13 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
if (tbl->it_offset == 0)
clear_bit(0, tbl->it_map);
+#ifdef CONFIG_IOMMU_API
+ if (tbl->it_group) {
+ iommu_group_put(tbl->it_group);
+ BUG_ON(tbl->it_group);
+ }
+#endif
+
/* verify that table contains no entries */
if (!bitmap_empty(tbl->it_map, tbl->it_size))
pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name);
@@ -860,3 +870,316 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size,
free_pages((unsigned long)vaddr, get_order(size));
}
}
+
+#ifdef CONFIG_IOMMU_API
+/*
+ * SPAPR TCE API
+ */
+static void group_release(void *iommu_data)
+{
+ struct iommu_table *tbl = iommu_data;
+ tbl->it_group = NULL;
+}
+
+void iommu_register_group(struct iommu_table *tbl,
+ int pci_domain_number, unsigned long pe_num)
+{
+ struct iommu_group *grp;
+ char *name;
+
+ grp = iommu_group_alloc();
+ if (IS_ERR(grp)) {
+ pr_warn("powerpc iommu api: cannot create new group, err=%ld\n",
+ PTR_ERR(grp));
+ return;
+ }
+ tbl->it_group = grp;
+ iommu_group_set_iommudata(grp, tbl, group_release);
+ name = kasprintf(GFP_KERNEL, "domain%d-pe%lx",
+ pci_domain_number, pe_num);
+ if (!name)
+ return;
+ iommu_group_set_name(grp, name);
+ kfree(name);
+}
+
+enum dma_data_direction iommu_tce_direction(unsigned long tce)
+{
+ if ((tce & TCE_PCI_READ) && (tce & TCE_PCI_WRITE))
+ return DMA_BIDIRECTIONAL;
+ else if (tce & TCE_PCI_READ)
+ return DMA_TO_DEVICE;
+ else if (tce & TCE_PCI_WRITE)
+ return DMA_FROM_DEVICE;
+ else
+ return DMA_NONE;
+}
+EXPORT_SYMBOL_GPL(iommu_tce_direction);
+
+void iommu_flush_tce(struct iommu_table *tbl)
+{
+ /* Flush/invalidate TLB caches if necessary */
+ if (ppc_md.tce_flush)
+ ppc_md.tce_flush(tbl);
+
+ /* Make sure updates are seen by hardware */
+ mb();
+}
+EXPORT_SYMBOL_GPL(iommu_flush_tce);
+
+int iommu_tce_clear_param_check(struct iommu_table *tbl,
+ unsigned long ioba, unsigned long tce_value,
+ unsigned long npages)
+{
+ /* ppc_md.tce_free() does not support any value but 0 */
+ if (tce_value)
+ return -EINVAL;
+
+ if (ioba & ~IOMMU_PAGE_MASK)
+ return -EINVAL;
+
+ ioba >>= IOMMU_PAGE_SHIFT;
+ if (ioba < tbl->it_offset)
+ return -EINVAL;
+
+ if ((ioba + npages) > (tbl->it_offset + tbl->it_size))
+ return -EINVAL;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check);
+
+int iommu_tce_put_param_check(struct iommu_table *tbl,
+ unsigned long ioba, unsigned long tce)
+{
+ if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ)))
+ return -EINVAL;
+
+ if (tce & ~(IOMMU_PAGE_MASK | TCE_PCI_WRITE | TCE_PCI_READ))
+ return -EINVAL;
+
+ if (ioba & ~IOMMU_PAGE_MASK)
+ return -EINVAL;
+
+ ioba >>= IOMMU_PAGE_SHIFT;
+ if (ioba < tbl->it_offset)
+ return -EINVAL;
+
+ if ((ioba + 1) > (tbl->it_offset + tbl->it_size))
+ return -EINVAL;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(iommu_tce_put_param_check);
+
+unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry)
+{
+ unsigned long oldtce;
+ struct iommu_pool *pool = get_pool(tbl, entry);
+
+ spin_lock(&(pool->lock));
+
+ oldtce = ppc_md.tce_get(tbl, entry);
+ if (oldtce & (TCE_PCI_WRITE | TCE_PCI_READ))
+ ppc_md.tce_free(tbl, entry, 1);
+ else
+ oldtce = 0;
+
+ spin_unlock(&(pool->lock));
+
+ return oldtce;
+}
+EXPORT_SYMBOL_GPL(iommu_clear_tce);
+
+int iommu_clear_tces_and_put_pages(struct iommu_table *tbl,
+ unsigned long entry, unsigned long pages)
+{
+ unsigned long oldtce;
+ struct page *page;
+
+ for ( ; pages; --pages, ++entry) {
+ oldtce = iommu_clear_tce(tbl, entry);
+ if (!oldtce)
+ continue;
+
+ page = pfn_to_page(oldtce >> PAGE_SHIFT);
+ WARN_ON(!page);
+ if (page) {
+ if (oldtce & TCE_PCI_WRITE)
+ SetPageDirty(page);
+ put_page(page);
+ }
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(iommu_clear_tces_and_put_pages);
+
+/*
+ * hwaddr is a kernel virtual address here (0xc... bazillion),
+ * tce_build converts it to a physical address.
+ */
+int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
+ unsigned long hwaddr, enum dma_data_direction direction)
+{
+ int ret = -EBUSY;
+ unsigned long oldtce;
+ struct iommu_pool *pool = get_pool(tbl, entry);
+
+ spin_lock(&(pool->lock));
+
+ oldtce = ppc_md.tce_get(tbl, entry);
+ /* Add new entry if it is not busy */
+ if (!(oldtce & (TCE_PCI_WRITE | TCE_PCI_READ)))
+ ret = ppc_md.tce_build(tbl, entry, 1, hwaddr, direction, NULL);
+
+ spin_unlock(&(pool->lock));
+
+ /* if (unlikely(ret))
+ pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n",
+ __func__, hwaddr, entry << IOMMU_PAGE_SHIFT,
+ hwaddr, ret); */
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_tce_build);
+
+int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry,
+ unsigned long tce)
+{
+ int ret;
+ struct page *page = NULL;
+ unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK & ~PAGE_MASK;
+ enum dma_data_direction direction = iommu_tce_direction(tce);
+
+ ret = get_user_pages_fast(tce & PAGE_MASK, 1,
+ direction != DMA_TO_DEVICE, &page);
+ if (unlikely(ret != 1)) {
+ /* pr_err("iommu_tce: get_user_pages_fast failed tce=%lx ioba=%lx ret=%d\n",
+ tce, entry << IOMMU_PAGE_SHIFT, ret); */
+ return -EFAULT;
+ }
+ hwaddr = (unsigned long) page_address(page) + offset;
+
+ ret = iommu_tce_build(tbl, entry, hwaddr, direction);
+ if (ret)
+ put_page(page);
+
+ if (ret < 0)
+ pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%d\n",
+ __func__, entry << IOMMU_PAGE_SHIFT, tce, ret);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_put_tce_user_mode);
+
+int iommu_take_ownership(struct iommu_table *tbl)
+{
+ unsigned long sz = (tbl->it_size + 7) >> 3;
+
+ if (tbl->it_offset == 0)
+ clear_bit(0, tbl->it_map);
+
+ if (!bitmap_empty(tbl->it_map, tbl->it_size)) {
+ pr_err("iommu_tce: it_map is not empty");
+ return -EBUSY;
+ }
+
+ memset(tbl->it_map, 0xff, sz);
+ iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(iommu_take_ownership);
+
+void iommu_release_ownership(struct iommu_table *tbl)
+{
+ unsigned long sz = (tbl->it_size + 7) >> 3;
+
+ iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
+ memset(tbl->it_map, 0, sz);
+
+ /* Restore bit#0 set by iommu_init_table() */
+ if (tbl->it_offset == 0)
+ set_bit(0, tbl->it_map);
+}
+EXPORT_SYMBOL_GPL(iommu_release_ownership);
+
+static int iommu_add_device(struct device *dev)
+{
+ struct iommu_table *tbl;
+ int ret = 0;
+
+ if (WARN_ON(dev->iommu_group)) {
+ pr_warn("iommu_tce: device %s is already in iommu group %d, skipping\n",
+ dev_name(dev),
+ iommu_group_id(dev->iommu_group));
+ return -EBUSY;
+ }
+
+ tbl = get_iommu_table_base(dev);
+ if (!tbl || !tbl->it_group) {
+ pr_debug("iommu_tce: skipping device %s with no tbl\n",
+ dev_name(dev));
+ return 0;
+ }
+
+ pr_debug("iommu_tce: adding %s to iommu group %d\n",
+ dev_name(dev), iommu_group_id(tbl->it_group));
+
+ ret = iommu_group_add_device(tbl->it_group, dev);
+ if (ret < 0)
+ pr_err("iommu_tce: %s has not been added, ret=%d\n",
+ dev_name(dev), ret);
+
+ return ret;
+}
+
+static void iommu_del_device(struct device *dev)
+{
+ iommu_group_remove_device(dev);
+}
+
+static int iommu_bus_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct device *dev = data;
+
+ switch (action) {
+ case BUS_NOTIFY_ADD_DEVICE:
+ return iommu_add_device(dev);
+ case BUS_NOTIFY_DEL_DEVICE:
+ iommu_del_device(dev);
+ return 0;
+ default:
+ return 0;
+ }
+}
+
+static struct notifier_block tce_iommu_bus_nb = {
+ .notifier_call = iommu_bus_notifier,
+};
+
+static int __init tce_iommu_init(void)
+{
+ struct pci_dev *pdev = NULL;
+
+ BUILD_BUG_ON(PAGE_SIZE < IOMMU_PAGE_SIZE);
+
+ for_each_pci_dev(pdev)
+ iommu_add_device(&pdev->dev);
+
+ bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
+ return 0;
+}
+
+subsys_initcall_sync(tce_iommu_init);
+
+#else
+
+void iommu_register_group(struct iommu_table *tbl,
+ int pci_domain_number, unsigned long pe_num)
+{
+}
+
+#endif /* CONFIG_IOMMU_API */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 5cbcf4d5a808..57d286a78f86 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -116,8 +116,6 @@ static inline notrace int decrementer_check_overflow(void)
u64 now = get_tb_or_rtc();
u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
- if (now >= *next_tb)
- set_dec(1);
return now >= *next_tb;
}
@@ -162,7 +160,7 @@ notrace unsigned int __check_irq_replay(void)
* in case we also had a rollover while hard disabled
*/
local_paca->irq_happened &= ~PACA_IRQ_DEC;
- if (decrementer_check_overflow())
+ if ((happened & PACA_IRQ_DEC) || decrementer_check_overflow())
return 0x900;
/* Finally check if an external interrupt happened */
@@ -364,7 +362,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%10u ", per_cpu(irq_stat, j).spurious_irqs);
seq_printf(p, " Spurious interrupts\n");
- seq_printf(p, "%*s: ", prec, "CNT");
+ seq_printf(p, "%*s: ", prec, "PMI");
for_each_online_cpu(j)
seq_printf(p, "%10u ", per_cpu(irq_stat, j).pmu_irqs);
seq_printf(p, " Performance monitoring interrupts\n");
@@ -443,50 +441,6 @@ void migrate_irqs(void)
}
#endif
-static inline void handle_one_irq(unsigned int irq)
-{
- struct thread_info *curtp, *irqtp;
- unsigned long saved_sp_limit;
- struct irq_desc *desc;
-
- desc = irq_to_desc(irq);
- if (!desc)
- return;
-
- /* Switch to the irq stack to handle this */
- curtp = current_thread_info();
- irqtp = hardirq_ctx[smp_processor_id()];
-
- if (curtp == irqtp) {
- /* We're already on the irq stack, just handle it */
- desc->handle_irq(irq, desc);
- return;
- }
-
- saved_sp_limit = current->thread.ksp_limit;
-
- irqtp->task = curtp->task;
- irqtp->flags = 0;
-
- /* Copy the softirq bits in preempt_count so that the
- * softirq checks work in the hardirq context. */
- irqtp->preempt_count = (irqtp->preempt_count & ~SOFTIRQ_MASK) |
- (curtp->preempt_count & SOFTIRQ_MASK);
-
- current->thread.ksp_limit = (unsigned long)irqtp +
- _ALIGN_UP(sizeof(struct thread_info), 16);
-
- call_handle_irq(irq, desc, irqtp, desc->handle_irq);
- current->thread.ksp_limit = saved_sp_limit;
- irqtp->task = NULL;
-
- /* Set any flag that may have been set on the
- * alternate stack
- */
- if (irqtp->flags)
- set_bits(irqtp->flags, &curtp->flags);
-}
-
static inline void check_stack_overflow(void)
{
#ifdef CONFIG_DEBUG_STACKOVERFLOW
@@ -503,9 +457,9 @@ static inline void check_stack_overflow(void)
#endif
}
-void do_IRQ(struct pt_regs *regs)
+void __do_irq(struct pt_regs *regs)
{
- struct pt_regs *old_regs = set_irq_regs(regs);
+ struct irq_desc *desc;
unsigned int irq;
irq_enter();
@@ -521,18 +475,56 @@ void do_IRQ(struct pt_regs *regs)
*/
irq = ppc_md.get_irq();
- /* We can hard enable interrupts now */
+ /* We can hard enable interrupts now to allow perf interrupts */
may_hard_irq_enable();
/* And finally process it */
- if (irq != NO_IRQ)
- handle_one_irq(irq);
- else
+ if (unlikely(irq == NO_IRQ))
__get_cpu_var(irq_stat).spurious_irqs++;
+ else {
+ desc = irq_to_desc(irq);
+ if (likely(desc))
+ desc->handle_irq(irq, desc);
+ }
trace_irq_exit(regs);
irq_exit();
+}
+
+void do_IRQ(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+ struct thread_info *curtp, *irqtp;
+
+ /* Switch to the irq stack to handle this */
+ curtp = current_thread_info();
+ irqtp = hardirq_ctx[raw_smp_processor_id()];
+
+ /* Already there ? */
+ if (unlikely(curtp == irqtp)) {
+ __do_irq(regs);
+ set_irq_regs(old_regs);
+ return;
+ }
+
+ /* Prepare the thread_info in the irq stack */
+ irqtp->task = curtp->task;
+ irqtp->flags = 0;
+
+ /* Copy the preempt_count so that the [soft]irq checks work. */
+ irqtp->preempt_count = curtp->preempt_count;
+
+ /* Switch stack and call */
+ call_do_irq(regs, irqtp);
+
+ /* Restore stack limit */
+ irqtp->task = NULL;
+
+ /* Copy back updates to the thread_info */
+ if (irqtp->flags)
+ set_bits(irqtp->flags, &curtp->flags);
+
set_irq_regs(old_regs);
}
@@ -594,28 +586,22 @@ void irq_ctx_init(void)
memset((void *)softirq_ctx[i], 0, THREAD_SIZE);
tp = softirq_ctx[i];
tp->cpu = i;
- tp->preempt_count = 0;
memset((void *)hardirq_ctx[i], 0, THREAD_SIZE);
tp = hardirq_ctx[i];
tp->cpu = i;
- tp->preempt_count = HARDIRQ_OFFSET;
}
}
static inline void do_softirq_onstack(void)
{
struct thread_info *curtp, *irqtp;
- unsigned long saved_sp_limit = current->thread.ksp_limit;
curtp = current_thread_info();
irqtp = softirq_ctx[smp_processor_id()];
irqtp->task = curtp->task;
irqtp->flags = 0;
- current->thread.ksp_limit = (unsigned long)irqtp +
- _ALIGN_UP(sizeof(struct thread_info), 16);
call_do_softirq(irqtp);
- current->thread.ksp_limit = saved_sp_limit;
irqtp->task = NULL;
/* Set any flag that may have been set on the
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 11f5b03a0b06..2156ea90eb54 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -36,12 +36,6 @@
#include <asm/sstep.h>
#include <asm/uaccess.h>
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
-#define MSR_SINGLESTEP (MSR_DE)
-#else
-#define MSR_SINGLESTEP (MSR_SE)
-#endif
-
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
@@ -104,19 +98,7 @@ void __kprobes arch_remove_kprobe(struct kprobe *p)
static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
{
- /* We turn off async exceptions to ensure that the single step will
- * be for the instruction we have the kprobe on, if we dont its
- * possible we'd get the single step reported for an exception handler
- * like Decrementer or External Interrupt */
- regs->msr &= ~MSR_EE;
- regs->msr |= MSR_SINGLESTEP;
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
- regs->msr &= ~MSR_CE;
- mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
-#ifdef CONFIG_PPC_47x
- isync();
-#endif
-#endif
+ enable_single_step(regs);
/*
* On powerpc we should single step on the original
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 6782221d49bd..db28032e320e 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -750,13 +750,8 @@ EXPORT_SYMBOL_GPL(kvm_hypercall);
static __init void kvm_free_tmp(void)
{
- unsigned long start, end;
-
- start = (ulong)&kvm_tmp[kvm_tmp_index + (PAGE_SIZE - 1)] & PAGE_MASK;
- end = (ulong)&kvm_tmp[ARRAY_SIZE(kvm_tmp)] & PAGE_MASK;
-
- /* Free the tmp space we don't need */
- free_reserved_area(start, end, 0, NULL);
+ free_reserved_area(&kvm_tmp[kvm_tmp_index],
+ &kvm_tmp[ARRAY_SIZE(kvm_tmp)], -1, NULL);
}
static int __init kvm_guest_init(void)
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 0733b05eb856..22e88dd2f34a 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -99,7 +99,7 @@ static int __init add_legacy_port(struct device_node *np, int want_index,
legacy_serial_count = index + 1;
/* Check if there is a port who already claimed our slot */
- if (legacy_serial_infos[index].np != 0) {
+ if (legacy_serial_infos[index].np != NULL) {
/* if we still have some room, move it, else override */
if (legacy_serial_count < MAX_LEGACY_SERIAL_PORTS) {
printk(KERN_DEBUG "Moved legacy port %d -> %d\n",
@@ -152,7 +152,7 @@ static int __init add_legacy_soc_port(struct device_node *np,
struct device_node *soc_dev)
{
u64 addr;
- const u32 *addrp;
+ const __be32 *addrp;
upf_t flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ
| UPF_FIXED_PORT;
struct device_node *tsi = of_get_parent(np);
@@ -221,14 +221,19 @@ static int __init add_legacy_isa_port(struct device_node *np,
/* Translate ISA address. If it fails, we still register the port
* with no translated address so that it can be picked up as an IO
* port later by the serial driver
+ *
+ * Note: Don't even try on P8 lpc, we know it's not directly mapped
*/
- taddr = of_translate_address(np, reg);
- if (taddr == OF_BAD_ADDR)
+ if (!of_device_is_compatible(isa_brg, "ibm,power8-lpc")) {
+ taddr = of_translate_address(np, reg);
+ if (taddr == OF_BAD_ADDR)
+ taddr = 0;
+ } else
taddr = 0;
/* Add port, irq will be dealt with later */
- return add_legacy_port(np, index, UPIO_PORT, be32_to_cpu(reg[1]), taddr,
- NO_IRQ, UPF_BOOT_AUTOCONF, 0);
+ return add_legacy_port(np, index, UPIO_PORT, be32_to_cpu(reg[1]),
+ taddr, NO_IRQ, UPF_BOOT_AUTOCONF, 0);
}
@@ -237,7 +242,7 @@ static int __init add_legacy_pci_port(struct device_node *np,
struct device_node *pci_dev)
{
u64 addr, base;
- const u32 *addrp;
+ const __be32 *addrp;
unsigned int flags;
int iotype, index = -1, lindex = 0;
@@ -270,7 +275,7 @@ static int __init add_legacy_pci_port(struct device_node *np,
if (iotype == UPIO_MEM)
base = addr;
else
- base = addrp[2];
+ base = of_read_number(&addrp[2], 1);
/* Try to guess an index... If we have subdevices of the pci dev,
* we get to their "reg" property
@@ -307,19 +312,31 @@ static int __init add_legacy_pci_port(struct device_node *np,
static void __init setup_legacy_serial_console(int console)
{
- struct legacy_serial_info *info =
- &legacy_serial_infos[console];
+ struct legacy_serial_info *info = &legacy_serial_infos[console];
+ struct plat_serial8250_port *port = &legacy_serial_ports[console];
void __iomem *addr;
- if (info->taddr == 0)
- return;
- addr = ioremap(info->taddr, 0x1000);
- if (addr == NULL)
- return;
+ /* Check if a translated MMIO address has been found */
+ if (info->taddr) {
+ addr = ioremap(info->taddr, 0x1000);
+ if (addr == NULL)
+ return;
+ udbg_uart_init_mmio(addr, 1);
+ } else {
+ /* Check if it's PIO and we support untranslated PIO */
+ if (port->iotype == UPIO_PORT && isa_io_special)
+ udbg_uart_init_pio(port->iobase, 1);
+ else
+ return;
+ }
+
+ /* Try to query the current speed */
if (info->speed == 0)
- info->speed = udbg_probe_uart_speed(addr, info->clock);
+ info->speed = udbg_probe_uart_speed(info->clock);
+
+ /* Set it up */
DBG("default console speed = %d\n", info->speed);
- udbg_init_uart(addr, info->speed, info->clock);
+ udbg_uart_setup(info->speed, info->clock);
}
/*
@@ -367,10 +384,13 @@ void __init find_legacy_serial_ports(void)
/* Next, fill our array with ISA ports */
for_each_node_by_type(np, "serial") {
struct device_node *isa = of_get_parent(np);
- if (isa && !strcmp(isa->name, "isa")) {
- index = add_legacy_isa_port(np, isa);
- if (index >= 0 && np == stdout)
- legacy_serial_console = index;
+ if (isa && (!strcmp(isa->name, "isa") ||
+ !strcmp(isa->name, "lpc"))) {
+ if (of_device_is_available(np)) {
+ index = add_legacy_isa_port(np, isa);
+ if (index >= 0 && np == stdout)
+ legacy_serial_console = index;
+ }
}
of_node_put(isa);
}
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
deleted file mode 100644
index d92f3871e9cf..000000000000
--- a/arch/powerpc/kernel/lparcfg.c
+++ /dev/null
@@ -1,712 +0,0 @@
-/*
- * PowerPC64 LPAR Configuration Information Driver
- *
- * Dave Engebretsen engebret@us.ibm.com
- * Copyright (c) 2003 Dave Engebretsen
- * Will Schmidt willschm@us.ibm.com
- * SPLPAR updates, Copyright (c) 2003 Will Schmidt IBM Corporation.
- * seq_file updates, Copyright (c) 2004 Will Schmidt IBM Corporation.
- * Nathan Lynch nathanl@austin.ibm.com
- * Added lparcfg_write, Copyright (C) 2004 Nathan Lynch IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * This driver creates a proc file at /proc/ppc64/lparcfg which contains
- * keyword - value pairs that specify the configuration of the partition.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/proc_fs.h>
-#include <linux/init.h>
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-#include <asm/uaccess.h>
-#include <asm/lppaca.h>
-#include <asm/hvcall.h>
-#include <asm/firmware.h>
-#include <asm/rtas.h>
-#include <asm/time.h>
-#include <asm/prom.h>
-#include <asm/vdso_datapage.h>
-#include <asm/vio.h>
-#include <asm/mmu.h>
-
-#define MODULE_VERS "1.9"
-#define MODULE_NAME "lparcfg"
-
-/* #define LPARCFG_DEBUG */
-
-/*
- * Track sum of all purrs across all processors. This is used to further
- * calculate usage values by different applications
- */
-static unsigned long get_purr(void)
-{
- unsigned long sum_purr = 0;
- int cpu;
-
- for_each_possible_cpu(cpu) {
- struct cpu_usage *cu;
-
- cu = &per_cpu(cpu_usage_array, cpu);
- sum_purr += cu->current_tb;
- }
- return sum_purr;
-}
-
-/*
- * Methods used to fetch LPAR data when running on a pSeries platform.
- */
-
-struct hvcall_ppp_data {
- u64 entitlement;
- u64 unallocated_entitlement;
- u16 group_num;
- u16 pool_num;
- u8 capped;
- u8 weight;
- u8 unallocated_weight;
- u16 active_procs_in_pool;
- u16 active_system_procs;
- u16 phys_platform_procs;
- u32 max_proc_cap_avail;
- u32 entitled_proc_cap_avail;
-};
-
-/*
- * H_GET_PPP hcall returns info in 4 parms.
- * entitled_capacity,unallocated_capacity,
- * aggregation, resource_capability).
- *
- * R4 = Entitled Processor Capacity Percentage.
- * R5 = Unallocated Processor Capacity Percentage.
- * R6 (AABBCCDDEEFFGGHH).
- * XXXX - reserved (0)
- * XXXX - reserved (0)
- * XXXX - Group Number
- * XXXX - Pool Number.
- * R7 (IIJJKKLLMMNNOOPP).
- * XX - reserved. (0)
- * XX - bit 0-6 reserved (0). bit 7 is Capped indicator.
- * XX - variable processor Capacity Weight
- * XX - Unallocated Variable Processor Capacity Weight.
- * XXXX - Active processors in Physical Processor Pool.
- * XXXX - Processors active on platform.
- * R8 (QQQQRRRRRRSSSSSS). if ibm,partition-performance-parameters-level >= 1
- * XXXX - Physical platform procs allocated to virtualization.
- * XXXXXX - Max procs capacity % available to the partitions pool.
- * XXXXXX - Entitled procs capacity % available to the
- * partitions pool.
- */
-static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data)
-{
- unsigned long rc;
- unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
-
- rc = plpar_hcall9(H_GET_PPP, retbuf);
-
- ppp_data->entitlement = retbuf[0];
- ppp_data->unallocated_entitlement = retbuf[1];
-
- ppp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
- ppp_data->pool_num = retbuf[2] & 0xffff;
-
- ppp_data->capped = (retbuf[3] >> 6 * 8) & 0x01;
- ppp_data->weight = (retbuf[3] >> 5 * 8) & 0xff;
- ppp_data->unallocated_weight = (retbuf[3] >> 4 * 8) & 0xff;
- ppp_data->active_procs_in_pool = (retbuf[3] >> 2 * 8) & 0xffff;
- ppp_data->active_system_procs = retbuf[3] & 0xffff;
-
- ppp_data->phys_platform_procs = retbuf[4] >> 6 * 8;
- ppp_data->max_proc_cap_avail = (retbuf[4] >> 3 * 8) & 0xffffff;
- ppp_data->entitled_proc_cap_avail = retbuf[4] & 0xffffff;
-
- return rc;
-}
-
-static unsigned h_pic(unsigned long *pool_idle_time,
- unsigned long *num_procs)
-{
- unsigned long rc;
- unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
- rc = plpar_hcall(H_PIC, retbuf);
-
- *pool_idle_time = retbuf[0];
- *num_procs = retbuf[1];
-
- return rc;
-}
-
-/*
- * parse_ppp_data
- * Parse out the data returned from h_get_ppp and h_pic
- */
-static void parse_ppp_data(struct seq_file *m)
-{
- struct hvcall_ppp_data ppp_data;
- struct device_node *root;
- const int *perf_level;
- int rc;
-
- rc = h_get_ppp(&ppp_data);
- if (rc)
- return;
-
- seq_printf(m, "partition_entitled_capacity=%lld\n",
- ppp_data.entitlement);
- seq_printf(m, "group=%d\n", ppp_data.group_num);
- seq_printf(m, "system_active_processors=%d\n",
- ppp_data.active_system_procs);
-
- /* pool related entries are appropriate for shared configs */
- if (lppaca_of(0).shared_proc) {
- unsigned long pool_idle_time, pool_procs;
-
- seq_printf(m, "pool=%d\n", ppp_data.pool_num);
-
- /* report pool_capacity in percentage */
- seq_printf(m, "pool_capacity=%d\n",
- ppp_data.active_procs_in_pool * 100);
-
- h_pic(&pool_idle_time, &pool_procs);
- seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
- seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
- }
-
- seq_printf(m, "unallocated_capacity_weight=%d\n",
- ppp_data.unallocated_weight);
- seq_printf(m, "capacity_weight=%d\n", ppp_data.weight);
- seq_printf(m, "capped=%d\n", ppp_data.capped);
- seq_printf(m, "unallocated_capacity=%lld\n",
- ppp_data.unallocated_entitlement);
-
- /* The last bits of information returned from h_get_ppp are only
- * valid if the ibm,partition-performance-parameters-level
- * property is >= 1.
- */
- root = of_find_node_by_path("/");
- if (root) {
- perf_level = of_get_property(root,
- "ibm,partition-performance-parameters-level",
- NULL);
- if (perf_level && (*perf_level >= 1)) {
- seq_printf(m,
- "physical_procs_allocated_to_virtualization=%d\n",
- ppp_data.phys_platform_procs);
- seq_printf(m, "max_proc_capacity_available=%d\n",
- ppp_data.max_proc_cap_avail);
- seq_printf(m, "entitled_proc_capacity_available=%d\n",
- ppp_data.entitled_proc_cap_avail);
- }
-
- of_node_put(root);
- }
-}
-
-/**
- * parse_mpp_data
- * Parse out data returned from h_get_mpp
- */
-static void parse_mpp_data(struct seq_file *m)
-{
- struct hvcall_mpp_data mpp_data;
- int rc;
-
- rc = h_get_mpp(&mpp_data);
- if (rc)
- return;
-
- seq_printf(m, "entitled_memory=%ld\n", mpp_data.entitled_mem);
-
- if (mpp_data.mapped_mem != -1)
- seq_printf(m, "mapped_entitled_memory=%ld\n",
- mpp_data.mapped_mem);
-
- seq_printf(m, "entitled_memory_group_number=%d\n", mpp_data.group_num);
- seq_printf(m, "entitled_memory_pool_number=%d\n", mpp_data.pool_num);
-
- seq_printf(m, "entitled_memory_weight=%d\n", mpp_data.mem_weight);
- seq_printf(m, "unallocated_entitled_memory_weight=%d\n",
- mpp_data.unallocated_mem_weight);
- seq_printf(m, "unallocated_io_mapping_entitlement=%ld\n",
- mpp_data.unallocated_entitlement);
-
- if (mpp_data.pool_size != -1)
- seq_printf(m, "entitled_memory_pool_size=%ld bytes\n",
- mpp_data.pool_size);
-
- seq_printf(m, "entitled_memory_loan_request=%ld\n",
- mpp_data.loan_request);
-
- seq_printf(m, "backing_memory=%ld bytes\n", mpp_data.backing_mem);
-}
-
-/**
- * parse_mpp_x_data
- * Parse out data returned from h_get_mpp_x
- */
-static void parse_mpp_x_data(struct seq_file *m)
-{
- struct hvcall_mpp_x_data mpp_x_data;
-
- if (!firmware_has_feature(FW_FEATURE_XCMO))
- return;
- if (h_get_mpp_x(&mpp_x_data))
- return;
-
- seq_printf(m, "coalesced_bytes=%ld\n", mpp_x_data.coalesced_bytes);
-
- if (mpp_x_data.pool_coalesced_bytes)
- seq_printf(m, "pool_coalesced_bytes=%ld\n",
- mpp_x_data.pool_coalesced_bytes);
- if (mpp_x_data.pool_purr_cycles)
- seq_printf(m, "coalesce_pool_purr=%ld\n", mpp_x_data.pool_purr_cycles);
- if (mpp_x_data.pool_spurr_cycles)
- seq_printf(m, "coalesce_pool_spurr=%ld\n", mpp_x_data.pool_spurr_cycles);
-}
-
-#define SPLPAR_CHARACTERISTICS_TOKEN 20
-#define SPLPAR_MAXLENGTH 1026*(sizeof(char))
-
-/*
- * parse_system_parameter_string()
- * Retrieve the potential_processors, max_entitled_capacity and friends
- * through the get-system-parameter rtas call. Replace keyword strings as
- * necessary.
- */
-static void parse_system_parameter_string(struct seq_file *m)
-{
- int call_status;
-
- unsigned char *local_buffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL);
- if (!local_buffer) {
- printk(KERN_ERR "%s %s kmalloc failure at line %d\n",
- __FILE__, __func__, __LINE__);
- return;
- }
-
- spin_lock(&rtas_data_buf_lock);
- memset(rtas_data_buf, 0, SPLPAR_MAXLENGTH);
- call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
- NULL,
- SPLPAR_CHARACTERISTICS_TOKEN,
- __pa(rtas_data_buf),
- RTAS_DATA_BUF_SIZE);
- memcpy(local_buffer, rtas_data_buf, SPLPAR_MAXLENGTH);
- local_buffer[SPLPAR_MAXLENGTH - 1] = '\0';
- spin_unlock(&rtas_data_buf_lock);
-
- if (call_status != 0) {
- printk(KERN_INFO
- "%s %s Error calling get-system-parameter (0x%x)\n",
- __FILE__, __func__, call_status);
- } else {
- int splpar_strlen;
- int idx, w_idx;
- char *workbuffer = kzalloc(SPLPAR_MAXLENGTH, GFP_KERNEL);
- if (!workbuffer) {
- printk(KERN_ERR "%s %s kmalloc failure at line %d\n",
- __FILE__, __func__, __LINE__);
- kfree(local_buffer);
- return;
- }
-#ifdef LPARCFG_DEBUG
- printk(KERN_INFO "success calling get-system-parameter\n");
-#endif
- splpar_strlen = local_buffer[0] * 256 + local_buffer[1];
- local_buffer += 2; /* step over strlen value */
-
- w_idx = 0;
- idx = 0;
- while ((*local_buffer) && (idx < splpar_strlen)) {
- workbuffer[w_idx++] = local_buffer[idx++];
- if ((local_buffer[idx] == ',')
- || (local_buffer[idx] == '\0')) {
- workbuffer[w_idx] = '\0';
- if (w_idx) {
- /* avoid the empty string */
- seq_printf(m, "%s\n", workbuffer);
- }
- memset(workbuffer, 0, SPLPAR_MAXLENGTH);
- idx++; /* skip the comma */
- w_idx = 0;
- } else if (local_buffer[idx] == '=') {
- /* code here to replace workbuffer contents
- with different keyword strings */
- if (0 == strcmp(workbuffer, "MaxEntCap")) {
- strcpy(workbuffer,
- "partition_max_entitled_capacity");
- w_idx = strlen(workbuffer);
- }
- if (0 == strcmp(workbuffer, "MaxPlatProcs")) {
- strcpy(workbuffer,
- "system_potential_processors");
- w_idx = strlen(workbuffer);
- }
- }
- }
- kfree(workbuffer);
- local_buffer -= 2; /* back up over strlen value */
- }
- kfree(local_buffer);
-}
-
-/* Return the number of processors in the system.
- * This function reads through the device tree and counts
- * the virtual processors, this does not include threads.
- */
-static int lparcfg_count_active_processors(void)
-{
- struct device_node *cpus_dn = NULL;
- int count = 0;
-
- while ((cpus_dn = of_find_node_by_type(cpus_dn, "cpu"))) {
-#ifdef LPARCFG_DEBUG
- printk(KERN_ERR "cpus_dn %p\n", cpus_dn);
-#endif
- count++;
- }
- return count;
-}
-
-static void pseries_cmo_data(struct seq_file *m)
-{
- int cpu;
- unsigned long cmo_faults = 0;
- unsigned long cmo_fault_time = 0;
-
- seq_printf(m, "cmo_enabled=%d\n", firmware_has_feature(FW_FEATURE_CMO));
-
- if (!firmware_has_feature(FW_FEATURE_CMO))
- return;
-
- for_each_possible_cpu(cpu) {
- cmo_faults += lppaca_of(cpu).cmo_faults;
- cmo_fault_time += lppaca_of(cpu).cmo_fault_time;
- }
-
- seq_printf(m, "cmo_faults=%lu\n", cmo_faults);
- seq_printf(m, "cmo_fault_time_usec=%lu\n",
- cmo_fault_time / tb_ticks_per_usec);
- seq_printf(m, "cmo_primary_psp=%d\n", cmo_get_primary_psp());
- seq_printf(m, "cmo_secondary_psp=%d\n", cmo_get_secondary_psp());
- seq_printf(m, "cmo_page_size=%lu\n", cmo_get_page_size());
-}
-
-static void splpar_dispatch_data(struct seq_file *m)
-{
- int cpu;
- unsigned long dispatches = 0;
- unsigned long dispatch_dispersions = 0;
-
- for_each_possible_cpu(cpu) {
- dispatches += lppaca_of(cpu).yield_count;
- dispatch_dispersions += lppaca_of(cpu).dispersion_count;
- }
-
- seq_printf(m, "dispatches=%lu\n", dispatches);
- seq_printf(m, "dispatch_dispersions=%lu\n", dispatch_dispersions);
-}
-
-static void parse_em_data(struct seq_file *m)
-{
- unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
- if (plpar_hcall(H_GET_EM_PARMS, retbuf) == H_SUCCESS)
- seq_printf(m, "power_mode_data=%016lx\n", retbuf[0]);
-}
-
-static int pseries_lparcfg_data(struct seq_file *m, void *v)
-{
- int partition_potential_processors;
- int partition_active_processors;
- struct device_node *rtas_node;
- const int *lrdrp = NULL;
-
- rtas_node = of_find_node_by_path("/rtas");
- if (rtas_node)
- lrdrp = of_get_property(rtas_node, "ibm,lrdr-capacity", NULL);
-
- if (lrdrp == NULL) {
- partition_potential_processors = vdso_data->processorCount;
- } else {
- partition_potential_processors = *(lrdrp + 4);
- }
- of_node_put(rtas_node);
-
- partition_active_processors = lparcfg_count_active_processors();
-
- if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
- /* this call handles the ibm,get-system-parameter contents */
- parse_system_parameter_string(m);
- parse_ppp_data(m);
- parse_mpp_data(m);
- parse_mpp_x_data(m);
- pseries_cmo_data(m);
- splpar_dispatch_data(m);
-
- seq_printf(m, "purr=%ld\n", get_purr());
- } else { /* non SPLPAR case */
-
- seq_printf(m, "system_active_processors=%d\n",
- partition_potential_processors);
-
- seq_printf(m, "system_potential_processors=%d\n",
- partition_potential_processors);
-
- seq_printf(m, "partition_max_entitled_capacity=%d\n",
- partition_potential_processors * 100);
-
- seq_printf(m, "partition_entitled_capacity=%d\n",
- partition_active_processors * 100);
- }
-
- seq_printf(m, "partition_active_processors=%d\n",
- partition_active_processors);
-
- seq_printf(m, "partition_potential_processors=%d\n",
- partition_potential_processors);
-
- seq_printf(m, "shared_processor_mode=%d\n", lppaca_of(0).shared_proc);
-
- seq_printf(m, "slb_size=%d\n", mmu_slb_size);
-
- parse_em_data(m);
-
- return 0;
-}
-
-static ssize_t update_ppp(u64 *entitlement, u8 *weight)
-{
- struct hvcall_ppp_data ppp_data;
- u8 new_weight;
- u64 new_entitled;
- ssize_t retval;
-
- /* Get our current parameters */
- retval = h_get_ppp(&ppp_data);
- if (retval)
- return retval;
-
- if (entitlement) {
- new_weight = ppp_data.weight;
- new_entitled = *entitlement;
- } else if (weight) {
- new_weight = *weight;
- new_entitled = ppp_data.entitlement;
- } else
- return -EINVAL;
-
- pr_debug("%s: current_entitled = %llu, current_weight = %u\n",
- __func__, ppp_data.entitlement, ppp_data.weight);
-
- pr_debug("%s: new_entitled = %llu, new_weight = %u\n",
- __func__, new_entitled, new_weight);
-
- retval = plpar_hcall_norets(H_SET_PPP, new_entitled, new_weight);
- return retval;
-}
-
-/**
- * update_mpp
- *
- * Update the memory entitlement and weight for the partition. Caller must
- * specify either a new entitlement or weight, not both, to be updated
- * since the h_set_mpp call takes both entitlement and weight as parameters.
- */
-static ssize_t update_mpp(u64 *entitlement, u8 *weight)
-{
- struct hvcall_mpp_data mpp_data;
- u64 new_entitled;
- u8 new_weight;
- ssize_t rc;
-
- if (entitlement) {
- /* Check with vio to ensure the new memory entitlement
- * can be handled.
- */
- rc = vio_cmo_entitlement_update(*entitlement);
- if (rc)
- return rc;
- }
-
- rc = h_get_mpp(&mpp_data);
- if (rc)
- return rc;
-
- if (entitlement) {
- new_weight = mpp_data.mem_weight;
- new_entitled = *entitlement;
- } else if (weight) {
- new_weight = *weight;
- new_entitled = mpp_data.entitled_mem;
- } else
- return -EINVAL;
-
- pr_debug("%s: current_entitled = %lu, current_weight = %u\n",
- __func__, mpp_data.entitled_mem, mpp_data.mem_weight);
-
- pr_debug("%s: new_entitled = %llu, new_weight = %u\n",
- __func__, new_entitled, new_weight);
-
- rc = plpar_hcall_norets(H_SET_MPP, new_entitled, new_weight);
- return rc;
-}
-
-/*
- * Interface for changing system parameters (variable capacity weight
- * and entitled capacity). Format of input is "param_name=value";
- * anything after value is ignored. Valid parameters at this time are
- * "partition_entitled_capacity" and "capacity_weight". We use
- * H_SET_PPP to alter parameters.
- *
- * This function should be invoked only on systems with
- * FW_FEATURE_SPLPAR.
- */
-static ssize_t lparcfg_write(struct file *file, const char __user * buf,
- size_t count, loff_t * off)
-{
- int kbuf_sz = 64;
- char kbuf[kbuf_sz];
- char *tmp;
- u64 new_entitled, *new_entitled_ptr = &new_entitled;
- u8 new_weight, *new_weight_ptr = &new_weight;
- ssize_t retval;
-
- if (!firmware_has_feature(FW_FEATURE_SPLPAR))
- return -EINVAL;
-
- if (count > kbuf_sz)
- return -EINVAL;
-
- if (copy_from_user(kbuf, buf, count))
- return -EFAULT;
-
- kbuf[count - 1] = '\0';
- tmp = strchr(kbuf, '=');
- if (!tmp)
- return -EINVAL;
-
- *tmp++ = '\0';
-
- if (!strcmp(kbuf, "partition_entitled_capacity")) {
- char *endp;
- *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
- if (endp == tmp)
- return -EINVAL;
-
- retval = update_ppp(new_entitled_ptr, NULL);
- } else if (!strcmp(kbuf, "capacity_weight")) {
- char *endp;
- *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
- if (endp == tmp)
- return -EINVAL;
-
- retval = update_ppp(NULL, new_weight_ptr);
- } else if (!strcmp(kbuf, "entitled_memory")) {
- char *endp;
- *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
- if (endp == tmp)
- return -EINVAL;
-
- retval = update_mpp(new_entitled_ptr, NULL);
- } else if (!strcmp(kbuf, "entitled_memory_weight")) {
- char *endp;
- *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
- if (endp == tmp)
- return -EINVAL;
-
- retval = update_mpp(NULL, new_weight_ptr);
- } else
- return -EINVAL;
-
- if (retval == H_SUCCESS || retval == H_CONSTRAINED) {
- retval = count;
- } else if (retval == H_BUSY) {
- retval = -EBUSY;
- } else if (retval == H_HARDWARE) {
- retval = -EIO;
- } else if (retval == H_PARAMETER) {
- retval = -EINVAL;
- }
-
- return retval;
-}
-
-static int lparcfg_data(struct seq_file *m, void *v)
-{
- struct device_node *rootdn;
- const char *model = "";
- const char *system_id = "";
- const char *tmp;
- const unsigned int *lp_index_ptr;
- unsigned int lp_index = 0;
-
- seq_printf(m, "%s %s\n", MODULE_NAME, MODULE_VERS);
-
- rootdn = of_find_node_by_path("/");
- if (rootdn) {
- tmp = of_get_property(rootdn, "model", NULL);
- if (tmp)
- model = tmp;
- tmp = of_get_property(rootdn, "system-id", NULL);
- if (tmp)
- system_id = tmp;
- lp_index_ptr = of_get_property(rootdn, "ibm,partition-no",
- NULL);
- if (lp_index_ptr)
- lp_index = *lp_index_ptr;
- of_node_put(rootdn);
- }
- seq_printf(m, "serial_number=%s\n", system_id);
- seq_printf(m, "system_type=%s\n", model);
- seq_printf(m, "partition_id=%d\n", (int)lp_index);
-
- return pseries_lparcfg_data(m, v);
-}
-
-static int lparcfg_open(struct inode *inode, struct file *file)
-{
- return single_open(file, lparcfg_data, NULL);
-}
-
-static const struct file_operations lparcfg_fops = {
- .owner = THIS_MODULE,
- .read = seq_read,
- .write = lparcfg_write,
- .open = lparcfg_open,
- .release = single_release,
- .llseek = seq_lseek,
-};
-
-static int __init lparcfg_init(void)
-{
- umode_t mode = S_IRUSR | S_IRGRP | S_IROTH;
-
- /* Allow writing if we have FW_FEATURE_SPLPAR */
- if (firmware_has_feature(FW_FEATURE_SPLPAR))
- mode |= S_IWUSR;
-
- if (!proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_fops)) {
- printk(KERN_ERR "Failed to create powerpc/lparcfg\n");
- return -EIO;
- }
- return 0;
-}
-
-static void __exit lparcfg_cleanup(void)
-{
- remove_proc_subtree("powerpc/lparcfg", NULL);
-}
-
-module_init(lparcfg_init);
-module_exit(lparcfg_cleanup);
-MODULE_DESCRIPTION("Interface for LPAR configuration data");
-MODULE_AUTHOR("Dave Engebretsen");
-MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index e469f30e6eeb..2b0ad9845363 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -36,26 +36,41 @@
.text
+/*
+ * We store the saved ksp_limit in the unused part
+ * of the STACK_FRAME_OVERHEAD
+ */
_GLOBAL(call_do_softirq)
mflr r0
stw r0,4(r1)
+ lwz r10,THREAD+KSP_LIMIT(r2)
+ addi r11,r3,THREAD_INFO_GAP
stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
mr r1,r3
+ stw r10,8(r1)
+ stw r11,THREAD+KSP_LIMIT(r2)
bl __do_softirq
+ lwz r10,8(r1)
lwz r1,0(r1)
lwz r0,4(r1)
+ stw r10,THREAD+KSP_LIMIT(r2)
mtlr r0
blr
-_GLOBAL(call_handle_irq)
+_GLOBAL(call_do_irq)
mflr r0
stw r0,4(r1)
- mtctr r6
- stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5)
- mr r1,r5
- bctrl
+ lwz r10,THREAD+KSP_LIMIT(r2)
+ addi r11,r3,THREAD_INFO_GAP
+ stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
+ mr r1,r4
+ stw r10,8(r1)
+ stw r11,THREAD+KSP_LIMIT(r2)
+ bl __do_irq
+ lwz r10,8(r1)
lwz r1,0(r1)
lwz r0,4(r1)
+ stw r10,THREAD+KSP_LIMIT(r2)
mtlr r0
blr
@@ -327,8 +342,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE)
*
* flush_icache_range(unsigned long start, unsigned long stop)
*/
-_KPROBE(__flush_icache_range)
+_KPROBE(flush_icache_range)
BEGIN_FTR_SECTION
+ isync
blr /* for 601, do nothing */
END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
li r5,L1_CACHE_BYTES-1
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 6820e45f557b..e59caf874d05 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -40,14 +40,12 @@ _GLOBAL(call_do_softirq)
mtlr r0
blr
-_GLOBAL(call_handle_irq)
- ld r8,0(r6)
+_GLOBAL(call_do_irq)
mflr r0
std r0,16(r1)
- mtctr r8
- stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5)
- mr r1,r5
- bctrl
+ stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
+ mr r1,r4
+ bl .__do_irq
ld r1,0(r1)
ld r0,16(r1)
mtlr r0
@@ -67,8 +65,10 @@ PPC64_CACHES:
* flush all bytes from start through stop-1 inclusive
*/
-_KPROBE(__flush_icache_range)
-
+_KPROBE(flush_icache_range)
+BEGIN_FTR_SECTION
+ blr
+END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
/*
* Flush the data cache to memory
*
@@ -247,6 +247,37 @@ _GLOBAL(__bswapdi2)
blr
#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE)
+
+_GLOBAL(rmci_on)
+ sync
+ isync
+ li r3,0x100
+ rldicl r3,r3,32,0
+ mfspr r5,SPRN_HID4
+ or r5,r5,r3
+ sync
+ mtspr SPRN_HID4,r5
+ isync
+ slbia
+ isync
+ sync
+ blr
+
+_GLOBAL(rmci_off)
+ sync
+ isync
+ li r3,0x100
+ rldicl r3,r3,32,0
+ mfspr r5,SPRN_HID4
+ andc r5,r5,r3
+ sync
+ mtspr SPRN_HID4,r5
+ isync
+ slbia
+ isync
+ sync
+ blr
+
/*
* Do an IO access in real mode
*/
@@ -416,19 +447,6 @@ _GLOBAL(scom970_write)
blr
#endif /* CONFIG_CPU_FREQ_PMAC64 || CONFIG_CPU_FREQ_MAPLE */
-
-/*
- * disable_kernel_fp()
- * Disable the FPU.
- */
-_GLOBAL(disable_kernel_fp)
- mfmsr r3
- rldicl r0,r3,(63-MSR_FP_LG),1
- rldicl r3,r0,(MSR_FP_LG+1),0
- mtmsrd r3 /* disable use of fpu now */
- isync
- blr
-
/* kexec_wait(phys_cpu)
*
* wait for the flag to change, indicating this kernel is going away but
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 48fbc2b97e95..8213ee1eb05a 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -84,22 +84,30 @@ static ssize_t dev_nvram_read(struct file *file, char __user *buf,
char *tmp = NULL;
ssize_t size;
- ret = -ENODEV;
- if (!ppc_md.nvram_size)
+ if (!ppc_md.nvram_size) {
+ ret = -ENODEV;
goto out;
+ }
- ret = 0;
size = ppc_md.nvram_size();
- if (*ppos >= size || size < 0)
+ if (size < 0) {
+ ret = size;
+ goto out;
+ }
+
+ if (*ppos >= size) {
+ ret = 0;
goto out;
+ }
count = min_t(size_t, count, size - *ppos);
count = min(count, PAGE_SIZE);
- ret = -ENOMEM;
tmp = kmalloc(count, GFP_KERNEL);
- if (!tmp)
+ if (!tmp) {
+ ret = -ENOMEM;
goto out;
+ }
ret = ppc_md.nvram_read(tmp, count, ppos);
if (ret <= 0)
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index f8f24685f10a..3fc16e3beb9f 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -34,10 +34,10 @@ extern unsigned long __toc_start;
*/
struct lppaca lppaca[] = {
[0 ... (NR_LPPACAS-1)] = {
- .desc = 0xd397d781, /* "LpPa" */
- .size = sizeof(struct lppaca),
+ .desc = cpu_to_be32(0xd397d781), /* "LpPa" */
+ .size = cpu_to_be16(sizeof(struct lppaca)),
.fpregs_in_use = 1,
- .slb_count = 64,
+ .slb_count = cpu_to_be16(64),
.vmxregs_in_use = 0,
.page_ins = 0,
},
@@ -101,8 +101,8 @@ static inline void free_lppacas(void) { }
*/
struct slb_shadow slb_shadow[] __cacheline_aligned = {
[0 ... (NR_CPUS-1)] = {
- .persistent = SLB_NUM_BOLTED,
- .buffer_length = sizeof(struct slb_shadow),
+ .persistent = cpu_to_be32(SLB_NUM_BOLTED),
+ .buffer_length = cpu_to_be32(sizeof(struct slb_shadow)),
},
};
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 7f2273cc3c7d..905a24bb7acc 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -306,7 +306,7 @@ static struct resource *__pci_mmap_make_offset(struct pci_dev *dev,
unsigned long io_offset = 0;
int i, res_bit;
- if (hose == 0)
+ if (hose == NULL)
return NULL; /* should never happen */
/* If memory, add on the PCI bridge address offset */
@@ -667,7 +667,7 @@ void pci_resource_to_user(const struct pci_dev *dev, int bar,
void pci_process_bridge_OF_ranges(struct pci_controller *hose,
struct device_node *dev, int primary)
{
- const u32 *ranges;
+ const __be32 *ranges;
int rlen;
int pna = of_n_addr_cells(dev);
int np = pna + 5;
@@ -687,7 +687,7 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
/* Parse it */
while ((rlen -= np * 4) >= 0) {
/* Read next ranges element */
- pci_space = ranges[0];
+ pci_space = of_read_number(ranges, 1);
pci_addr = of_read_number(ranges + 1, 2);
cpu_addr = of_translate_address(dev, ranges + 3);
size = of_read_number(ranges + pna + 3, 2);
@@ -704,7 +704,7 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
/* Now consume following elements while they are contiguous */
for (; rlen >= np * sizeof(u32);
ranges += np, rlen -= np * 4) {
- if (ranges[0] != pci_space)
+ if (of_read_number(ranges, 1) != pci_space)
break;
pci_next = of_read_number(ranges + 1, 2);
cpu_next = of_translate_address(dev, ranges + 3);
@@ -827,6 +827,7 @@ static void pcibios_fixup_resources(struct pci_dev *dev)
}
for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
struct resource *res = dev->resource + i;
+ struct pci_bus_region reg;
if (!res->flags)
continue;
@@ -835,8 +836,9 @@ static void pcibios_fixup_resources(struct pci_dev *dev)
* at 0 as unset as well, except if PCI_PROBE_ONLY is also set
* since in that case, we don't want to re-assign anything
*/
+ pcibios_resource_to_bus(dev, &reg, res);
if (pci_has_flag(PCI_REASSIGN_ALL_RSRC) ||
- (res->start == 0 && !pci_has_flag(PCI_PROBE_ONLY))) {
+ (reg.start == 0 && !pci_has_flag(PCI_PROBE_ONLY))) {
/* Only print message if not re-assigning */
if (!pci_has_flag(PCI_REASSIGN_ALL_RSRC))
pr_debug("PCI:%s Resource %d %016llx-%016llx [%x] "
@@ -992,7 +994,7 @@ void pcibios_setup_bus_self(struct pci_bus *bus)
ppc_md.pci_dma_bus_setup(bus);
}
-void pcibios_setup_device(struct pci_dev *dev)
+static void pcibios_setup_device(struct pci_dev *dev)
{
/* Fixup NUMA node as it may not be setup yet by the generic
* code and is needed by the DMA init
@@ -1013,6 +1015,17 @@ void pcibios_setup_device(struct pci_dev *dev)
ppc_md.pci_irq_fixup(dev);
}
+int pcibios_add_device(struct pci_dev *dev)
+{
+ /*
+ * We can only call pcibios_setup_device() after bus setup is complete,
+ * since some of the platform specific DMA setup code depends on it.
+ */
+ if (dev->bus->is_added)
+ pcibios_setup_device(dev);
+ return 0;
+}
+
void pcibios_setup_bus_devices(struct pci_bus *bus)
{
struct pci_dev *dev;
@@ -1042,8 +1055,7 @@ void pcibios_fixup_bus(struct pci_bus *bus)
* bases. This is -not- called when generating the PCI tree from
* the OF device-tree.
*/
- if (bus->self != NULL)
- pci_read_bridge_bases(bus);
+ pci_read_bridge_bases(bus);
/* Now fixup the bus bus */
pcibios_setup_bus_self(bus);
@@ -1449,6 +1461,8 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus)
/* Allocate bus and devices resources */
pcibios_allocate_bus_resources(bus);
pcibios_claim_one_bus(bus);
+ if (!pci_has_flag(PCI_PROBE_ONLY))
+ pci_assign_unassigned_bus_resources(bus);
/* Fixup EEH */
eeh_add_device_tree_late(bus);
@@ -1467,10 +1481,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
if (ppc_md.pcibios_enable_device_hook(dev))
return -EINVAL;
- /* avoid pcie irq fix up impact on cardbus */
- if (dev->hdr_type != PCI_HEADER_TYPE_CARDBUS)
- pcibios_setup_device(dev);
-
return pci_enable_resources(dev, mask);
}
@@ -1567,7 +1577,7 @@ fake_pci_bus(struct pci_controller *hose, int busnr)
{
static struct pci_bus bus;
- if (hose == 0) {
+ if (hose == NULL) {
printk(KERN_ERR "Can't find hose for PCI bus %d!\n", busnr);
}
bus.number = busnr;
@@ -1663,12 +1673,8 @@ void pcibios_scan_phb(struct pci_controller *hose)
/* Configure PCI Express settings */
if (bus && !pci_has_flag(PCI_PROBE_ONLY)) {
struct pci_bus *child;
- list_for_each_entry(child, &bus->children, node) {
- struct pci_dev *self = child->self;
- if (!self)
- continue;
- pcie_bus_configure_settings(child, self->pcie_mpss);
- }
+ list_for_each_entry(child, &bus->children, node)
+ pcie_bus_configure_settings(child);
}
}
diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
new file mode 100644
index 000000000000..c1e17ae68a08
--- /dev/null
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -0,0 +1,110 @@
+/*
+ * Derived from "arch/powerpc/platforms/pseries/pci_dlpar.c"
+ *
+ * Copyright (C) 2003 Linda Xie <lxie@us.ibm.com>
+ * Copyright (C) 2005 International Business Machines
+ *
+ * Updates, 2005, John Rose <johnrose@austin.ibm.com>
+ * Updates, 2005, Linas Vepstas <linas@austin.ibm.com>
+ * Updates, 2013, Gavin Shan <shangw@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/pci.h>
+#include <linux/export.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+#include <asm/firmware.h>
+#include <asm/eeh.h>
+
+/**
+ * pcibios_release_device - release PCI device
+ * @dev: PCI device
+ *
+ * The function is called before releasing the indicated PCI device.
+ */
+void pcibios_release_device(struct pci_dev *dev)
+{
+ eeh_remove_device(dev);
+}
+
+/**
+ * pcibios_remove_pci_devices - remove all devices under this bus
+ * @bus: the indicated PCI bus
+ *
+ * Remove all of the PCI devices under this bus both from the
+ * linux pci device tree, and from the powerpc EEH address cache.
+ */
+void pcibios_remove_pci_devices(struct pci_bus *bus)
+{
+ struct pci_dev *dev, *tmp;
+ struct pci_bus *child_bus;
+
+ /* First go down child busses */
+ list_for_each_entry(child_bus, &bus->children, node)
+ pcibios_remove_pci_devices(child_bus);
+
+ pr_debug("PCI: Removing devices on bus %04x:%02x\n",
+ pci_domain_nr(bus), bus->number);
+ list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) {
+ pr_debug(" Removing %s...\n", pci_name(dev));
+ pci_stop_and_remove_bus_device(dev);
+ }
+}
+
+EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices);
+
+/**
+ * pcibios_add_pci_devices - adds new pci devices to bus
+ * @bus: the indicated PCI bus
+ *
+ * This routine will find and fixup new pci devices under
+ * the indicated bus. This routine presumes that there
+ * might already be some devices under this bridge, so
+ * it carefully tries to add only new devices. (And that
+ * is how this routine differs from other, similar pcibios
+ * routines.)
+ */
+void pcibios_add_pci_devices(struct pci_bus * bus)
+{
+ int slotno, mode, pass, max;
+ struct pci_dev *dev;
+ struct device_node *dn = pci_bus_to_OF_node(bus);
+
+ eeh_add_device_tree_early(dn);
+
+ mode = PCI_PROBE_NORMAL;
+ if (ppc_md.pci_probe_mode)
+ mode = ppc_md.pci_probe_mode(bus);
+
+ if (mode == PCI_PROBE_DEVTREE) {
+ /* use ofdt-based probe */
+ of_rescan_bus(dn, bus);
+ } else if (mode == PCI_PROBE_NORMAL) {
+ /*
+ * Use legacy probe. In the partial hotplug case, we
+ * probably have grandchildren devices unplugged. So
+ * we don't check the return value from pci_scan_slot() in
+ * order for fully rescan all the way down to pick them up.
+ * They can have been removed during partial hotplug.
+ */
+ slotno = PCI_SLOT(PCI_DN(dn->child)->devfn);
+ pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
+ pcibios_setup_bus_devices(bus);
+ max = bus->busn_res.start;
+ for (pass = 0; pass < 2; pass++) {
+ list_for_each_entry(dev, &bus->devices, bus_list) {
+ if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
+ dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
+ max = pci_scan_bridge(bus, dev,
+ max, pass);
+ }
+ }
+ }
+ pcibios_finish_adding_to_bus(bus);
+}
+EXPORT_SYMBOL_GPL(pcibios_add_pci_devices);
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 2e8629654ca8..a9e311f7a9dd 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -109,7 +109,7 @@ int pcibios_unmap_io_space(struct pci_bus *bus)
hose = pci_bus_to_host(bus);
/* Check if we have IOs allocated */
- if (hose->io_base_alloc == 0)
+ if (hose->io_base_alloc == NULL)
return 0;
pr_debug("IO unmapping for PHB %s\n", hose->dn->full_name);
@@ -272,7 +272,7 @@ static void quirk_radeon_32bit_msi(struct pci_dev *dev)
struct pci_dn *pdn = pci_get_pdn(dev);
if (pdn)
- pdn->force_32bit_msi = 1;
+ pdn->force_32bit_msi = true;
}
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x68f2, quirk_radeon_32bit_msi);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0xaa68, quirk_radeon_32bit_msi);
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index df038442548a..1f61fab59d9b 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -47,9 +47,8 @@ struct pci_dn *pci_get_pdn(struct pci_dev *pdev)
void *update_dn_pci_info(struct device_node *dn, void *data)
{
struct pci_controller *phb = data;
- const int *type =
- of_get_property(dn, "ibm,pci-config-space-type", NULL);
- const u32 *regs;
+ const __be32 *type = of_get_property(dn, "ibm,pci-config-space-type", NULL);
+ const __be32 *regs;
struct pci_dn *pdn;
pdn = zalloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL);
@@ -63,12 +62,14 @@ void *update_dn_pci_info(struct device_node *dn, void *data)
#endif
regs = of_get_property(dn, "reg", NULL);
if (regs) {
+ u32 addr = of_read_number(regs, 1);
+
/* First register entry is addr (00BBSS00) */
- pdn->busno = (regs[0] >> 16) & 0xff;
- pdn->devfn = (regs[0] >> 8) & 0xff;
+ pdn->busno = (addr >> 16) & 0xff;
+ pdn->devfn = (addr >> 8) & 0xff;
}
- pdn->pci_ext_config_space = (type && *type == 1);
+ pdn->pci_ext_config_space = (type && of_read_number(type, 1) == 1);
return NULL;
}
@@ -98,12 +99,13 @@ void *traverse_pci_devices(struct device_node *start, traverse_func pre,
/* We started with a phb, iterate all childs */
for (dn = start->child; dn; dn = nextdn) {
- const u32 *classp;
- u32 class;
+ const __be32 *classp;
+ u32 class = 0;
nextdn = NULL;
classp = of_get_property(dn, "class-code", NULL);
- class = classp ? *classp : 0;
+ if (classp)
+ class = of_read_number(classp, 1);
if (pre && ((ret = pre(dn, data)) != NULL))
return ret;
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 2a67e9baa59f..4368ec6fdc8c 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -24,12 +24,12 @@
*/
static u32 get_int_prop(struct device_node *np, const char *name, u32 def)
{
- const u32 *prop;
+ const __be32 *prop;
int len;
prop = of_get_property(np, name, &len);
if (prop && len >= 4)
- return *prop;
+ return of_read_number(prop, 1);
return def;
}
@@ -77,7 +77,7 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev)
unsigned int flags;
struct pci_bus_region region;
struct resource *res;
- const u32 *addrs;
+ const __be32 *addrs;
u32 i;
int proplen;
@@ -86,14 +86,14 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev)
return;
pr_debug(" parse addresses (%d bytes) @ %p\n", proplen, addrs);
for (; proplen >= 20; proplen -= 20, addrs += 5) {
- flags = pci_parse_of_flags(addrs[0], 0);
+ flags = pci_parse_of_flags(of_read_number(addrs, 1), 0);
if (!flags)
continue;
base = of_read_number(&addrs[1], 2);
size = of_read_number(&addrs[3], 2);
if (!size)
continue;
- i = addrs[0] & 0xff;
+ i = of_read_number(addrs, 1) & 0xff;
pr_debug(" base: %llx, size: %llx, i: %x\n",
(unsigned long long)base,
(unsigned long long)size, i);
@@ -128,7 +128,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
const char *type;
struct pci_slot *slot;
- dev = alloc_pci_dev();
+ dev = pci_alloc_dev(bus);
if (!dev)
return NULL;
type = of_get_property(node, "device_type", NULL);
@@ -137,7 +137,6 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
pr_debug(" create device, devfn: %x, type: %s\n", devfn, type);
- dev->bus = bus;
dev->dev.of_node = of_node_get(node);
dev->dev.parent = bus->bridge;
dev->dev.bus = &pci_bus_type;
@@ -165,7 +164,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
pr_debug(" class: 0x%x\n", dev->class);
pr_debug(" revision: 0x%x\n", dev->revision);
- dev->current_state = 4; /* unknown power state */
+ dev->current_state = PCI_UNKNOWN; /* unknown power state */
dev->error_state = pci_channel_io_normal;
dev->dma_mask = 0xffffffff;
@@ -208,7 +207,7 @@ void of_scan_pci_bridge(struct pci_dev *dev)
{
struct device_node *node = dev->dev.of_node;
struct pci_bus *bus;
- const u32 *busrange, *ranges;
+ const __be32 *busrange, *ranges;
int len, i, mode;
struct pci_bus_region region;
struct resource *res;
@@ -231,15 +230,21 @@ void of_scan_pci_bridge(struct pci_dev *dev)
return;
}
- bus = pci_add_new_bus(dev->bus, dev, busrange[0]);
+ bus = pci_find_bus(pci_domain_nr(dev->bus),
+ of_read_number(busrange, 1));
if (!bus) {
- printk(KERN_ERR "Failed to create pci bus for %s\n",
- node->full_name);
- return;
+ bus = pci_add_new_bus(dev->bus, dev,
+ of_read_number(busrange, 1));
+ if (!bus) {
+ printk(KERN_ERR "Failed to create pci bus for %s\n",
+ node->full_name);
+ return;
+ }
}
bus->primary = dev->bus->number;
- pci_bus_insert_busn_res(bus, busrange[0], busrange[1]);
+ pci_bus_insert_busn_res(bus, of_read_number(busrange, 1),
+ of_read_number(busrange+1, 1));
bus->bridge_ctl = 0;
/* parse ranges property */
@@ -252,7 +257,7 @@ void of_scan_pci_bridge(struct pci_dev *dev)
}
i = 1;
for (; len >= 32; len -= 32, ranges += 8) {
- flags = pci_parse_of_flags(ranges[0], 1);
+ flags = pci_parse_of_flags(of_read_number(ranges, 1), 1);
size = of_read_number(&ranges[6], 2);
if (flags == 0 || size == 0)
continue;
@@ -293,6 +298,38 @@ void of_scan_pci_bridge(struct pci_dev *dev)
}
EXPORT_SYMBOL(of_scan_pci_bridge);
+static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus,
+ struct device_node *dn)
+{
+ struct pci_dev *dev = NULL;
+ const u32 *reg;
+ int reglen, devfn;
+
+ pr_debug(" * %s\n", dn->full_name);
+ if (!of_device_is_available(dn))
+ return NULL;
+
+ reg = of_get_property(dn, "reg", &reglen);
+ if (reg == NULL || reglen < 20)
+ return NULL;
+ devfn = (reg[0] >> 8) & 0xff;
+
+ /* Check if the PCI device is already there */
+ dev = pci_get_slot(bus, devfn);
+ if (dev) {
+ pci_dev_put(dev);
+ return dev;
+ }
+
+ /* create a new pci_dev for this device */
+ dev = of_create_pci_dev(dn, bus, devfn);
+ if (!dev)
+ return NULL;
+
+ pr_debug(" dev header type: %x\n", dev->hdr_type);
+ return dev;
+}
+
/**
* __of_scan_bus - given a PCI bus node, setup bus and scan for child devices
* @node: device tree node for the PCI bus
@@ -303,8 +340,6 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus,
int rescan_existing)
{
struct device_node *child;
- const u32 *reg;
- int reglen, devfn;
struct pci_dev *dev;
pr_debug("of_scan_bus(%s) bus no %d...\n",
@@ -312,16 +347,7 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus,
/* Scan direct children */
for_each_child_of_node(node, child) {
- pr_debug(" * %s\n", child->full_name);
- if (!of_device_is_available(child))
- continue;
- reg = of_get_property(child, "reg", &reglen);
- if (reg == NULL || reglen < 20)
- continue;
- devfn = (reg[0] >> 8) & 0xff;
-
- /* create a new pci_dev for this device */
- dev = of_create_pci_dev(child, bus, devfn);
+ dev = of_scan_pci_dev(bus, child);
if (!dev)
continue;
pr_debug(" dev header type: %x\n", dev->hdr_type);
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index c29666586998..21646dbe1bb3 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -96,7 +96,9 @@ EXPORT_SYMBOL(pci_dram_offset);
EXPORT_SYMBOL(start_thread);
+#ifdef CONFIG_PPC_FPU
EXPORT_SYMBOL(giveup_fpu);
+#endif
#ifdef CONFIG_ALTIVEC
EXPORT_SYMBOL(giveup_altivec);
#endif /* CONFIG_ALTIVEC */
@@ -111,7 +113,6 @@ EXPORT_SYMBOL(giveup_spe);
#ifndef CONFIG_PPC64
EXPORT_SYMBOL(flush_instruction_cache);
#endif
-EXPORT_SYMBOL(__flush_icache_range);
EXPORT_SYMBOL(flush_dcache_range);
#ifdef CONFIG_SMP
diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c
index feb8580fdc84..c30612aad68e 100644
--- a/arch/powerpc/kernel/proc_powerpc.c
+++ b/arch/powerpc/kernel/proc_powerpc.c
@@ -29,25 +29,9 @@
#ifdef CONFIG_PPC64
-static loff_t page_map_seek( struct file *file, loff_t off, int whence)
+static loff_t page_map_seek(struct file *file, loff_t off, int whence)
{
- loff_t new;
- switch(whence) {
- case 0:
- new = off;
- break;
- case 1:
- new = file->f_pos + off;
- break;
- case 2:
- new = PAGE_SIZE + off;
- break;
- default:
- return -EINVAL;
- }
- if ( new < 0 || new > PAGE_SIZE )
- return -EINVAL;
- return (file->f_pos = new);
+ return fixed_size_llseek(file, off, whence, PAGE_SIZE);
}
static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes,
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index a902723fdc69..96d2fdf3aa9e 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -74,6 +74,7 @@ struct task_struct *last_task_used_vsx = NULL;
struct task_struct *last_task_used_spe = NULL;
#endif
+#ifdef CONFIG_PPC_FPU
/*
* Make sure the floating-point register state in the
* the thread_struct is up to date for task tsk.
@@ -107,6 +108,7 @@ void flush_fp_to_thread(struct task_struct *tsk)
}
}
EXPORT_SYMBOL_GPL(flush_fp_to_thread);
+#endif
void enable_kernel_fp(void)
{
@@ -399,7 +401,8 @@ static inline int __set_dabr(unsigned long dabr, unsigned long dabrx)
static inline int __set_dabr(unsigned long dabr, unsigned long dabrx)
{
mtspr(SPRN_DABR, dabr);
- mtspr(SPRN_DABRX, dabrx);
+ if (cpu_has_feature(CPU_FTR_DABRX))
+ mtspr(SPRN_DABRX, dabrx);
return 0;
}
#else
@@ -599,6 +602,16 @@ struct task_struct *__switch_to(struct task_struct *prev,
struct ppc64_tlb_batch *batch;
#endif
+ /* Back up the TAR across context switches.
+ * Note that the TAR is not available for use in the kernel. (To
+ * provide this, the TAR should be backed up/restored on exception
+ * entry/exit instead, and be in pt_regs. FIXME, this should be in
+ * pt_regs anyway (for debug).)
+ * Save the TAR here before we do treclaim/trecheckpoint as these
+ * will change the TAR.
+ */
+ save_tar(&prev->thread);
+
__switch_to_tm(prev);
#ifdef CONFIG_SMP
@@ -915,7 +928,11 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
flush_altivec_to_thread(src);
flush_vsx_to_thread(src);
flush_spe_to_thread(src);
+
*dst = *src;
+
+ clear_task_ebb(dst);
+
return 0;
}
@@ -983,9 +1000,10 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
kregs = (struct pt_regs *) sp;
sp -= STACK_FRAME_OVERHEAD;
p->thread.ksp = sp;
+#ifdef CONFIG_PPC32
p->thread.ksp_limit = (unsigned long)task_stack_page(p) +
_ALIGN_UP(sizeof(struct thread_info), 16);
-
+#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
p->thread.ptrace_bps[0] = NULL;
#endif
@@ -1368,7 +1386,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
#ifdef CONFIG_PPC64
/* Called with hard IRQs off */
-void __ppc64_runlatch_on(void)
+void notrace __ppc64_runlatch_on(void)
{
struct thread_info *ti = current_thread_info();
unsigned long ctrl;
@@ -1381,7 +1399,7 @@ void __ppc64_runlatch_on(void)
}
/* Called with hard IRQs off */
-void __ppc64_runlatch_off(void)
+void notrace __ppc64_runlatch_off(void)
{
struct thread_info *ti = current_thread_info();
unsigned long ctrl;
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 8b6f7a99cce2..b7634ce41dbc 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -215,16 +215,16 @@ static void __init check_cpu_pa_features(unsigned long node)
#ifdef CONFIG_PPC_STD_MMU_64
static void __init check_cpu_slb_size(unsigned long node)
{
- u32 *slb_size_ptr;
+ __be32 *slb_size_ptr;
slb_size_ptr = of_get_flat_dt_prop(node, "slb-size", NULL);
if (slb_size_ptr != NULL) {
- mmu_slb_size = *slb_size_ptr;
+ mmu_slb_size = be32_to_cpup(slb_size_ptr);
return;
}
slb_size_ptr = of_get_flat_dt_prop(node, "ibm,slb-size", NULL);
if (slb_size_ptr != NULL) {
- mmu_slb_size = *slb_size_ptr;
+ mmu_slb_size = be32_to_cpup(slb_size_ptr);
}
}
#else
@@ -279,11 +279,11 @@ static void __init check_cpu_feature_properties(unsigned long node)
{
unsigned long i;
struct feature_property *fp = feature_properties;
- const u32 *prop;
+ const __be32 *prop;
for (i = 0; i < ARRAY_SIZE(feature_properties); ++i, ++fp) {
prop = of_get_flat_dt_prop(node, fp->name, NULL);
- if (prop && *prop >= fp->min_value) {
+ if (prop && be32_to_cpup(prop) >= fp->min_value) {
cur_cpu_spec->cpu_features |= fp->cpu_feature;
cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftr;
}
@@ -295,8 +295,8 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
void *data)
{
char *type = of_get_flat_dt_prop(node, "device_type", NULL);
- const u32 *prop;
- const u32 *intserv;
+ const __be32 *prop;
+ const __be32 *intserv;
int i, nthreads;
unsigned long len;
int found = -1;
@@ -324,8 +324,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
* version 2 of the kexec param format adds the phys cpuid of
* booted proc.
*/
- if (initial_boot_params->version >= 2) {
- if (intserv[i] == initial_boot_params->boot_cpuid_phys) {
+ if (be32_to_cpu(initial_boot_params->version) >= 2) {
+ if (be32_to_cpu(intserv[i]) ==
+ be32_to_cpu(initial_boot_params->boot_cpuid_phys)) {
found = boot_cpu_count;
found_thread = i;
}
@@ -347,9 +348,10 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
if (found >= 0) {
DBG("boot cpu: logical %d physical %d\n", found,
- intserv[found_thread]);
+ be32_to_cpu(intserv[found_thread]));
boot_cpuid = found;
- set_hard_smp_processor_id(found, intserv[found_thread]);
+ set_hard_smp_processor_id(found,
+ be32_to_cpu(intserv[found_thread]));
/*
* PAPR defines "logical" PVR values for cpus that
@@ -366,8 +368,8 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
* it uses 0x0f000001.
*/
prop = of_get_flat_dt_prop(node, "cpu-version", NULL);
- if (prop && (*prop & 0xff000000) == 0x0f000000)
- identify_cpu(0, *prop);
+ if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000)
+ identify_cpu(0, be32_to_cpup(prop));
identical_pvr_fixup(node);
}
@@ -389,7 +391,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname,
int depth, void *data)
{
- unsigned long *lprop;
+ unsigned long *lprop; /* All these set by kernel, so no need to convert endian */
/* Use common scan routine to determine if this is the chosen node */
if (early_init_dt_scan_chosen(node, uname, depth, data) == 0)
@@ -454,7 +456,7 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node)
if (dm == NULL || l < sizeof(__be32))
return 0;
- n = *dm++; /* number of entries */
+ n = of_read_number(dm++, 1); /* number of entries */
if (l < (n * (dt_root_addr_cells + 4) + 1) * sizeof(__be32))
return 0;
@@ -466,7 +468,7 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node)
for (; n != 0; --n) {
base = dt_mem_next_cell(dt_root_addr_cells, &dm);
- flags = dm[3];
+ flags = of_read_number(&dm[3], 1);
/* skip DRC index, pad, assoc. list index, flags */
dm += 4;
/* skip this block if the reserved bit is set in flags (0x80)
@@ -544,14 +546,8 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
memblock_add(base, size);
}
-void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
-{
- return __va(memblock_alloc(size, align));
-}
-
#ifdef CONFIG_BLK_DEV_INITRD
-void __init early_init_dt_setup_initrd_arch(unsigned long start,
- unsigned long end)
+void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
{
initrd_start = (unsigned long)__va(start);
initrd_end = (unsigned long)__va(end);
@@ -559,27 +555,60 @@ void __init early_init_dt_setup_initrd_arch(unsigned long start,
}
#endif
+static void __init early_reserve_mem_dt(void)
+{
+ unsigned long i, len, dt_root;
+ const __be32 *prop;
+
+ dt_root = of_get_flat_dt_root();
+
+ prop = of_get_flat_dt_prop(dt_root, "reserved-ranges", &len);
+
+ if (!prop)
+ return;
+
+ DBG("Found new-style reserved-ranges\n");
+
+ /* Each reserved range is an (address,size) pair, 2 cells each,
+ * totalling 4 cells per range. */
+ for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
+ u64 base, size;
+
+ base = of_read_number(prop + (i * 4) + 0, 2);
+ size = of_read_number(prop + (i * 4) + 2, 2);
+
+ if (size) {
+ DBG("reserving: %llx -> %llx\n", base, size);
+ memblock_reserve(base, size);
+ }
+ }
+}
+
static void __init early_reserve_mem(void)
{
u64 base, size;
- u64 *reserve_map;
+ __be64 *reserve_map;
unsigned long self_base;
unsigned long self_size;
- reserve_map = (u64 *)(((unsigned long)initial_boot_params) +
- initial_boot_params->off_mem_rsvmap);
+ reserve_map = (__be64 *)(((unsigned long)initial_boot_params) +
+ be32_to_cpu(initial_boot_params->off_mem_rsvmap));
/* before we do anything, lets reserve the dt blob */
self_base = __pa((unsigned long)initial_boot_params);
- self_size = initial_boot_params->totalsize;
+ self_size = be32_to_cpu(initial_boot_params->totalsize);
memblock_reserve(self_base, self_size);
+ /* Look for the new "reserved-regions" property in the DT */
+ early_reserve_mem_dt();
+
#ifdef CONFIG_BLK_DEV_INITRD
- /* then reserve the initrd, if any */
- if (initrd_start && (initrd_end > initrd_start))
+ /* Then reserve the initrd, if any */
+ if (initrd_start && (initrd_end > initrd_start)) {
memblock_reserve(_ALIGN_DOWN(__pa(initrd_start), PAGE_SIZE),
_ALIGN_UP(initrd_end, PAGE_SIZE) -
_ALIGN_DOWN(initrd_start, PAGE_SIZE));
+ }
#endif /* CONFIG_BLK_DEV_INITRD */
#ifdef CONFIG_PPC32
@@ -587,13 +616,15 @@ static void __init early_reserve_mem(void)
* Handle the case where we might be booting from an old kexec
* image that setup the mem_rsvmap as pairs of 32-bit values
*/
- if (*reserve_map > 0xffffffffull) {
+ if (be64_to_cpup(reserve_map) > 0xffffffffull) {
u32 base_32, size_32;
- u32 *reserve_map_32 = (u32 *)reserve_map;
+ __be32 *reserve_map_32 = (__be32 *)reserve_map;
+
+ DBG("Found old 32-bit reserve map\n");
while (1) {
- base_32 = *(reserve_map_32++);
- size_32 = *(reserve_map_32++);
+ base_32 = be32_to_cpup(reserve_map_32++);
+ size_32 = be32_to_cpup(reserve_map_32++);
if (size_32 == 0)
break;
/* skip if the reservation is for the blob */
@@ -605,9 +636,12 @@ static void __init early_reserve_mem(void)
return;
}
#endif
+ DBG("Processing reserve map\n");
+
+ /* Handle the reserve map in the fdt blob if it exists */
while (1) {
- base = *(reserve_map++);
- size = *(reserve_map++);
+ base = be64_to_cpup(reserve_map++);
+ size = be64_to_cpup(reserve_map++);
if (size == 0)
break;
DBG("reserving: %llx -> %llx\n", base, size);
@@ -757,6 +791,32 @@ struct device_node *of_find_next_cache_node(struct device_node *np)
return NULL;
}
+/**
+ * of_get_ibm_chip_id - Returns the IBM "chip-id" of a device
+ * @np: device node of the device
+ *
+ * This looks for a property "ibm,chip-id" in the node or any
+ * of its parents and returns its content, or -1 if it cannot
+ * be found.
+ */
+int of_get_ibm_chip_id(struct device_node *np)
+{
+ of_node_get(np);
+ while(np) {
+ struct device_node *old = np;
+ const __be32 *prop;
+
+ prop = of_get_property(np, "ibm,chip-id", NULL);
+ if (prop) {
+ of_node_put(np);
+ return be32_to_cpup(prop);
+ }
+ np = of_get_parent(np);
+ of_node_put(old);
+ }
+ return -1;
+}
+
#ifdef CONFIG_PPC_PSERIES
/*
* Fix up the uninitialized fields in a new device node:
@@ -827,49 +887,10 @@ static int __init prom_reconfig_setup(void)
__initcall(prom_reconfig_setup);
#endif
-/* Find the device node for a given logical cpu number, also returns the cpu
- * local thread number (index in ibm,interrupt-server#s) if relevant and
- * asked for (non NULL)
- */
-struct device_node *of_get_cpu_node(int cpu, unsigned int *thread)
+bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
{
- int hardid;
- struct device_node *np;
-
- hardid = get_hard_smp_processor_id(cpu);
-
- for_each_node_by_type(np, "cpu") {
- const u32 *intserv;
- unsigned int plen, t;
-
- /* Check for ibm,ppc-interrupt-server#s. If it doesn't exist
- * fallback to "reg" property and assume no threads
- */
- intserv = of_get_property(np, "ibm,ppc-interrupt-server#s",
- &plen);
- if (intserv == NULL) {
- const u32 *reg = of_get_property(np, "reg", NULL);
- if (reg == NULL)
- continue;
- if (*reg == hardid) {
- if (thread)
- *thread = 0;
- return np;
- }
- } else {
- plen /= sizeof(u32);
- for (t = 0; t < plen; t++) {
- if (hardid == intserv[t]) {
- if (thread)
- *thread = t;
- return np;
- }
- }
- }
- }
- return NULL;
+ return (int)phys_id == get_hard_smp_processor_id(cpu);
}
-EXPORT_SYMBOL(of_get_cpu_node);
#if defined(CONFIG_DEBUG_FS) && defined(DEBUG)
static struct debugfs_blob_wrapper flat_dt_blob;
@@ -879,7 +900,7 @@ static int __init export_flat_device_tree(void)
struct dentry *d;
flat_dt_blob.data = initial_boot_params;
- flat_dt_blob.size = initial_boot_params->totalsize;
+ flat_dt_blob.size = be32_to_cpu(initial_boot_params->totalsize);
d = debugfs_create_blob("flat-device-tree", S_IFREG | S_IRUSR,
powerpc_debugfs_root, &flat_dt_blob);
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 5eccda9fd33f..5fe2842e8bab 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -107,10 +107,10 @@ int of_workarounds;
typedef u32 prom_arg_t;
struct prom_args {
- u32 service;
- u32 nargs;
- u32 nret;
- prom_arg_t args[10];
+ __be32 service;
+ __be32 nargs;
+ __be32 nret;
+ __be32 args[10];
};
struct prom_t {
@@ -123,11 +123,11 @@ struct prom_t {
};
struct mem_map_entry {
- u64 base;
- u64 size;
+ __be64 base;
+ __be64 size;
};
-typedef u32 cell_t;
+typedef __be32 cell_t;
extern void __start(unsigned long r3, unsigned long r4, unsigned long r5,
unsigned long r6, unsigned long r7, unsigned long r8,
@@ -196,6 +196,8 @@ static int __initdata mem_reserve_cnt;
static cell_t __initdata regbuf[1024];
+static bool rtas_has_query_cpu_stopped;
+
/*
* Error results ... some OF calls will return "-1" on error, some
@@ -219,13 +221,13 @@ static int __init call_prom(const char *service, int nargs, int nret, ...)
struct prom_args args;
va_list list;
- args.service = ADDR(service);
- args.nargs = nargs;
- args.nret = nret;
+ args.service = cpu_to_be32(ADDR(service));
+ args.nargs = cpu_to_be32(nargs);
+ args.nret = cpu_to_be32(nret);
va_start(list, nret);
for (i = 0; i < nargs; i++)
- args.args[i] = va_arg(list, prom_arg_t);
+ args.args[i] = cpu_to_be32(va_arg(list, prom_arg_t));
va_end(list);
for (i = 0; i < nret; i++)
@@ -234,7 +236,7 @@ static int __init call_prom(const char *service, int nargs, int nret, ...)
if (enter_prom(&args, prom_entry) < 0)
return PROM_ERROR;
- return (nret > 0) ? args.args[nargs] : 0;
+ return (nret > 0) ? be32_to_cpu(args.args[nargs]) : 0;
}
static int __init call_prom_ret(const char *service, int nargs, int nret,
@@ -244,13 +246,13 @@ static int __init call_prom_ret(const char *service, int nargs, int nret,
struct prom_args args;
va_list list;
- args.service = ADDR(service);
- args.nargs = nargs;
- args.nret = nret;
+ args.service = cpu_to_be32(ADDR(service));
+ args.nargs = cpu_to_be32(nargs);
+ args.nret = cpu_to_be32(nret);
va_start(list, rets);
for (i = 0; i < nargs; i++)
- args.args[i] = va_arg(list, prom_arg_t);
+ args.args[i] = cpu_to_be32(va_arg(list, prom_arg_t));
va_end(list);
for (i = 0; i < nret; i++)
@@ -261,9 +263,9 @@ static int __init call_prom_ret(const char *service, int nargs, int nret,
if (rets != NULL)
for (i = 1; i < nret; ++i)
- rets[i-1] = args.args[nargs+i];
+ rets[i-1] = be32_to_cpu(args.args[nargs+i]);
- return (nret > 0) ? args.args[nargs] : 0;
+ return (nret > 0) ? be32_to_cpu(args.args[nargs]) : 0;
}
@@ -527,7 +529,7 @@ static int __init prom_setprop(phandle node, const char *nodename,
#define islower(c) ('a' <= (c) && (c) <= 'z')
#define toupper(c) (islower(c) ? ((c) - 'a' + 'A') : (c))
-unsigned long prom_strtoul(const char *cp, const char **endp)
+static unsigned long prom_strtoul(const char *cp, const char **endp)
{
unsigned long result = 0, base = 10, value;
@@ -552,7 +554,7 @@ unsigned long prom_strtoul(const char *cp, const char **endp)
return result;
}
-unsigned long prom_memparse(const char *ptr, const char **retptr)
+static unsigned long prom_memparse(const char *ptr, const char **retptr)
{
unsigned long ret = prom_strtoul(ptr, retptr);
int shift = 0;
@@ -644,7 +646,8 @@ unsigned char ibm_architecture_vec[] = {
W(0xfffe0000), W(0x003a0000), /* POWER5/POWER5+ */
W(0xffff0000), W(0x003e0000), /* POWER6 */
W(0xffff0000), W(0x003f0000), /* POWER7 */
- W(0xffff0000), W(0x004b0000), /* POWER8 */
+ W(0xffff0000), W(0x004b0000), /* POWER8E */
+ W(0xffff0000), W(0x004d0000), /* POWER8 */
W(0xffffffff), W(0x0f000004), /* all 2.07-compliant */
W(0xffffffff), W(0x0f000003), /* all 2.06-compliant */
W(0xffffffff), W(0x0f000002), /* all 2.05-compliant */
@@ -706,7 +709,7 @@ unsigned char ibm_architecture_vec[] = {
* must match by the macro below. Update the definition if
* the structure layout changes.
*/
-#define IBM_ARCH_VEC_NRCORES_OFFSET 117
+#define IBM_ARCH_VEC_NRCORES_OFFSET 125
W(NR_CPUS), /* number of cores supported */
0,
0,
@@ -723,7 +726,8 @@ unsigned char ibm_architecture_vec[] = {
};
-/* Old method - ELF header with PT_NOTE sections */
+/* Old method - ELF header with PT_NOTE sections only works on BE */
+#ifdef __BIG_ENDIAN__
static struct fake_elf {
Elf32_Ehdr elfhdr;
Elf32_Phdr phdr[2];
@@ -809,6 +813,7 @@ static struct fake_elf {
}
}
};
+#endif /* __BIG_ENDIAN__ */
static int __init prom_count_smt_threads(void)
{
@@ -851,9 +856,9 @@ static int __init prom_count_smt_threads(void)
static void __init prom_send_capabilities(void)
{
- ihandle elfloader, root;
+ ihandle root;
prom_arg_t ret;
- u32 *cores;
+ __be32 *cores;
root = call_prom("open", 1, 1, ADDR("/"));
if (root != 0) {
@@ -863,15 +868,15 @@ static void __init prom_send_capabilities(void)
* (we assume this is the same for all cores) and use it to
* divide NR_CPUS.
*/
- cores = (u32 *)&ibm_architecture_vec[IBM_ARCH_VEC_NRCORES_OFFSET];
- if (*cores != NR_CPUS) {
+ cores = (__be32 *)&ibm_architecture_vec[IBM_ARCH_VEC_NRCORES_OFFSET];
+ if (be32_to_cpup(cores) != NR_CPUS) {
prom_printf("WARNING ! "
"ibm_architecture_vec structure inconsistent: %lu!\n",
- *cores);
+ be32_to_cpup(cores));
} else {
- *cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads());
+ *cores = cpu_to_be32(DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads()));
prom_printf("Max number of cores passed to firmware: %lu (NR_CPUS = %lu)\n",
- *cores, NR_CPUS);
+ be32_to_cpup(cores), NR_CPUS);
}
/* try calling the ibm,client-architecture-support method */
@@ -892,17 +897,24 @@ static void __init prom_send_capabilities(void)
prom_printf(" not implemented\n");
}
- /* no ibm,client-architecture-support call, try the old way */
- elfloader = call_prom("open", 1, 1, ADDR("/packages/elf-loader"));
- if (elfloader == 0) {
- prom_printf("couldn't open /packages/elf-loader\n");
- return;
+#ifdef __BIG_ENDIAN__
+ {
+ ihandle elfloader;
+
+ /* no ibm,client-architecture-support call, try the old way */
+ elfloader = call_prom("open", 1, 1,
+ ADDR("/packages/elf-loader"));
+ if (elfloader == 0) {
+ prom_printf("couldn't open /packages/elf-loader\n");
+ return;
+ }
+ call_prom("call-method", 3, 1, ADDR("process-elf-header"),
+ elfloader, ADDR(&fake_elf));
+ call_prom("close", 1, 0, elfloader);
}
- call_prom("call-method", 3, 1, ADDR("process-elf-header"),
- elfloader, ADDR(&fake_elf));
- call_prom("close", 1, 0, elfloader);
+#endif /* __BIG_ENDIAN__ */
}
-#endif
+#endif /* #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
/*
* Memory allocation strategy... our layout is normally:
@@ -1049,11 +1061,11 @@ static unsigned long __init prom_next_cell(int s, cell_t **cellp)
p++;
s--;
}
- r = *p++;
+ r = be32_to_cpu(*p++);
#ifdef CONFIG_PPC64
if (s > 1) {
r <<= 32;
- r |= *(p++);
+ r |= be32_to_cpu(*(p++));
}
#endif
*cellp = p;
@@ -1086,8 +1098,8 @@ static void __init reserve_mem(u64 base, u64 size)
if (cnt >= (MEM_RESERVE_MAP_SIZE - 1))
prom_panic("Memory reserve map exhausted !\n");
- mem_reserve_map[cnt].base = base;
- mem_reserve_map[cnt].size = size;
+ mem_reserve_map[cnt].base = cpu_to_be64(base);
+ mem_reserve_map[cnt].size = cpu_to_be64(size);
mem_reserve_cnt = cnt + 1;
}
@@ -1101,6 +1113,7 @@ static void __init prom_init_mem(void)
char *path, type[64];
unsigned int plen;
cell_t *p, *endp;
+ __be32 val;
u32 rac, rsc;
/*
@@ -1108,12 +1121,14 @@ static void __init prom_init_mem(void)
* 1) top of RMO (first node)
* 2) top of memory
*/
- rac = 2;
- prom_getprop(prom.root, "#address-cells", &rac, sizeof(rac));
- rsc = 1;
- prom_getprop(prom.root, "#size-cells", &rsc, sizeof(rsc));
- prom_debug("root_addr_cells: %x\n", (unsigned long) rac);
- prom_debug("root_size_cells: %x\n", (unsigned long) rsc);
+ val = cpu_to_be32(2);
+ prom_getprop(prom.root, "#address-cells", &val, sizeof(val));
+ rac = be32_to_cpu(val);
+ val = cpu_to_be32(1);
+ prom_getprop(prom.root, "#size-cells", &val, sizeof(rsc));
+ rsc = be32_to_cpu(val);
+ prom_debug("root_addr_cells: %x\n", rac);
+ prom_debug("root_size_cells: %x\n", rsc);
prom_debug("scanning memory:\n");
path = prom_scratch;
@@ -1221,25 +1236,23 @@ static void __init prom_init_mem(void)
static void __init prom_close_stdin(void)
{
- ihandle val;
+ __be32 val;
+ ihandle stdin;
- if (prom_getprop(prom.chosen, "stdin", &val, sizeof(val)) > 0)
- call_prom("close", 1, 0, val);
+ if (prom_getprop(prom.chosen, "stdin", &val, sizeof(val)) > 0) {
+ stdin = be32_to_cpu(val);
+ call_prom("close", 1, 0, stdin);
+ }
}
#ifdef CONFIG_PPC_POWERNV
-static u64 __initdata prom_opal_size;
-static u64 __initdata prom_opal_align;
-static int __initdata prom_rtas_start_cpu;
-static u64 __initdata prom_rtas_data;
-static u64 __initdata prom_rtas_entry;
-
#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
static u64 __initdata prom_opal_base;
static u64 __initdata prom_opal_entry;
#endif
+#ifdef __BIG_ENDIAN__
/* XXX Don't change this structure without updating opal-takeover.S */
static struct opal_secondary_data {
s64 ack; /* 0 */
@@ -1247,6 +1260,12 @@ static struct opal_secondary_data {
struct opal_takeover_args args; /* 16 */
} opal_secondary_data;
+static u64 __initdata prom_opal_align;
+static u64 __initdata prom_opal_size;
+static int __initdata prom_rtas_start_cpu;
+static u64 __initdata prom_rtas_data;
+static u64 __initdata prom_rtas_entry;
+
extern char opal_secondary_entry;
static void __init prom_query_opal(void)
@@ -1264,6 +1283,7 @@ static void __init prom_query_opal(void)
}
prom_printf("Querying for OPAL presence... ");
+
rc = opal_query_takeover(&prom_opal_size,
&prom_opal_align);
prom_debug("(rc = %ld) ", rc);
@@ -1279,7 +1299,8 @@ static void __init prom_query_opal(void)
prom_opal_align = 0x10000;
}
-static int prom_rtas_call(int token, int nargs, int nret, int *outputs, ...)
+static int __init prom_rtas_call(int token, int nargs, int nret,
+ int *outputs, ...)
{
struct rtas_args rtas_args;
va_list list;
@@ -1424,6 +1445,7 @@ static void __init prom_opal_takeover(void)
for (;;)
opal_do_takeover(args);
}
+#endif /* __BIG_ENDIAN__ */
/*
* Allocate room for and instantiate OPAL
@@ -1434,6 +1456,7 @@ static void __init prom_instantiate_opal(void)
ihandle opal_inst;
u64 base, entry;
u64 size = 0, align = 0x10000;
+ __be64 val64;
u32 rets[2];
prom_debug("prom_instantiate_opal: start...\n");
@@ -1443,11 +1466,14 @@ static void __init prom_instantiate_opal(void)
if (!PHANDLE_VALID(opal_node))
return;
- prom_getprop(opal_node, "opal-runtime-size", &size, sizeof(size));
+ val64 = 0;
+ prom_getprop(opal_node, "opal-runtime-size", &val64, sizeof(val64));
+ size = be64_to_cpu(val64);
if (size == 0)
return;
- prom_getprop(opal_node, "opal-runtime-alignment", &align,
- sizeof(align));
+ val64 = 0;
+ prom_getprop(opal_node, "opal-runtime-alignment", &val64,sizeof(val64));
+ align = be64_to_cpu(val64);
base = alloc_down(size, align, 0);
if (base == 0) {
@@ -1504,6 +1530,7 @@ static void __init prom_instantiate_rtas(void)
phandle rtas_node;
ihandle rtas_inst;
u32 base, entry = 0;
+ __be32 val;
u32 size = 0;
prom_debug("prom_instantiate_rtas: start...\n");
@@ -1513,7 +1540,9 @@ static void __init prom_instantiate_rtas(void)
if (!PHANDLE_VALID(rtas_node))
return;
- prom_getprop(rtas_node, "rtas-size", &size, sizeof(size));
+ val = 0;
+ prom_getprop(rtas_node, "rtas-size", &val, sizeof(size));
+ size = be32_to_cpu(val);
if (size == 0)
return;
@@ -1540,12 +1569,19 @@ static void __init prom_instantiate_rtas(void)
reserve_mem(base, size);
+ val = cpu_to_be32(base);
prom_setprop(rtas_node, "/rtas", "linux,rtas-base",
- &base, sizeof(base));
+ &val, sizeof(val));
+ val = cpu_to_be32(entry);
prom_setprop(rtas_node, "/rtas", "linux,rtas-entry",
- &entry, sizeof(entry));
+ &val, sizeof(val));
-#ifdef CONFIG_PPC_POWERNV
+ /* Check if it supports "query-cpu-stopped-state" */
+ if (prom_getprop(rtas_node, "query-cpu-stopped-state",
+ &val, sizeof(val)) != PROM_ERROR)
+ rtas_has_query_cpu_stopped = true;
+
+#if defined(CONFIG_PPC_POWERNV) && defined(__BIG_ENDIAN__)
/* PowerVN takeover hack */
prom_rtas_data = base;
prom_rtas_entry = entry;
@@ -1619,6 +1655,7 @@ static void __init prom_instantiate_sml(void)
/*
* Allocate room for and initialize TCE tables
*/
+#ifdef __BIG_ENDIAN__
static void __init prom_initialize_tce_table(void)
{
phandle node;
@@ -1747,7 +1784,8 @@ static void __init prom_initialize_tce_table(void)
/* Flag the first invalid entry */
prom_debug("ending prom_initialize_tce_table\n");
}
-#endif
+#endif /* __BIG_ENDIAN__ */
+#endif /* CONFIG_PPC64 */
/*
* With CHRP SMP we need to use the OF to start the other processors.
@@ -1776,7 +1814,6 @@ static void __init prom_initialize_tce_table(void)
static void __init prom_hold_cpus(void)
{
unsigned long i;
- unsigned int reg;
phandle node;
char type[64];
unsigned long *spinloop
@@ -1785,6 +1822,18 @@ static void __init prom_hold_cpus(void)
= (void *) LOW_ADDR(__secondary_hold_acknowledge);
unsigned long secondary_hold = LOW_ADDR(__secondary_hold);
+ /*
+ * On pseries, if RTAS supports "query-cpu-stopped-state",
+ * we skip this stage, the CPUs will be started by the
+ * kernel using RTAS.
+ */
+ if ((of_platform == PLATFORM_PSERIES ||
+ of_platform == PLATFORM_PSERIES_LPAR) &&
+ rtas_has_query_cpu_stopped) {
+ prom_printf("prom_hold_cpus: skipped\n");
+ return;
+ }
+
prom_debug("prom_hold_cpus: start...\n");
prom_debug(" 1) spinloop = 0x%x\n", (unsigned long)spinloop);
prom_debug(" 1) *spinloop = 0x%x\n", *spinloop);
@@ -1802,6 +1851,9 @@ static void __init prom_hold_cpus(void)
/* look for cpus */
for (node = 0; prom_next_node(&node); ) {
+ unsigned int cpu_no;
+ __be32 reg;
+
type[0] = 0;
prom_getprop(node, "device_type", type, sizeof(type));
if (strcmp(type, "cpu") != 0)
@@ -1812,10 +1864,11 @@ static void __init prom_hold_cpus(void)
if (strcmp(type, "okay") != 0)
continue;
- reg = -1;
+ reg = cpu_to_be32(-1); /* make sparse happy */
prom_getprop(node, "reg", &reg, sizeof(reg));
+ cpu_no = be32_to_cpu(reg);
- prom_debug("cpu hw idx = %lu\n", reg);
+ prom_debug("cpu hw idx = %lu\n", cpu_no);
/* Init the acknowledge var which will be reset by
* the secondary cpu when it awakens from its OF
@@ -1823,24 +1876,24 @@ static void __init prom_hold_cpus(void)
*/
*acknowledge = (unsigned long)-1;
- if (reg != prom.cpu) {
+ if (cpu_no != prom.cpu) {
/* Primary Thread of non-boot cpu or any thread */
- prom_printf("starting cpu hw idx %lu... ", reg);
+ prom_printf("starting cpu hw idx %lu... ", cpu_no);
call_prom("start-cpu", 3, 0, node,
- secondary_hold, reg);
+ secondary_hold, cpu_no);
for (i = 0; (i < 100000000) &&
(*acknowledge == ((unsigned long)-1)); i++ )
mb();
- if (*acknowledge == reg)
+ if (*acknowledge == cpu_no)
prom_printf("done\n");
else
prom_printf("failed: %x\n", *acknowledge);
}
#ifdef CONFIG_SMP
else
- prom_printf("boot cpu hw idx %lu\n", reg);
+ prom_printf("boot cpu hw idx %lu\n", cpu_no);
#endif /* CONFIG_SMP */
}
@@ -1894,6 +1947,7 @@ static void __init prom_find_mmu(void)
prom.memory = call_prom("open", 1, 1, ADDR("/memory"));
prom_getprop(prom.chosen, "mmu", &prom.mmumap,
sizeof(prom.mmumap));
+ prom.mmumap = be32_to_cpu(prom.mmumap);
if (!IHANDLE_VALID(prom.memory) || !IHANDLE_VALID(prom.mmumap))
of_workarounds &= ~OF_WA_CLAIM; /* hmmm */
}
@@ -1905,17 +1959,19 @@ static void __init prom_init_stdout(void)
{
char *path = of_stdout_device;
char type[16];
- u32 val;
+ phandle stdout_node;
+ __be32 val;
if (prom_getprop(prom.chosen, "stdout", &val, sizeof(val)) <= 0)
prom_panic("cannot find stdout");
- prom.stdout = val;
+ prom.stdout = be32_to_cpu(val);
/* Get the full OF pathname of the stdout device */
memset(path, 0, 256);
call_prom("instance-to-path", 3, 1, prom.stdout, path, 255);
- val = call_prom("instance-to-package", 1, 1, prom.stdout);
+ stdout_node = call_prom("instance-to-package", 1, 1, prom.stdout);
+ val = cpu_to_be32(stdout_node);
prom_setprop(prom.chosen, "/chosen", "linux,stdout-package",
&val, sizeof(val));
prom_printf("OF stdout device is: %s\n", of_stdout_device);
@@ -1924,9 +1980,9 @@ static void __init prom_init_stdout(void)
/* If it's a display, note it */
memset(type, 0, sizeof(type));
- prom_getprop(val, "device_type", type, sizeof(type));
+ prom_getprop(stdout_node, "device_type", type, sizeof(type));
if (strcmp(type, "display") == 0)
- prom_setprop(val, path, "linux,boot-display", NULL, 0);
+ prom_setprop(stdout_node, path, "linux,boot-display", NULL, 0);
}
static int __init prom_find_machine_type(void)
@@ -2081,6 +2137,22 @@ static void __init prom_check_displays(void)
clut[2]) != 0)
break;
#endif /* CONFIG_LOGO_LINUX_CLUT224 */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX
+ if (prom_getprop(node, "linux,boot-display", NULL, 0) !=
+ PROM_ERROR) {
+ u32 width, height, pitch, addr;
+
+ prom_printf("Setting btext !\n");
+ prom_getprop(node, "width", &width, 4);
+ prom_getprop(node, "height", &height, 4);
+ prom_getprop(node, "linebytes", &pitch, 4);
+ prom_getprop(node, "address", &addr, 4);
+ prom_printf("W=%d H=%d LB=%d addr=0x%x\n",
+ width, height, pitch, addr);
+ btext_setup_display(width, height, 8, pitch, addr);
+ }
+#endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */
}
}
@@ -2116,8 +2188,10 @@ static void __init *make_room(unsigned long *mem_start, unsigned long *mem_end,
return ret;
}
-#define dt_push_token(token, mem_start, mem_end) \
- do { *((u32 *)make_room(mem_start, mem_end, 4, 4)) = token; } while(0)
+#define dt_push_token(token, mem_start, mem_end) do { \
+ void *room = make_room(mem_start, mem_end, 4, 4); \
+ *(__be32 *)room = cpu_to_be32(token); \
+ } while(0)
static unsigned long __init dt_find_string(char *str)
{
@@ -2290,7 +2364,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
dt_push_token(4, mem_start, mem_end);
dt_push_token(soff, mem_start, mem_end);
valp = make_room(mem_start, mem_end, 4, 4);
- *(u32 *)valp = node;
+ *(__be32 *)valp = cpu_to_be32(node);
}
}
@@ -2363,16 +2437,16 @@ static void __init flatten_device_tree(void)
dt_struct_end = PAGE_ALIGN(mem_start);
/* Finish header */
- hdr->boot_cpuid_phys = prom.cpu;
- hdr->magic = OF_DT_HEADER;
- hdr->totalsize = dt_struct_end - dt_header_start;
- hdr->off_dt_struct = dt_struct_start - dt_header_start;
- hdr->off_dt_strings = dt_string_start - dt_header_start;
- hdr->dt_strings_size = dt_string_end - dt_string_start;
- hdr->off_mem_rsvmap = ((unsigned long)rsvmap) - dt_header_start;
- hdr->version = OF_DT_VERSION;
+ hdr->boot_cpuid_phys = cpu_to_be32(prom.cpu);
+ hdr->magic = cpu_to_be32(OF_DT_HEADER);
+ hdr->totalsize = cpu_to_be32(dt_struct_end - dt_header_start);
+ hdr->off_dt_struct = cpu_to_be32(dt_struct_start - dt_header_start);
+ hdr->off_dt_strings = cpu_to_be32(dt_string_start - dt_header_start);
+ hdr->dt_strings_size = cpu_to_be32(dt_string_end - dt_string_start);
+ hdr->off_mem_rsvmap = cpu_to_be32(((unsigned long)rsvmap) - dt_header_start);
+ hdr->version = cpu_to_be32(OF_DT_VERSION);
/* Version 16 is not backward compatible */
- hdr->last_comp_version = 0x10;
+ hdr->last_comp_version = cpu_to_be32(0x10);
/* Copy the reserve map in */
memcpy(rsvmap, mem_reserve_map, sizeof(mem_reserve_map));
@@ -2383,8 +2457,8 @@ static void __init flatten_device_tree(void)
prom_printf("reserved memory map:\n");
for (i = 0; i < mem_reserve_cnt; i++)
prom_printf(" %x - %x\n",
- mem_reserve_map[i].base,
- mem_reserve_map[i].size);
+ be64_to_cpu(mem_reserve_map[i].base),
+ be64_to_cpu(mem_reserve_map[i].size));
}
#endif
/* Bump mem_reserve_cnt to cause further reservations to fail
@@ -2396,7 +2470,6 @@ static void __init flatten_device_tree(void)
dt_string_start, dt_string_end);
prom_printf("Device tree struct 0x%x -> 0x%x\n",
dt_struct_start, dt_struct_end);
-
}
#ifdef CONFIG_PPC_MAPLE
@@ -2729,18 +2802,19 @@ static void __init fixup_device_tree(void)
static void __init prom_find_boot_cpu(void)
{
- u32 getprop_rval;
+ __be32 rval;
ihandle prom_cpu;
phandle cpu_pkg;
- prom.cpu = 0;
- if (prom_getprop(prom.chosen, "cpu", &prom_cpu, sizeof(prom_cpu)) <= 0)
+ rval = 0;
+ if (prom_getprop(prom.chosen, "cpu", &rval, sizeof(rval)) <= 0)
return;
+ prom_cpu = be32_to_cpu(rval);
cpu_pkg = call_prom("instance-to-package", 1, 1, prom_cpu);
- prom_getprop(cpu_pkg, "reg", &getprop_rval, sizeof(getprop_rval));
- prom.cpu = getprop_rval;
+ prom_getprop(cpu_pkg, "reg", &rval, sizeof(rval));
+ prom.cpu = be32_to_cpu(rval);
prom_debug("Booting CPU hw index = %lu\n", prom.cpu);
}
@@ -2749,15 +2823,15 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4)
{
#ifdef CONFIG_BLK_DEV_INITRD
if (r3 && r4 && r4 != 0xdeadbeef) {
- unsigned long val;
+ __be64 val;
prom_initrd_start = is_kernel_addr(r3) ? __pa(r3) : r3;
prom_initrd_end = prom_initrd_start + r4;
- val = prom_initrd_start;
+ val = cpu_to_be64(prom_initrd_start);
prom_setprop(prom.chosen, "/chosen", "linux,initrd-start",
&val, sizeof(val));
- val = prom_initrd_end;
+ val = cpu_to_be64(prom_initrd_end);
prom_setprop(prom.chosen, "/chosen", "linux,initrd-end",
&val, sizeof(val));
@@ -2914,7 +2988,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
*/
prom_check_displays();
-#ifdef CONFIG_PPC64
+#if defined(CONFIG_PPC64) && defined(__BIG_ENDIAN__)
/*
* Initialize IOMMU (TCE tables) on pSeries. Do that before anything else
* that uses the allocator, we need to make sure we get the top of memory
@@ -2933,6 +3007,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
prom_instantiate_rtas();
#ifdef CONFIG_PPC_POWERNV
+#ifdef __BIG_ENDIAN__
/* Detect HAL and try instanciating it & doing takeover */
if (of_platform == PLATFORM_PSERIES_LPAR) {
prom_query_opal();
@@ -2940,9 +3015,11 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
prom_opal_hold_cpus();
prom_opal_takeover();
}
- } else if (of_platform == PLATFORM_OPAL)
+ } else
+#endif /* __BIG_ENDIAN__ */
+ if (of_platform == PLATFORM_OPAL)
prom_instantiate_opal();
-#endif
+#endif /* CONFIG_PPC_POWERNV */
#ifdef CONFIG_PPC64
/* instantiate sml */
@@ -2953,6 +3030,8 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
* On non-powermacs, put all CPUs in spin-loops.
*
* PowerMacs use a different mechanism to spin CPUs
+ *
+ * (This must be done after instanciating RTAS)
*/
if (of_platform != PLATFORM_POWERMAC &&
of_platform != PLATFORM_OPAL)
@@ -2961,10 +3040,11 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
/*
* Fill in some infos for use by the kernel later on
*/
- if (prom_memory_limit)
+ if (prom_memory_limit) {
+ __be64 val = cpu_to_be64(prom_memory_limit);
prom_setprop(prom.chosen, "/chosen", "linux,memory-limit",
- &prom_memory_limit,
- sizeof(prom_memory_limit));
+ &val, sizeof(val));
+ }
#ifdef CONFIG_PPC64
if (prom_iommu_off)
prom_setprop(prom.chosen, "/chosen", "linux,iommu-off",
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
index 3765da6be4f2..b0c263da219a 100644
--- a/arch/powerpc/kernel/prom_init_check.sh
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -22,7 +22,8 @@ __secondary_hold_acknowledge __secondary_hold_spinloop __start
strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224
reloc_got2 kernstart_addr memstart_addr linux_banner _stext
opal_query_takeover opal_do_takeover opal_enter_rtas opal_secondary_entry
-boot_command_line __prom_init_toc_start __prom_init_toc_end"
+boot_command_line __prom_init_toc_start __prom_init_toc_end
+btext_setup_display"
NM="$1"
OBJ="$2"
diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c
index 4e1331b8eb33..6295e646f78c 100644
--- a/arch/powerpc/kernel/prom_parse.c
+++ b/arch/powerpc/kernel/prom_parse.c
@@ -7,28 +7,27 @@
#include <linux/of_address.h>
#include <asm/prom.h>
-void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,
- unsigned long *busno, unsigned long *phys, unsigned long *size)
+void of_parse_dma_window(struct device_node *dn, const __be32 *dma_window,
+ unsigned long *busno, unsigned long *phys,
+ unsigned long *size)
{
- const u32 *dma_window;
u32 cells;
- const unsigned char *prop;
-
- dma_window = dma_window_prop;
+ const __be32 *prop;
/* busno is always one cell */
- *busno = *(dma_window++);
+ *busno = of_read_number(dma_window, 1);
+ dma_window++;
prop = of_get_property(dn, "ibm,#dma-address-cells", NULL);
if (!prop)
prop = of_get_property(dn, "#address-cells", NULL);
- cells = prop ? *(u32 *)prop : of_n_addr_cells(dn);
+ cells = prop ? of_read_number(prop, 1) : of_n_addr_cells(dn);
*phys = of_read_number(dma_window, cells);
dma_window += cells;
prop = of_get_property(dn, "ibm,#dma-size-cells", NULL);
- cells = prop ? *(u32 *)prop : of_n_size_cells(dn);
+ cells = prop ? of_read_number(prop, 1) : of_n_size_cells(dn);
*size = of_read_number(dma_window, cells);
}
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 98c2fc198712..9a0d24c390a3 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -975,16 +975,12 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL;
hw_brk.len = 8;
#ifdef CONFIG_HAVE_HW_BREAKPOINT
- if (ptrace_get_breakpoints(task) < 0)
- return -ESRCH;
-
bp = thread->ptrace_bps[0];
if ((!data) || !(hw_brk.type & HW_BRK_TYPE_RDWR)) {
if (bp) {
unregister_hw_breakpoint(bp);
thread->ptrace_bps[0] = NULL;
}
- ptrace_put_breakpoints(task);
return 0;
}
if (bp) {
@@ -997,11 +993,9 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
ret = modify_user_hw_breakpoint(bp, &attr);
if (ret) {
- ptrace_put_breakpoints(task);
return ret;
}
thread->ptrace_bps[0] = bp;
- ptrace_put_breakpoints(task);
thread->hw_brk = hw_brk;
return 0;
}
@@ -1016,12 +1010,9 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
ptrace_triggered, NULL, task);
if (IS_ERR(bp)) {
thread->ptrace_bps[0] = NULL;
- ptrace_put_breakpoints(task);
return PTR_ERR(bp);
}
- ptrace_put_breakpoints(task);
-
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
task->thread.hw_brk = hw_brk;
#else /* CONFIG_PPC_ADV_DEBUG_REGS */
@@ -1440,24 +1431,19 @@ static long ppc_set_hwdebug(struct task_struct *child,
if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
brk.type |= HW_BRK_TYPE_WRITE;
#ifdef CONFIG_HAVE_HW_BREAKPOINT
- if (ptrace_get_breakpoints(child) < 0)
- return -ESRCH;
-
/*
* Check if the request is for 'range' breakpoints. We can
* support it if range < 8 bytes.
*/
- if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) {
+ if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
len = bp_info->addr2 - bp_info->addr;
- } else if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) {
- ptrace_put_breakpoints(child);
+ else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT)
+ len = 1;
+ else
return -EINVAL;
- }
bp = thread->ptrace_bps[0];
- if (bp) {
- ptrace_put_breakpoints(child);
+ if (bp)
return -ENOSPC;
- }
/* Create a new breakpoint request if one doesn't exist already */
hw_breakpoint_init(&attr);
@@ -1469,11 +1455,9 @@ static long ppc_set_hwdebug(struct task_struct *child,
ptrace_triggered, NULL, child);
if (IS_ERR(bp)) {
thread->ptrace_bps[0] = NULL;
- ptrace_put_breakpoints(child);
return PTR_ERR(bp);
}
- ptrace_put_breakpoints(child);
return 1;
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
@@ -1517,16 +1501,12 @@ static long ppc_del_hwdebug(struct task_struct *child, long data)
return -EINVAL;
#ifdef CONFIG_HAVE_HW_BREAKPOINT
- if (ptrace_get_breakpoints(child) < 0)
- return -ESRCH;
-
bp = thread->ptrace_bps[0];
if (bp) {
unregister_hw_breakpoint(bp);
thread->ptrace_bps[0] = NULL;
} else
ret = -ENOENT;
- ptrace_put_breakpoints(child);
return ret;
#else /* CONFIG_HAVE_HW_BREAKPOINT */
if (child->thread.hw_brk.address == 0)
diff --git a/arch/powerpc/kernel/reloc_32.S b/arch/powerpc/kernel/reloc_32.S
index ef46ba6e094f..f366fedb0872 100644
--- a/arch/powerpc/kernel/reloc_32.S
+++ b/arch/powerpc/kernel/reloc_32.S
@@ -166,7 +166,7 @@ ha16:
/* R_PPC_ADDR16_LO */
lo16:
cmpwi r4, R_PPC_ADDR16_LO
- bne nxtrela
+ bne unknown_type
lwz r4, 0(r9) /* r_offset */
lwz r0, 8(r9) /* r_addend */
add r0, r0, r3
@@ -191,6 +191,7 @@ nxtrela:
dcbst r4,r7
sync /* Ensure the data is flushed before icbi */
icbi r4,r7
+unknown_type:
cmpwi r8, 0 /* relasz = 0 ? */
ble done
add r9, r9, r6 /* move to next entry in the .rela table */
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 52add6f3e201..4cf674d7d5ae 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -91,7 +91,7 @@ static void unlock_rtas(unsigned long flags)
* are designed only for very early low-level debugging, which
* is why the token is hard-coded to 10.
*/
-static void call_rtas_display_status(char c)
+static void call_rtas_display_status(unsigned char c)
{
struct rtas_args *args = &rtas.args;
unsigned long s;
@@ -100,11 +100,11 @@ static void call_rtas_display_status(char c)
return;
s = lock_rtas();
- args->token = 10;
- args->nargs = 1;
- args->nret = 1;
- args->rets = (rtas_arg_t *)&(args->args[1]);
- args->args[0] = (unsigned char)c;
+ args->token = cpu_to_be32(10);
+ args->nargs = cpu_to_be32(1);
+ args->nret = cpu_to_be32(1);
+ args->rets = &(args->args[1]);
+ args->args[0] = cpu_to_be32(c);
enter_rtas(__pa(args));
@@ -204,7 +204,7 @@ void rtas_progress(char *s, unsigned short hex)
{
struct device_node *root;
int width;
- const int *p;
+ const __be32 *p;
char *os;
static int display_character, set_indicator;
static int display_width, display_lines, form_feed;
@@ -221,13 +221,13 @@ void rtas_progress(char *s, unsigned short hex)
if ((root = of_find_node_by_path("/rtas"))) {
if ((p = of_get_property(root,
"ibm,display-line-length", NULL)))
- display_width = *p;
+ display_width = be32_to_cpu(*p);
if ((p = of_get_property(root,
"ibm,form-feed", NULL)))
- form_feed = *p;
+ form_feed = be32_to_cpu(*p);
if ((p = of_get_property(root,
"ibm,display-number-of-lines", NULL)))
- display_lines = *p;
+ display_lines = be32_to_cpu(*p);
row_width = of_get_property(root,
"ibm,display-truncation-length", NULL);
of_node_put(root);
@@ -322,11 +322,11 @@ EXPORT_SYMBOL(rtas_progress); /* needed by rtas_flash module */
int rtas_token(const char *service)
{
- const int *tokp;
+ const __be32 *tokp;
if (rtas.dev == NULL)
return RTAS_UNKNOWN_SERVICE;
tokp = of_get_property(rtas.dev, service, NULL);
- return tokp ? *tokp : RTAS_UNKNOWN_SERVICE;
+ return tokp ? be32_to_cpu(*tokp) : RTAS_UNKNOWN_SERVICE;
}
EXPORT_SYMBOL(rtas_token);
@@ -380,11 +380,11 @@ static char *__fetch_rtas_last_error(char *altbuf)
bufsz = rtas_get_error_log_max();
- err_args.token = rtas_last_error_token;
- err_args.nargs = 2;
- err_args.nret = 1;
- err_args.args[0] = (rtas_arg_t)__pa(rtas_err_buf);
- err_args.args[1] = bufsz;
+ err_args.token = cpu_to_be32(rtas_last_error_token);
+ err_args.nargs = cpu_to_be32(2);
+ err_args.nret = cpu_to_be32(1);
+ err_args.args[0] = cpu_to_be32(__pa(rtas_err_buf));
+ err_args.args[1] = cpu_to_be32(bufsz);
err_args.args[2] = 0;
save_args = rtas.args;
@@ -433,13 +433,13 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...)
s = lock_rtas();
rtas_args = &rtas.args;
- rtas_args->token = token;
- rtas_args->nargs = nargs;
- rtas_args->nret = nret;
- rtas_args->rets = (rtas_arg_t *)&(rtas_args->args[nargs]);
+ rtas_args->token = cpu_to_be32(token);
+ rtas_args->nargs = cpu_to_be32(nargs);
+ rtas_args->nret = cpu_to_be32(nret);
+ rtas_args->rets = &(rtas_args->args[nargs]);
va_start(list, outputs);
for (i = 0; i < nargs; ++i)
- rtas_args->args[i] = va_arg(list, rtas_arg_t);
+ rtas_args->args[i] = cpu_to_be32(va_arg(list, __u32));
va_end(list);
for (i = 0; i < nret; ++i)
@@ -449,13 +449,13 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...)
/* A -1 return code indicates that the last command couldn't
be completed due to a hardware error. */
- if (rtas_args->rets[0] == -1)
+ if (be32_to_cpu(rtas_args->rets[0]) == -1)
buff_copy = __fetch_rtas_last_error(NULL);
if (nret > 1 && outputs != NULL)
for (i = 0; i < nret-1; ++i)
- outputs[i] = rtas_args->rets[i+1];
- ret = (nret > 0)? rtas_args->rets[0]: 0;
+ outputs[i] = be32_to_cpu(rtas_args->rets[i+1]);
+ ret = (nret > 0)? be32_to_cpu(rtas_args->rets[0]): 0;
unlock_rtas(s);
@@ -588,8 +588,8 @@ bool rtas_indicator_present(int token, int *maxindex)
{
int proplen, count, i;
const struct indicator_elem {
- u32 token;
- u32 maxindex;
+ __be32 token;
+ __be32 maxindex;
} *indicators;
indicators = of_get_property(rtas.dev, "rtas-indicators", &proplen);
@@ -599,10 +599,10 @@ bool rtas_indicator_present(int token, int *maxindex)
count = proplen / sizeof(struct indicator_elem);
for (i = 0; i < count; i++) {
- if (indicators[i].token != token)
+ if (__be32_to_cpu(indicators[i].token) != token)
continue;
if (maxindex)
- *maxindex = indicators[i].maxindex;
+ *maxindex = __be32_to_cpu(indicators[i].maxindex);
return true;
}
@@ -1097,19 +1097,19 @@ void __init rtas_initialize(void)
*/
rtas.dev = of_find_node_by_name(NULL, "rtas");
if (rtas.dev) {
- const u32 *basep, *entryp, *sizep;
+ const __be32 *basep, *entryp, *sizep;
basep = of_get_property(rtas.dev, "linux,rtas-base", NULL);
sizep = of_get_property(rtas.dev, "rtas-size", NULL);
if (basep != NULL && sizep != NULL) {
- rtas.base = *basep;
- rtas.size = *sizep;
+ rtas.base = __be32_to_cpu(*basep);
+ rtas.size = __be32_to_cpu(*sizep);
entryp = of_get_property(rtas.dev,
"linux,rtas-entry", NULL);
if (entryp == NULL) /* Ugh */
rtas.entry = rtas.base;
else
- rtas.entry = *entryp;
+ rtas.entry = __be32_to_cpu(*entryp);
} else
rtas.dev = NULL;
}
@@ -1172,7 +1172,7 @@ int __init early_init_dt_scan_rtas(unsigned long node,
static arch_spinlock_t timebase_lock;
static u64 timebase = 0;
-void __cpuinit rtas_give_timebase(void)
+void rtas_give_timebase(void)
{
unsigned long flags;
@@ -1189,7 +1189,7 @@ void __cpuinit rtas_give_timebase(void)
local_irq_restore(flags);
}
-void __cpuinit rtas_take_timebase(void)
+void rtas_take_timebase(void)
{
while (!timebase)
barrier();
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 63d051f5b7a5..3d261c071fc8 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -436,7 +436,8 @@ void __init smp_setup_cpu_maps(void)
DBG("smp_setup_cpu_maps()\n");
while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < nr_cpu_ids) {
- const int *intserv;
+ const __be32 *intserv;
+ __be32 cpu_be;
int j, len;
DBG(" * %s...\n", dn->full_name);
@@ -450,15 +451,17 @@ void __init smp_setup_cpu_maps(void)
} else {
DBG(" no ibm,ppc-interrupt-server#s -> 1 thread\n");
intserv = of_get_property(dn, "reg", NULL);
- if (!intserv)
- intserv = &cpu; /* assume logical == phys */
+ if (!intserv) {
+ cpu_be = cpu_to_be32(cpu);
+ intserv = &cpu_be; /* assume logical == phys */
+ }
}
for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) {
DBG(" thread %d -> cpu %d (hard id %d)\n",
- j, cpu, intserv[j]);
+ j, cpu, be32_to_cpu(intserv[j]));
set_cpu_present(cpu, true);
- set_hard_smp_processor_id(cpu, intserv[j]);
+ set_hard_smp_processor_id(cpu, be32_to_cpu(intserv[j]));
set_cpu_possible(cpu, true);
cpu++;
}
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index a8f54ecb091f..a4bbcae72578 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -38,6 +38,7 @@
#include <asm/serial.h>
#include <asm/udbg.h>
#include <asm/mmu_context.h>
+#include <asm/epapr_hcalls.h>
#include "setup.h"
@@ -128,6 +129,8 @@ notrace void __init machine_init(u64 dt_ptr)
/* Do some early initialization based on the flat device tree */
early_init_devtree(__va(dt_ptr));
+ epapr_paravirt_early_init();
+
early_init_mmu();
probe_machine();
@@ -326,5 +329,4 @@ void __init setup_arch(char **cmdline_p)
/* Initialize the MMU context management stuff */
mmu_context_init();
-
}
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index e379d3fd1694..278ca93e1f28 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -10,7 +10,7 @@
* 2 of the License, or (at your option) any later version.
*/
-#undef DEBUG
+#define DEBUG
#include <linux/export.h>
#include <linux/string.h>
@@ -66,6 +66,7 @@
#include <asm/code-patching.h>
#include <asm/kvm_ppc.h>
#include <asm/hugetlb.h>
+#include <asm/epapr_hcalls.h>
#include "setup.h"
@@ -76,7 +77,7 @@
#endif
int boot_cpuid = 0;
-int __initdata spinning_secondaries;
+int spinning_secondaries;
u64 ppc64_pft_size;
/* Pick defaults since we might want to patch instructions
@@ -215,6 +216,8 @@ void __init early_setup(unsigned long dt_ptr)
*/
early_init_devtree(__va(dt_ptr));
+ epapr_paravirt_early_init();
+
/* Now we know the logical id of our boot cpu, setup the paca. */
setup_paca(&paca[boot_cpuid]);
fixup_boot_paca();
@@ -229,6 +232,8 @@ void __init early_setup(unsigned long dt_ptr)
/* Initialize the hash table or TLB handling */
early_init_mmu();
+ kvm_cma_reserve();
+
/*
* Reserve any gigantic pages requested on the command line.
* memblock needs to have been initialized by the time this is
@@ -237,6 +242,18 @@ void __init early_setup(unsigned long dt_ptr)
reserve_hugetlb_gpages();
DBG(" <- early_setup()\n");
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX
+ /*
+ * This needs to be done *last* (after the above DBG() even)
+ *
+ * Right after we return from this function, we turn on the MMU
+ * which means the real-mode access trick that btext does will
+ * no longer work, it needs to switch to using a real MMU
+ * mapping. This call will ensure that it does
+ */
+ btext_map();
+#endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */
}
#ifdef CONFIG_SMP
@@ -305,14 +322,14 @@ static void __init initialize_cache_info(void)
* d-cache and i-cache sizes... -Peter
*/
if (num_cpus == 1) {
- const u32 *sizep, *lsizep;
+ const __be32 *sizep, *lsizep;
u32 size, lsize;
size = 0;
lsize = cur_cpu_spec->dcache_bsize;
sizep = of_get_property(np, "d-cache-size", NULL);
if (sizep != NULL)
- size = *sizep;
+ size = be32_to_cpu(*sizep);
lsizep = of_get_property(np, "d-cache-block-size",
NULL);
/* fallback if block size missing */
@@ -321,8 +338,8 @@ static void __init initialize_cache_info(void)
"d-cache-line-size",
NULL);
if (lsizep != NULL)
- lsize = *lsizep;
- if (sizep == 0 || lsizep == 0)
+ lsize = be32_to_cpu(*lsizep);
+ if (sizep == NULL || lsizep == NULL)
DBG("Argh, can't find dcache properties ! "
"sizep: %p, lsizep: %p\n", sizep, lsizep);
@@ -335,7 +352,7 @@ static void __init initialize_cache_info(void)
lsize = cur_cpu_spec->icache_bsize;
sizep = of_get_property(np, "i-cache-size", NULL);
if (sizep != NULL)
- size = *sizep;
+ size = be32_to_cpu(*sizep);
lsizep = of_get_property(np, "i-cache-block-size",
NULL);
if (lsizep == NULL)
@@ -343,8 +360,8 @@ static void __init initialize_cache_info(void)
"i-cache-line-size",
NULL);
if (lsizep != NULL)
- lsize = *lsizep;
- if (sizep == 0 || lsizep == 0)
+ lsize = be32_to_cpu(*lsizep);
+ if (sizep == NULL || lsizep == NULL)
DBG("Argh, can't find icache properties ! "
"sizep: %p, lsizep: %p\n", sizep, lsizep);
@@ -609,8 +626,6 @@ void __init setup_arch(char **cmdline_p)
/* Initialize the MMU context management stuff */
mmu_context_init();
- kvm_linear_init();
-
/* Interrupt code needs to be 64K-aligned */
if ((unsigned long)_stext & 0xffff)
panic("Kernelbase not 64K-aligned (0x%lx)!\n",
@@ -701,8 +716,7 @@ void __init setup_per_cpu_areas(void)
#endif
-#ifdef CONFIG_PPC_INDIRECT_IO
+#if defined(CONFIG_PPC_INDIRECT_PIO) || defined(CONFIG_PPC_INDIRECT_MMIO)
struct ppc_pci_io ppc_pci_io;
EXPORT_SYMBOL(ppc_pci_io);
-#endif /* CONFIG_PPC_INDIRECT_IO */
-
+#endif
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 201385c3a1ae..bebdf1a1a540 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -407,7 +407,8 @@ inline unsigned long copy_transact_fpr_from_user(struct task_struct *task,
* altivec/spe instructions at some point.
*/
static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
- int sigret, int ctx_has_vsx_region)
+ struct mcontext __user *tm_frame, int sigret,
+ int ctx_has_vsx_region)
{
unsigned long msr = regs->msr;
@@ -435,7 +436,10 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
* use altivec. Since VSCR only contains 32 bits saved in the least
* significant bits of a vector, we "cheat" and stuff VRSAVE in the
* most significant bits of that same vector. --BenH
+ * Note that the current VRSAVE value is in the SPR at this point.
*/
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ current->thread.vrsave = mfspr(SPRN_VRSAVE);
if (__put_user(current->thread.vrsave, (u32 __user *)&frame->mc_vregs[32]))
return 1;
#endif /* CONFIG_ALTIVEC */
@@ -475,6 +479,12 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
if (__put_user(msr, &frame->mc_gregs[PT_MSR]))
return 1;
+ /* We need to write 0 the MSR top 32 bits in the tm frame so that we
+ * can check it on the restore to see if TM is active
+ */
+ if (tm_frame && __put_user(0, &tm_frame->mc_gregs[PT_MSR]))
+ return 1;
+
if (sigret) {
/* Set up the sigreturn trampoline: li r0,sigret; sc */
if (__put_user(0x38000000UL + sigret, &frame->tramp[0])
@@ -550,6 +560,8 @@ static int save_tm_user_regs(struct pt_regs *regs,
* significant bits of a vector, we "cheat" and stuff VRSAVE in the
* most significant bits of that same vector. --BenH
*/
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ current->thread.vrsave = mfspr(SPRN_VRSAVE);
if (__put_user(current->thread.vrsave,
(u32 __user *)&frame->mc_vregs[32]))
return 1;
@@ -689,6 +701,8 @@ static long restore_user_regs(struct pt_regs *regs,
/* Always get VRSAVE back */
if (__get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32]))
return 1;
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ mtspr(SPRN_VRSAVE, current->thread.vrsave);
#endif /* CONFIG_ALTIVEC */
if (copy_fpr_from_user(current, &sr->mc_fregs))
return 1;
@@ -747,7 +761,7 @@ static long restore_tm_user_regs(struct pt_regs *regs,
struct mcontext __user *tm_sr)
{
long err;
- unsigned long msr;
+ unsigned long msr, msr_hi;
#ifdef CONFIG_VSX
int i;
#endif
@@ -802,6 +816,8 @@ static long restore_tm_user_regs(struct pt_regs *regs,
__get_user(current->thread.transact_vrsave,
(u32 __user *)&tm_sr->mc_vregs[32]))
return 1;
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ mtspr(SPRN_VRSAVE, current->thread.vrsave);
#endif /* CONFIG_ALTIVEC */
regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1);
@@ -852,8 +868,11 @@ static long restore_tm_user_regs(struct pt_regs *regs,
tm_enable();
/* This loads the checkpointed FP/VEC state, if used */
tm_recheckpoint(&current->thread, msr);
- /* The task has moved into TM state S, so ensure MSR reflects this */
- regs->msr = (regs->msr & ~MSR_TS_MASK) | MSR_TS_S;
+ /* Get the top half of the MSR */
+ if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR]))
+ return 1;
+ /* Pull in MSR TM from user context */
+ regs->msr = (regs->msr & ~MSR_TS_MASK) | ((msr_hi<<32) & MSR_TS_MASK);
/* This loads the speculative FP/VEC state, if used */
if (msr & MSR_FP) {
@@ -952,6 +971,7 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
{
struct rt_sigframe __user *rt_sf;
struct mcontext __user *frame;
+ struct mcontext __user *tm_frame = NULL;
void __user *addr;
unsigned long newsp = 0;
int sigret;
@@ -985,23 +1005,24 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ tm_frame = &rt_sf->uc_transact.uc_mcontext;
if (MSR_TM_ACTIVE(regs->msr)) {
- if (save_tm_user_regs(regs, &rt_sf->uc.uc_mcontext,
- &rt_sf->uc_transact.uc_mcontext, sigret))
+ if (save_tm_user_regs(regs, frame, tm_frame, sigret))
goto badframe;
}
else
#endif
- if (save_user_regs(regs, frame, sigret, 1))
+ {
+ if (save_user_regs(regs, frame, tm_frame, sigret, 1))
goto badframe;
+ }
regs->link = tramp;
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (MSR_TM_ACTIVE(regs->msr)) {
if (__put_user((unsigned long)&rt_sf->uc_transact,
&rt_sf->uc.uc_link)
- || __put_user(to_user_ptr(&rt_sf->uc_transact.uc_mcontext),
- &rt_sf->uc_transact.uc_regs))
+ || __put_user((unsigned long)tm_frame, &rt_sf->uc_transact.uc_regs))
goto badframe;
}
else
@@ -1170,7 +1191,7 @@ long sys_swapcontext(struct ucontext __user *old_ctx,
mctx = (struct mcontext __user *)
((unsigned long) &old_ctx->uc_mcontext & ~0xfUL);
if (!access_ok(VERIFY_WRITE, old_ctx, ctx_size)
- || save_user_regs(regs, mctx, 0, ctx_has_vsx_region)
+ || save_user_regs(regs, mctx, NULL, 0, ctx_has_vsx_region)
|| put_sigset_t(&old_ctx->uc_sigmask, &current->blocked)
|| __put_user(to_user_ptr(mctx), &old_ctx->uc_regs))
return -EFAULT;
@@ -1233,7 +1254,7 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
if (__get_user(msr_hi, &mcp->mc_gregs[PT_MSR]))
goto bad;
- if (MSR_TM_SUSPENDED(msr_hi<<32)) {
+ if (MSR_TM_ACTIVE(msr_hi<<32)) {
/* We only recheckpoint on return if we're
* transaction.
*/
@@ -1392,6 +1413,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka,
{
struct sigcontext __user *sc;
struct sigframe __user *frame;
+ struct mcontext __user *tm_mctx = NULL;
unsigned long newsp = 0;
int sigret;
unsigned long tramp;
@@ -1425,6 +1447,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka,
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ tm_mctx = &frame->mctx_transact;
if (MSR_TM_ACTIVE(regs->msr)) {
if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact,
sigret))
@@ -1432,8 +1455,10 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka,
}
else
#endif
- if (save_user_regs(regs, &frame->mctx, sigret, 1))
+ {
+ if (save_user_regs(regs, &frame->mctx, tm_mctx, sigret, 1))
goto badframe;
+ }
regs->link = tramp;
@@ -1481,16 +1506,22 @@ badframe:
long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
struct pt_regs *regs)
{
+ struct sigframe __user *sf;
struct sigcontext __user *sc;
struct sigcontext sigctx;
struct mcontext __user *sr;
void __user *addr;
sigset_t set;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ struct mcontext __user *mcp, *tm_mcp;
+ unsigned long msr_hi;
+#endif
/* Always make any pending restarted system calls return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
- sc = (struct sigcontext __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE);
+ sf = (struct sigframe __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE);
+ sc = &sf->sctx;
addr = sc;
if (copy_from_user(&sigctx, sc, sizeof(sigctx)))
goto badframe;
@@ -1507,11 +1538,25 @@ long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
#endif
set_current_blocked(&set);
- sr = (struct mcontext __user *)from_user_ptr(sigctx.regs);
- addr = sr;
- if (!access_ok(VERIFY_READ, sr, sizeof(*sr))
- || restore_user_regs(regs, sr, 1))
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ mcp = (struct mcontext __user *)&sf->mctx;
+ tm_mcp = (struct mcontext __user *)&sf->mctx_transact;
+ if (__get_user(msr_hi, &tm_mcp->mc_gregs[PT_MSR]))
goto badframe;
+ if (MSR_TM_ACTIVE(msr_hi<<32)) {
+ if (!cpu_has_feature(CPU_FTR_TM))
+ goto badframe;
+ if (restore_tm_user_regs(regs, mcp, tm_mcp))
+ goto badframe;
+ } else
+#endif
+ {
+ sr = (struct mcontext __user *)from_user_ptr(sigctx.regs);
+ addr = sr;
+ if (!access_ok(VERIFY_READ, sr, sizeof(*sr))
+ || restore_user_regs(regs, sr, 1))
+ goto badframe;
+ }
set_thread_flag(TIF_RESTOREALL);
return 0;
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 345947367ec0..f93ec2835a13 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -96,8 +96,6 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
unsigned long msr = regs->msr;
long err = 0;
- flush_fp_to_thread(current);
-
#ifdef CONFIG_ALTIVEC
err |= __put_user(v_regs, &sc->v_regs);
@@ -114,6 +112,8 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
/* We always copy to/from vrsave, it's 0 if we don't have or don't
* use altivec.
*/
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ current->thread.vrsave = mfspr(SPRN_VRSAVE);
err |= __put_user(current->thread.vrsave, (u32 __user *)&v_regs[33]);
#else /* CONFIG_ALTIVEC */
err |= __put_user(0, &sc->v_regs);
@@ -217,6 +217,8 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
/* We always copy to/from vrsave, it's 0 if we don't have or don't
* use altivec.
*/
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ current->thread.vrsave = mfspr(SPRN_VRSAVE);
err |= __put_user(current->thread.vrsave, (u32 __user *)&v_regs[33]);
if (msr & MSR_VEC)
err |= __put_user(current->thread.transact_vrsave,
@@ -346,16 +348,18 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig,
if (v_regs && !access_ok(VERIFY_READ, v_regs, 34 * sizeof(vector128)))
return -EFAULT;
/* Copy 33 vec registers (vr0..31 and vscr) from the stack */
- if (v_regs != 0 && (msr & MSR_VEC) != 0)
+ if (v_regs != NULL && (msr & MSR_VEC) != 0)
err |= __copy_from_user(current->thread.vr, v_regs,
33 * sizeof(vector128));
else if (current->thread.used_vr)
memset(current->thread.vr, 0, 33 * sizeof(vector128));
/* Always get VRSAVE back */
- if (v_regs != 0)
+ if (v_regs != NULL)
err |= __get_user(current->thread.vrsave, (u32 __user *)&v_regs[33]);
else
current->thread.vrsave = 0;
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ mtspr(SPRN_VRSAVE, current->thread.vrsave);
#endif /* CONFIG_ALTIVEC */
/* restore floating point */
err |= copy_fpr_from_user(current, &sc->fp_regs);
@@ -410,6 +414,10 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,
/* get MSR separately, transfer the LE bit if doing signal return */
err |= __get_user(msr, &sc->gp_regs[PT_MSR]);
+ /* pull in MSR TM from user context */
+ regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK);
+
+ /* pull in MSR LE from user context */
regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
/* The following non-GPR non-FPR non-VR state is also checkpointed: */
@@ -459,7 +467,7 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,
tm_v_regs, 34 * sizeof(vector128)))
return -EFAULT;
/* Copy 33 vec registers (vr0..31 and vscr) from the stack */
- if (v_regs != 0 && tm_v_regs != 0 && (msr & MSR_VEC) != 0) {
+ if (v_regs != NULL && tm_v_regs != NULL && (msr & MSR_VEC) != 0) {
err |= __copy_from_user(current->thread.vr, v_regs,
33 * sizeof(vector128));
err |= __copy_from_user(current->thread.transact_vr, tm_v_regs,
@@ -470,7 +478,7 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,
memset(current->thread.transact_vr, 0, 33 * sizeof(vector128));
}
/* Always get VRSAVE back */
- if (v_regs != 0 && tm_v_regs != 0) {
+ if (v_regs != NULL && tm_v_regs != NULL) {
err |= __get_user(current->thread.vrsave,
(u32 __user *)&v_regs[33]);
err |= __get_user(current->thread.transact_vrsave,
@@ -480,6 +488,8 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,
current->thread.vrsave = 0;
current->thread.transact_vrsave = 0;
}
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ mtspr(SPRN_VRSAVE, current->thread.vrsave);
#endif /* CONFIG_ALTIVEC */
/* restore floating point */
err |= copy_fpr_from_user(current, &sc->fp_regs);
@@ -505,8 +515,6 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,
tm_enable();
/* This loads the checkpointed FP/VEC state, if used */
tm_recheckpoint(&current->thread, msr);
- /* The task has moved into TM state S, so ensure MSR reflects this: */
- regs->msr = (regs->msr & ~MSR_TS_MASK) | __MASK(33);
/* This loads the speculative FP/VEC state, if used */
if (msr & MSR_FP) {
@@ -654,7 +662,7 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5,
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
goto badframe;
- if (MSR_TM_SUSPENDED(msr)) {
+ if (MSR_TM_ACTIVE(msr)) {
/* We recheckpoint on return. */
struct ucontext __user *uc_transact;
if (__get_user(uc_transact, &uc->uc_link))
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index ee7ac5e6e28a..8e59abc237d7 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -81,6 +81,28 @@ int smt_enabled_at_boot = 1;
static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL;
+/*
+ * Returns 1 if the specified cpu should be brought up during boot.
+ * Used to inhibit booting threads if they've been disabled or
+ * limited on the command line
+ */
+int smp_generic_cpu_bootable(unsigned int nr)
+{
+ /* Special case - we inhibit secondary thread startup
+ * during boot if the user requests it.
+ */
+ if (system_state == SYSTEM_BOOTING && cpu_has_feature(CPU_FTR_SMT)) {
+ if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
+ return 0;
+ if (smt_enabled_at_boot
+ && cpu_thread_in_core(nr) >= smt_enabled_at_boot)
+ return 0;
+ }
+
+ return 1;
+}
+
+
#ifdef CONFIG_PPC64
int smp_generic_kick_cpu(int nr)
{
@@ -172,7 +194,7 @@ int smp_request_message_ipi(int virq, int msg)
#endif
err = request_irq(virq, smp_ipi_action[msg],
IRQF_PERCPU | IRQF_NO_THREAD | IRQF_NO_SUSPEND,
- smp_ipi_name[msg], 0);
+ smp_ipi_name[msg], NULL);
WARN(err < 0, "unable to request_irq %d for %s (rc %d)\n",
virq, smp_ipi_name[msg], err);
@@ -210,6 +232,12 @@ void smp_muxed_ipi_message_pass(int cpu, int msg)
smp_ops->cause_ipi(cpu, info->data);
}
+#ifdef __BIG_ENDIAN__
+#define IPI_MESSAGE(A) (1 << (24 - 8 * (A)))
+#else
+#define IPI_MESSAGE(A) (1 << (8 * (A)))
+#endif
+
irqreturn_t smp_ipi_demux(void)
{
struct cpu_messages *info = &__get_cpu_var(ipi_message);
@@ -219,19 +247,14 @@ irqreturn_t smp_ipi_demux(void)
do {
all = xchg(&info->messages, 0);
-
-#ifdef __BIG_ENDIAN
- if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNCTION)))
+ if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNCTION))
generic_smp_call_function_interrupt();
- if (all & (1 << (24 - 8 * PPC_MSG_RESCHEDULE)))
+ if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))
scheduler_ipi();
- if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNC_SINGLE)))
+ if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNC_SINGLE))
generic_smp_call_function_single_interrupt();
- if (all & (1 << (24 - 8 * PPC_MSG_DEBUGGER_BREAK)))
+ if (all & IPI_MESSAGE(PPC_MSG_DEBUGGER_BREAK))
debug_ipi_action(0, NULL);
-#else
-#error Unsupported ENDIAN
-#endif
} while (info->messages);
return IRQ_HANDLED;
@@ -480,7 +503,7 @@ static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
secondary_ti = current_set[cpu] = ti;
}
-int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle)
+int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
int rc, c;
@@ -574,6 +597,22 @@ out:
return id;
}
+/* Return the value of the chip-id property corresponding
+ * to the given logical cpu.
+ */
+int cpu_to_chip_id(int cpu)
+{
+ struct device_node *np;
+
+ np = of_get_cpu_node(cpu, NULL);
+ if (!np)
+ return -1;
+
+ of_node_put(np);
+ return of_get_ibm_chip_id(np);
+}
+EXPORT_SYMBOL(cpu_to_chip_id);
+
/* Helper routines for cpu to core mapping */
int cpu_core_index_of_thread(int cpu)
{
@@ -587,6 +626,33 @@ int cpu_first_thread_of_core(int core)
}
EXPORT_SYMBOL_GPL(cpu_first_thread_of_core);
+static void traverse_siblings_chip_id(int cpu, bool add, int chipid)
+{
+ const struct cpumask *mask;
+ struct device_node *np;
+ int i, plen;
+ const __be32 *prop;
+
+ mask = add ? cpu_online_mask : cpu_present_mask;
+ for_each_cpu(i, mask) {
+ np = of_get_cpu_node(i, NULL);
+ if (!np)
+ continue;
+ prop = of_get_property(np, "ibm,chip-id", &plen);
+ if (prop && plen == sizeof(int) &&
+ of_read_number(prop, 1) == chipid) {
+ if (add) {
+ cpumask_set_cpu(cpu, cpu_core_mask(i));
+ cpumask_set_cpu(i, cpu_core_mask(cpu));
+ } else {
+ cpumask_clear_cpu(cpu, cpu_core_mask(i));
+ cpumask_clear_cpu(i, cpu_core_mask(cpu));
+ }
+ }
+ of_node_put(np);
+ }
+}
+
/* Must be called when no change can occur to cpu_present_mask,
* i.e. during cpu online or offline.
*/
@@ -609,11 +675,51 @@ static struct device_node *cpu_to_l2cache(int cpu)
return cache;
}
+static void traverse_core_siblings(int cpu, bool add)
+{
+ struct device_node *l2_cache, *np;
+ const struct cpumask *mask;
+ int i, chip, plen;
+ const __be32 *prop;
+
+ /* First see if we have ibm,chip-id properties in cpu nodes */
+ np = of_get_cpu_node(cpu, NULL);
+ if (np) {
+ chip = -1;
+ prop = of_get_property(np, "ibm,chip-id", &plen);
+ if (prop && plen == sizeof(int))
+ chip = of_read_number(prop, 1);
+ of_node_put(np);
+ if (chip >= 0) {
+ traverse_siblings_chip_id(cpu, add, chip);
+ return;
+ }
+ }
+
+ l2_cache = cpu_to_l2cache(cpu);
+ mask = add ? cpu_online_mask : cpu_present_mask;
+ for_each_cpu(i, mask) {
+ np = cpu_to_l2cache(i);
+ if (!np)
+ continue;
+ if (np == l2_cache) {
+ if (add) {
+ cpumask_set_cpu(cpu, cpu_core_mask(i));
+ cpumask_set_cpu(i, cpu_core_mask(cpu));
+ } else {
+ cpumask_clear_cpu(cpu, cpu_core_mask(i));
+ cpumask_clear_cpu(i, cpu_core_mask(cpu));
+ }
+ }
+ of_node_put(np);
+ }
+ of_node_put(l2_cache);
+}
+
/* Activate a secondary processor. */
-__cpuinit void start_secondary(void *unused)
+void start_secondary(void *unused)
{
unsigned int cpu = smp_processor_id();
- struct device_node *l2_cache;
int i, base;
atomic_inc(&init_mm.mm_count);
@@ -637,12 +743,10 @@ __cpuinit void start_secondary(void *unused)
vdso_getcpu_init();
#endif
- notify_cpu_starting(cpu);
- set_cpu_online(cpu, true);
/* Update sibling maps */
base = cpu_first_thread_sibling(cpu);
for (i = 0; i < threads_per_core; i++) {
- if (cpu_is_offline(base + i))
+ if (cpu_is_offline(base + i) && (cpu != base + i))
continue;
cpumask_set_cpu(cpu, cpu_sibling_mask(base + i));
cpumask_set_cpu(base + i, cpu_sibling_mask(cpu));
@@ -654,18 +758,11 @@ __cpuinit void start_secondary(void *unused)
cpumask_set_cpu(cpu, cpu_core_mask(base + i));
cpumask_set_cpu(base + i, cpu_core_mask(cpu));
}
- l2_cache = cpu_to_l2cache(cpu);
- for_each_online_cpu(i) {
- struct device_node *np = cpu_to_l2cache(i);
- if (!np)
- continue;
- if (np == l2_cache) {
- cpumask_set_cpu(cpu, cpu_core_mask(i));
- cpumask_set_cpu(i, cpu_core_mask(cpu));
- }
- of_node_put(np);
- }
- of_node_put(l2_cache);
+ traverse_core_siblings(cpu, true);
+
+ smp_wmb();
+ notify_cpu_starting(cpu);
+ set_cpu_online(cpu, true);
local_irq_enable();
@@ -717,7 +814,6 @@ int arch_sd_sibling_asym_packing(void)
#ifdef CONFIG_HOTPLUG_CPU
int __cpu_disable(void)
{
- struct device_node *l2_cache;
int cpu = smp_processor_id();
int base, i;
int err;
@@ -737,20 +833,7 @@ int __cpu_disable(void)
cpumask_clear_cpu(cpu, cpu_core_mask(base + i));
cpumask_clear_cpu(base + i, cpu_core_mask(cpu));
}
-
- l2_cache = cpu_to_l2cache(cpu);
- for_each_present_cpu(i) {
- struct device_node *np = cpu_to_l2cache(i);
- if (!np)
- continue;
- if (np == l2_cache) {
- cpumask_clear_cpu(cpu, cpu_core_mask(i));
- cpumask_clear_cpu(i, cpu_core_mask(cpu));
- }
- of_node_put(np);
- }
- of_node_put(l2_cache);
-
+ traverse_core_siblings(cpu, false);
return 0;
}
diff --git a/arch/powerpc/kernel/softemu8xx.c b/arch/powerpc/kernel/softemu8xx.c
deleted file mode 100644
index 29b2f81dd709..000000000000
--- a/arch/powerpc/kernel/softemu8xx.c
+++ /dev/null
@@ -1,199 +0,0 @@
-/*
- * Software emulation of some PPC instructions for the 8xx core.
- *
- * Copyright (C) 1998 Dan Malek (dmalek@jlc.net)
- *
- * Software floating emuation for the MPC8xx processor. I did this mostly
- * because it was easier than trying to get the libraries compiled for
- * software floating point. The goal is still to get the libraries done,
- * but I lost patience and needed some hacks to at least get init and
- * shells running. The first problem is the setjmp/longjmp that save
- * and restore the floating point registers.
- *
- * For this emulation, our working registers are found on the register
- * save area.
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/stddef.h>
-#include <linux/unistd.h>
-#include <linux/ptrace.h>
-#include <linux/user.h>
-#include <linux/interrupt.h>
-
-#include <asm/pgtable.h>
-#include <asm/uaccess.h>
-#include <asm/io.h>
-
-/* Eventually we may need a look-up table, but this works for now.
-*/
-#define LFS 48
-#define LFD 50
-#define LFDU 51
-#define STFD 54
-#define STFDU 55
-#define FMR 63
-
-void print_8xx_pte(struct mm_struct *mm, unsigned long addr)
-{
- pgd_t *pgd;
- pmd_t *pmd;
- pte_t *pte;
-
- printk(" pte @ 0x%8lx: ", addr);
- pgd = pgd_offset(mm, addr & PAGE_MASK);
- if (pgd) {
- pmd = pmd_offset(pud_offset(pgd, addr & PAGE_MASK),
- addr & PAGE_MASK);
- if (pmd && pmd_present(*pmd)) {
- pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
- if (pte) {
- printk(" (0x%08lx)->(0x%08lx)->0x%08lx\n",
- (long)pgd, (long)pte, (long)pte_val(*pte));
-#define pp ((long)pte_val(*pte))
- printk(" RPN: %05lx PP: %lx SPS: %lx SH: %lx "
- "CI: %lx v: %lx\n",
- pp>>12, /* rpn */
- (pp>>10)&3, /* pp */
- (pp>>3)&1, /* small */
- (pp>>2)&1, /* shared */
- (pp>>1)&1, /* cache inhibit */
- pp&1 /* valid */
- );
-#undef pp
- }
- else {
- printk("no pte\n");
- }
- }
- else {
- printk("no pmd\n");
- }
- }
- else {
- printk("no pgd\n");
- }
-}
-
-int get_8xx_pte(struct mm_struct *mm, unsigned long addr)
-{
- pgd_t *pgd;
- pmd_t *pmd;
- pte_t *pte;
- int retval = 0;
-
- pgd = pgd_offset(mm, addr & PAGE_MASK);
- if (pgd) {
- pmd = pmd_offset(pud_offset(pgd, addr & PAGE_MASK),
- addr & PAGE_MASK);
- if (pmd && pmd_present(*pmd)) {
- pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
- if (pte) {
- retval = (int)pte_val(*pte);
- }
- }
- }
- return retval;
-}
-
-/*
- * We return 0 on success, 1 on unimplemented instruction, and EFAULT
- * if a load/store faulted.
- */
-int Soft_emulate_8xx(struct pt_regs *regs)
-{
- u32 inst, instword;
- u32 flreg, idxreg, disp;
- int retval;
- s16 sdisp;
- u32 *ea, *ip;
-
- retval = 0;
-
- instword = *((u32 *)regs->nip);
- inst = instword >> 26;
-
- flreg = (instword >> 21) & 0x1f;
- idxreg = (instword >> 16) & 0x1f;
- disp = instword & 0xffff;
-
- ea = (u32 *)(regs->gpr[idxreg] + disp);
- ip = (u32 *)&current->thread.TS_FPR(flreg);
-
- switch ( inst )
- {
- case LFD:
- /* this is a 16 bit quantity that is sign extended
- * so use a signed short here -- Cort
- */
- sdisp = (instword & 0xffff);
- ea = (u32 *)(regs->gpr[idxreg] + sdisp);
- if (copy_from_user(ip, ea, sizeof(double)))
- retval = -EFAULT;
- break;
-
- case LFDU:
- if (copy_from_user(ip, ea, sizeof(double)))
- retval = -EFAULT;
- else
- regs->gpr[idxreg] = (u32)ea;
- break;
- case LFS:
- sdisp = (instword & 0xffff);
- ea = (u32 *)(regs->gpr[idxreg] + sdisp);
- if (copy_from_user(ip, ea, sizeof(float)))
- retval = -EFAULT;
- break;
- case STFD:
- /* this is a 16 bit quantity that is sign extended
- * so use a signed short here -- Cort
- */
- sdisp = (instword & 0xffff);
- ea = (u32 *)(regs->gpr[idxreg] + sdisp);
- if (copy_to_user(ea, ip, sizeof(double)))
- retval = -EFAULT;
- break;
-
- case STFDU:
- if (copy_to_user(ea, ip, sizeof(double)))
- retval = -EFAULT;
- else
- regs->gpr[idxreg] = (u32)ea;
- break;
- case FMR:
- /* assume this is a fp move -- Cort */
- memcpy(ip, &current->thread.TS_FPR((instword>>11)&0x1f),
- sizeof(double));
- break;
- default:
- retval = 1;
- printk("Bad emulation %s/%d\n"
- " NIP: %08lx instruction: %08x opcode: %x "
- "A: %x B: %x C: %x code: %x rc: %x\n",
- current->comm,current->pid,
- regs->nip,
- instword,inst,
- (instword>>16)&0x1f,
- (instword>>11)&0x1f,
- (instword>>6)&0x1f,
- (instword>>1)&0x3ff,
- instword&1);
- {
- int pa;
- print_8xx_pte(current->mm,regs->nip);
- pa = get_8xx_pte(current->mm,regs->nip) & PAGE_MASK;
- pa |= (regs->nip & ~PAGE_MASK);
- pa = (unsigned long)__va(pa);
- printk("Kernel VA for NIP %x ", pa);
- print_8xx_pte(current->mm,pa);
- }
- }
-
- if (retval == 0)
- regs->nip += 4;
-
- return retval;
-}
diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S
index 86ac1d90d02b..22045984835f 100644
--- a/arch/powerpc/kernel/swsusp_asm64.S
+++ b/arch/powerpc/kernel/swsusp_asm64.S
@@ -46,10 +46,19 @@
#define SL_r29 0xe8
#define SL_r30 0xf0
#define SL_r31 0xf8
-#define SL_SIZE SL_r31+8
+#define SL_SPRG1 0x100
+#define SL_TCR 0x108
+#define SL_SIZE SL_TCR+8
/* these macros rely on the save area being
* pointed to by r11 */
+
+#define SAVE_SPR(register) \
+ mfspr r0, SPRN_##register ;\
+ std r0, SL_##register(r11)
+#define RESTORE_SPR(register) \
+ ld r0, SL_##register(r11) ;\
+ mtspr SPRN_##register, r0
#define SAVE_SPECIAL(special) \
mf##special r0 ;\
std r0, SL_##special(r11)
@@ -103,8 +112,15 @@ _GLOBAL(swsusp_arch_suspend)
SAVE_REGISTER(r30)
SAVE_REGISTER(r31)
SAVE_SPECIAL(MSR)
- SAVE_SPECIAL(SDR1)
SAVE_SPECIAL(XER)
+#ifdef CONFIG_PPC_BOOK3S_64
+ SAVE_SPECIAL(SDR1)
+#else
+ SAVE_SPR(TCR)
+
+ /* Save SPRG1, SPRG1 be used save paca */
+ SAVE_SPR(SPRG1)
+#endif
/* we push the stack up 128 bytes but don't store the
* stack pointer on the stack like a real stackframe */
@@ -151,6 +167,7 @@ copy_page_loop:
bne+ copyloop
nothing_to_copy:
+#ifdef CONFIG_PPC_BOOK3S_64
/* flush caches */
lis r3, 0x10
mtctr r3
@@ -167,6 +184,7 @@ nothing_to_copy:
sync
tlbia
+#endif
ld r11,swsusp_save_area_ptr@toc(r2)
@@ -208,16 +226,39 @@ nothing_to_copy:
RESTORE_REGISTER(r29)
RESTORE_REGISTER(r30)
RESTORE_REGISTER(r31)
+
+#ifdef CONFIG_PPC_BOOK3S_64
/* can't use RESTORE_SPECIAL(MSR) */
ld r0, SL_MSR(r11)
mtmsrd r0, 0
RESTORE_SPECIAL(SDR1)
+#else
+ /* Restore SPRG1, be used to save paca */
+ ld r0, SL_SPRG1(r11)
+ mtsprg 1, r0
+
+ RESTORE_SPECIAL(MSR)
+
+ /* Restore TCR and clear any pending bits in TSR. */
+ RESTORE_SPR(TCR)
+ lis r0, (TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS)@h
+ mtspr SPRN_TSR, r0
+
+ /* Kick decrementer */
+ li r0, 1
+ mtdec r0
+
+ /* Invalidate all tlbs */
+ bl _tlbil_all
+#endif
RESTORE_SPECIAL(XER)
sync
addi r1,r1,-128
+#ifdef CONFIG_PPC_BOOK3S_64
bl slb_flush_and_rebolt
+#endif
bl do_after_copyback
addi r1,r1,128
diff --git a/arch/powerpc/kernel/swsusp_booke.S b/arch/powerpc/kernel/swsusp_booke.S
index 11a39307dd71..0f204053e5b5 100644
--- a/arch/powerpc/kernel/swsusp_booke.S
+++ b/arch/powerpc/kernel/swsusp_booke.S
@@ -141,6 +141,14 @@ _GLOBAL(swsusp_arch_resume)
lis r11,swsusp_save_area@h
ori r11,r11,swsusp_save_area@l
+ /*
+ * Mappings from virtual addresses to physical addresses may be
+ * different than they were prior to restoring hibernation state.
+ * Invalidate the TLB so that the boot CPU is using the new
+ * mappings.
+ */
+ bl _tlbil_all
+
lwz r4,SL_SPRG0(r11)
mtsprg 0,r4
lwz r4,SL_SPRG1(r11)
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index e68a84568b8b..b4e667663d9b 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -17,6 +17,7 @@
#include <asm/machdep.h>
#include <asm/smp.h>
#include <asm/pmc.h>
+#include <asm/firmware.h>
#include "cacheinfo.h"
@@ -179,15 +180,25 @@ SYSFS_PMCSETUP(spurr, SPRN_SPURR);
SYSFS_PMCSETUP(dscr, SPRN_DSCR);
SYSFS_PMCSETUP(pir, SPRN_PIR);
+/*
+ Lets only enable read for phyp resources and
+ enable write when needed with a separate function.
+ Lets be conservative and default to pseries.
+*/
static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr);
-static DEVICE_ATTR(purr, 0600, show_purr, store_purr);
+static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
static DEVICE_ATTR(pir, 0400, show_pir, NULL);
unsigned long dscr_default = 0;
EXPORT_SYMBOL(dscr_default);
+static void add_write_permission_dev_attr(struct device_attribute *attr)
+{
+ attr->attr.mode |= 0200;
+}
+
static ssize_t show_dscr_default(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -341,7 +352,7 @@ static struct device_attribute pa6t_attrs[] = {
#endif /* HAS_PPC_PMC_PA6T */
#endif /* HAS_PPC_PMC_CLASSIC */
-static void __cpuinit register_cpu_online(unsigned int cpu)
+static void register_cpu_online(unsigned int cpu)
{
struct cpu *c = &per_cpu(cpu_devices, cpu);
struct device *s = &c->dev;
@@ -394,8 +405,11 @@ static void __cpuinit register_cpu_online(unsigned int cpu)
if (cpu_has_feature(CPU_FTR_MMCRA))
device_create_file(s, &dev_attr_mmcra);
- if (cpu_has_feature(CPU_FTR_PURR))
+ if (cpu_has_feature(CPU_FTR_PURR)) {
+ if (!firmware_has_feature(FW_FEATURE_LPAR))
+ add_write_permission_dev_attr(&dev_attr_purr);
device_create_file(s, &dev_attr_purr);
+ }
if (cpu_has_feature(CPU_FTR_SPURR))
device_create_file(s, &dev_attr_spurr);
@@ -502,7 +516,7 @@ ssize_t arch_cpu_release(const char *buf, size_t count)
#endif /* CONFIG_HOTPLUG_CPU */
-static int __cpuinit sysfs_cpu_notify(struct notifier_block *self,
+static int sysfs_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned int)(long)hcpu;
@@ -522,7 +536,7 @@ static int __cpuinit sysfs_cpu_notify(struct notifier_block *self,
return NOTIFY_OK;
}
-static struct notifier_block __cpuinitdata sysfs_cpu_nb = {
+static struct notifier_block sysfs_cpu_nb = {
.notifier_call = sysfs_cpu_notify,
};
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 5fc29ad7e26f..192b051df97e 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -210,18 +210,18 @@ static u64 scan_dispatch_log(u64 stop_tb)
if (!dtl)
return 0;
- if (i == vpa->dtl_idx)
+ if (i == be64_to_cpu(vpa->dtl_idx))
return 0;
- while (i < vpa->dtl_idx) {
+ while (i < be64_to_cpu(vpa->dtl_idx)) {
if (dtl_consumer)
dtl_consumer(dtl, i);
- dtb = dtl->timebase;
- tb_delta = dtl->enqueue_to_dispatch_time +
- dtl->ready_to_enqueue_time;
+ dtb = be64_to_cpu(dtl->timebase);
+ tb_delta = be32_to_cpu(dtl->enqueue_to_dispatch_time) +
+ be32_to_cpu(dtl->ready_to_enqueue_time);
barrier();
- if (i + N_DISPATCH_LOG < vpa->dtl_idx) {
+ if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) {
/* buffer has overflowed */
- i = vpa->dtl_idx - N_DISPATCH_LOG;
+ i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG;
dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
continue;
}
@@ -269,7 +269,7 @@ static inline u64 calculate_stolen_time(u64 stop_tb)
{
u64 stolen = 0;
- if (get_paca()->dtl_ridx != get_paca()->lppaca_ptr->dtl_idx) {
+ if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx)) {
stolen = scan_dispatch_log(stop_tb);
get_paca()->system_time -= stolen;
}
@@ -612,7 +612,7 @@ unsigned long long sched_clock(void)
static int __init get_freq(char *name, int cells, unsigned long *val)
{
struct device_node *cpu;
- const unsigned int *fp;
+ const __be32 *fp;
int found = 0;
/* The cpu node should have timebase and clock frequency properties */
@@ -631,7 +631,6 @@ static int __init get_freq(char *name, int cells, unsigned long *val)
return found;
}
-/* should become __cpuinit when secondary_cpu_time_init also is */
void start_cpu_decrementer(void)
{
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
@@ -1050,7 +1049,7 @@ static int __init rtc_init(void)
pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
- return PTR_RET(pdev);
+ return PTR_ERR_OR_ZERO(pdev);
}
module_init(rtc_init);
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 2da67e7a16d5..cd809eaa8b5c 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -79,6 +79,11 @@ _GLOBAL(tm_abort)
TABORT(R3)
blr
+ .section ".toc","aw"
+DSCR_DEFAULT:
+ .tc dscr_default[TC],dscr_default
+
+ .section ".text"
/* void tm_reclaim(struct thread_struct *thread,
* unsigned long orig_msr,
@@ -112,9 +117,19 @@ _GLOBAL(tm_reclaim)
std r3, STACK_PARAM(0)(r1)
SAVE_NVGPRS(r1)
+ /* We need to setup MSR for VSX register save instructions. Here we
+ * also clear the MSR RI since when we do the treclaim, we won't have a
+ * valid kernel pointer for a while. We clear RI here as it avoids
+ * adding another mtmsr closer to the treclaim. This makes the region
+ * maked as non-recoverable wider than it needs to be but it saves on
+ * inserting another mtmsrd later.
+ */
mfmsr r14
mr r15, r14
ori r15, r15, MSR_FP
+ li r16, MSR_RI
+ ori r16, r16, MSR_EE /* IRQs hard off */
+ andc r15, r15, r16
oris r15, r15, MSR_VEC@h
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
@@ -146,10 +161,10 @@ _GLOBAL(tm_reclaim)
mfvscr vr0
li r6, THREAD_TRANSACT_VSCR
stvx vr0, r3, r6
+dont_backup_vec:
mfspr r0, SPRN_VRSAVE
std r0, THREAD_TRANSACT_VRSAVE(r3)
-dont_backup_vec:
andi. r0, r4, MSR_FP
beq dont_backup_fp
@@ -178,11 +193,18 @@ dont_backup_fp:
std r1, PACATMSCRATCH(r13)
ld r1, PACAR1(r13)
+ /* Store the PPR in r11 and reset to decent value */
+ std r11, GPR11(r1) /* Temporary stash */
+ mfspr r11, SPRN_PPR
+ HMT_MEDIUM
+
/* Now get some more GPRS free */
std r7, GPR7(r1) /* Temporary stash */
std r12, GPR12(r1) /* '' '' '' */
ld r12, STACK_PARAM(0)(r1) /* Param 0, thread_struct * */
+ std r11, THREAD_TM_PPR(r12) /* Store PPR and free r11 */
+
addi r7, r12, PT_CKPT_REGS /* Thread's ckpt_regs */
/* Make r7 look like an exception frame so that we
@@ -194,15 +216,19 @@ dont_backup_fp:
SAVE_GPR(0, r7) /* user r0 */
SAVE_GPR(2, r7) /* user r2 */
SAVE_4GPRS(3, r7) /* user r3-r6 */
- SAVE_4GPRS(8, r7) /* user r8-r11 */
+ SAVE_GPR(8, r7) /* user r8 */
+ SAVE_GPR(9, r7) /* user r9 */
+ SAVE_GPR(10, r7) /* user r10 */
ld r3, PACATMSCRATCH(r13) /* user r1 */
ld r4, GPR7(r1) /* user r7 */
- ld r5, GPR12(r1) /* user r12 */
- GET_SCRATCH0(6) /* user r13 */
+ ld r5, GPR11(r1) /* user r11 */
+ ld r6, GPR12(r1) /* user r12 */
+ GET_SCRATCH0(8) /* user r13 */
std r3, GPR1(r7)
std r4, GPR7(r7)
- std r5, GPR12(r7)
- std r6, GPR13(r7)
+ std r5, GPR11(r7)
+ std r6, GPR12(r7)
+ std r8, GPR13(r7)
SAVE_NVGPRS(r7) /* user r14-r31 */
@@ -224,6 +250,14 @@ dont_backup_fp:
std r5, _CCR(r7)
std r6, _XER(r7)
+
+ /* ******************** TAR, DSCR ********** */
+ mfspr r3, SPRN_TAR
+ mfspr r4, SPRN_DSCR
+
+ std r3, THREAD_TM_TAR(r12)
+ std r4, THREAD_TM_DSCR(r12)
+
/* MSR and flags: We don't change CRs, and we don't need to alter
* MSR.
*/
@@ -239,7 +273,7 @@ dont_backup_fp:
std r3, THREAD_TM_TFHAR(r12)
std r4, THREAD_TM_TFIAR(r12)
- /* AMR and PPR are checkpointed too, but are unsupported by Linux. */
+ /* AMR is checkpointed too, but is unsupported by Linux. */
/* Restore original MSR/IRQ state & clear TM mode */
ld r14, TM_FRAME_L0(r1) /* Orig MSR */
@@ -255,6 +289,12 @@ dont_backup_fp:
mtcr r4
mtlr r0
ld r2, 40(r1)
+
+ /* Load system default DSCR */
+ ld r4, DSCR_DEFAULT@toc(r2)
+ ld r0, 0(r4)
+ mtspr SPRN_DSCR, r0
+
blr
@@ -322,11 +362,11 @@ _GLOBAL(tm_recheckpoint)
lvx vr0, r3, r5
mtvscr vr0
REST_32VRS(0, r5, r3) /* r5 scratch, r3 THREAD ptr */
+dont_restore_vec:
ld r5, THREAD_VRSAVE(r3)
mtspr SPRN_VRSAVE, r5
#endif
-dont_restore_vec:
andi. r0, r4, MSR_FP
beq dont_restore_fp
@@ -338,35 +378,52 @@ dont_restore_fp:
mtmsr r6 /* FP/Vec off again! */
restore_gprs:
+
/* ******************** CR,LR,CCR,MSR ********** */
- ld r3, _CTR(r7)
- ld r4, _LINK(r7)
- ld r5, _CCR(r7)
- ld r6, _XER(r7)
+ ld r4, _CTR(r7)
+ ld r5, _LINK(r7)
+ ld r6, _CCR(r7)
+ ld r8, _XER(r7)
- mtctr r3
- mtlr r4
- mtcr r5
- mtxer r6
+ mtctr r4
+ mtlr r5
+ mtcr r6
+ mtxer r8
- /* MSR and flags: We don't change CRs, and we don't need to alter
- * MSR.
+ /* ******************** TAR ******************** */
+ ld r4, THREAD_TM_TAR(r3)
+ mtspr SPRN_TAR, r4
+
+ /* Load up the PPR and DSCR in GPRs only at this stage */
+ ld r5, THREAD_TM_DSCR(r3)
+ ld r6, THREAD_TM_PPR(r3)
+
+ /* Clear the MSR RI since we are about to change R1. EE is already off
*/
+ li r4, 0
+ mtmsrd r4, 1
REST_4GPRS(0, r7) /* GPR0-3 */
- REST_GPR(4, r7) /* GPR4-6 */
- REST_GPR(5, r7)
- REST_GPR(6, r7)
+ REST_GPR(4, r7) /* GPR4 */
REST_4GPRS(8, r7) /* GPR8-11 */
REST_2GPRS(12, r7) /* GPR12-13 */
REST_NVGPRS(r7) /* GPR14-31 */
- ld r7, GPR7(r7) /* GPR7 */
+ /* Load up PPR and DSCR here so we don't run with user values for long
+ */
+ mtspr SPRN_DSCR, r5
+ mtspr SPRN_PPR, r6
+
+ REST_GPR(5, r7) /* GPR5-7 */
+ REST_GPR(6, r7)
+ ld r7, GPR7(r7)
/* Commit register state as checkpointed state: */
TRECHKPT
+ HMT_MEDIUM
+
/* Our transactional state has now changed.
*
* Now just get out of here. Transactional (current) state will be
@@ -377,6 +434,10 @@ restore_gprs:
GET_PACA(r13)
GET_SCRATCH0(r1)
+ /* R1 is restored, so we are recoverable again. EE is still off */
+ li r4, MSR_RI
+ mtmsrd r4, 1
+
REST_NVGPRS(r1)
addi r1, r1, TM_FRAME_SIZE
@@ -385,6 +446,12 @@ restore_gprs:
mtcr r4
mtlr r0
ld r2, 40(r1)
+
+ /* Load system default DSCR */
+ ld r4, DSCR_DEFAULT@toc(r2)
+ ld r0, 0(r4)
+ mtspr SPRN_DSCR, r0
+
blr
/* ****************************************************************** */
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index f18c79c324ef..f783c932faeb 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -44,9 +44,7 @@
#include <asm/machdep.h>
#include <asm/rtas.h>
#include <asm/pmc.h>
-#ifdef CONFIG_PPC32
#include <asm/reg.h>
-#endif
#ifdef CONFIG_PMAC_BACKLIGHT
#include <asm/backlight.h>
#endif
@@ -62,6 +60,7 @@
#include <asm/switch_to.h>
#include <asm/tm.h>
#include <asm/debug.h>
+#include <sysdev/fsl_pci.h>
#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
int (*__debugger)(struct pt_regs *regs) __read_mostly;
@@ -567,6 +566,8 @@ int machine_check_e500(struct pt_regs *regs)
if (reason & MCSR_BUS_RBERR) {
if (fsl_rio_mcheck_exception(regs))
return 1;
+ if (fsl_pci_mcheck_exception(regs))
+ return 1;
}
printk("Machine check in kernel mode.\n");
@@ -866,6 +867,10 @@ static int emulate_string_inst(struct pt_regs *regs, u32 instword)
u8 val;
u32 shift = 8 * (3 - (pos & 0x3));
+ /* if process is 32-bit, clear upper 32 bits of EA */
+ if ((regs->msr & MSR_64BIT) == 0)
+ EA &= 0xFFFFFFFF;
+
switch ((instword & PPC_INST_STRING_MASK)) {
case PPC_INST_LSWX:
case PPC_INST_LSWI:
@@ -960,7 +965,7 @@ static int emulate_instruction(struct pt_regs *regs)
u32 instword;
u32 rd;
- if (!user_mode(regs) || (regs->msr & MSR_LE))
+ if (!user_mode(regs))
return -EINVAL;
CHECK_FULL_REGS(regs);
@@ -1048,11 +1053,41 @@ int is_valid_bugaddr(unsigned long addr)
return is_kernel_addr(addr);
}
+#ifdef CONFIG_MATH_EMULATION
+static int emulate_math(struct pt_regs *regs)
+{
+ int ret;
+ extern int do_mathemu(struct pt_regs *regs);
+
+ ret = do_mathemu(regs);
+ if (ret >= 0)
+ PPC_WARN_EMULATED(math, regs);
+
+ switch (ret) {
+ case 0:
+ emulate_single_step(regs);
+ return 0;
+ case 1: {
+ int code = 0;
+ code = __parse_fpscr(current->thread.fpscr.val);
+ _exception(SIGFPE, regs, code, regs->nip);
+ return 0;
+ }
+ case -EFAULT:
+ _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
+ return 0;
+ }
+
+ return -1;
+}
+#else
+static inline int emulate_math(struct pt_regs *regs) { return -1; }
+#endif
+
void __kprobes program_check_exception(struct pt_regs *regs)
{
enum ctx_state prev_state = exception_enter();
unsigned int reason = get_reason(regs);
- extern int do_mathemu(struct pt_regs *regs);
/* We can now get here via a FP Unavailable exception if the core
* has no FPU, in that case the reason flags will be 0 */
@@ -1114,34 +1149,30 @@ void __kprobes program_check_exception(struct pt_regs *regs)
}
#endif
+ /*
+ * If we took the program check in the kernel skip down to sending a
+ * SIGILL. The subsequent cases all relate to emulating instructions
+ * which we should only do for userspace. We also do not want to enable
+ * interrupts for kernel faults because that might lead to further
+ * faults, and loose the context of the original exception.
+ */
+ if (!user_mode(regs))
+ goto sigill;
+
/* We restore the interrupt state now */
if (!arch_irq_disabled_regs(regs))
local_irq_enable();
-#ifdef CONFIG_MATH_EMULATION
/* (reason & REASON_ILLEGAL) would be the obvious thing here,
* but there seems to be a hardware bug on the 405GP (RevD)
* that means ESR is sometimes set incorrectly - either to
* ESR_DST (!?) or 0. In the process of chasing this with the
* hardware people - not sure if it can happen on any illegal
* instruction or only on FP instructions, whether there is a
- * pattern to occurrences etc. -dgibson 31/Mar/2003 */
- switch (do_mathemu(regs)) {
- case 0:
- emulate_single_step(regs);
- goto bail;
- case 1: {
- int code = 0;
- code = __parse_fpscr(current->thread.fpscr.val);
- _exception(SIGFPE, regs, code, regs->nip);
- goto bail;
- }
- case -EFAULT:
- _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
+ * pattern to occurrences etc. -dgibson 31/Mar/2003
+ */
+ if (!emulate_math(regs))
goto bail;
- }
- /* fall through on any other errors */
-#endif /* CONFIG_MATH_EMULATION */
/* Try to emulate it if we should. */
if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) {
@@ -1156,6 +1187,7 @@ void __kprobes program_check_exception(struct pt_regs *regs)
}
}
+sigill:
if (reason & REASON_PRIVILEGED)
_exception(SIGILL, regs, ILL_PRVOPC, regs->nip);
else
@@ -1165,6 +1197,16 @@ bail:
exception_exit(prev_state);
}
+/*
+ * This occurs when running in hypervisor mode on POWER6 or later
+ * and an illegal instruction is encountered.
+ */
+void __kprobes emulation_assist_interrupt(struct pt_regs *regs)
+{
+ regs->msr |= REASON_ILLEGAL;
+ program_check_exception(regs);
+}
+
void alignment_exception(struct pt_regs *regs)
{
enum ctx_state prev_state = exception_enter();
@@ -1272,26 +1314,60 @@ void vsx_unavailable_exception(struct pt_regs *regs)
die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT);
}
-void tm_unavailable_exception(struct pt_regs *regs)
+#ifdef CONFIG_PPC64
+void facility_unavailable_exception(struct pt_regs *regs)
{
+ static char *facility_strings[] = {
+ [FSCR_FP_LG] = "FPU",
+ [FSCR_VECVSX_LG] = "VMX/VSX",
+ [FSCR_DSCR_LG] = "DSCR",
+ [FSCR_PM_LG] = "PMU SPRs",
+ [FSCR_BHRB_LG] = "BHRB",
+ [FSCR_TM_LG] = "TM",
+ [FSCR_EBB_LG] = "EBB",
+ [FSCR_TAR_LG] = "TAR",
+ };
+ char *facility = "unknown";
+ u64 value;
+ u8 status;
+ bool hv;
+
+ hv = (regs->trap == 0xf80);
+ if (hv)
+ value = mfspr(SPRN_HFSCR);
+ else
+ value = mfspr(SPRN_FSCR);
+
+ status = value >> 56;
+ if (status == FSCR_DSCR_LG) {
+ /* User is acessing the DSCR. Set the inherit bit and allow
+ * the user to set it directly in future by setting via the
+ * FSCR DSCR bit. We always leave HFSCR DSCR set.
+ */
+ current->thread.dscr_inherit = 1;
+ mtspr(SPRN_FSCR, value | FSCR_DSCR);
+ return;
+ }
+
+ if ((status < ARRAY_SIZE(facility_strings)) &&
+ facility_strings[status])
+ facility = facility_strings[status];
+
/* We restore the interrupt state now */
if (!arch_irq_disabled_regs(regs))
local_irq_enable();
- /* Currently we never expect a TMU exception. Catch
- * this and kill the process!
- */
- printk(KERN_EMERG "Unexpected TM unavailable exception at %lx "
- "(msr %lx)\n",
- regs->nip, regs->msr);
+ pr_err("%sFacility '%s' unavailable, exception at 0x%lx, MSR=%lx\n",
+ hv ? "Hypervisor " : "", facility, regs->nip, regs->msr);
if (user_mode(regs)) {
_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
return;
}
- die("Unexpected TM unavailable exception", regs, SIGABRT);
+ die("Unexpected facility unavailable exception", regs, SIGABRT);
}
+#endif
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -1385,12 +1461,6 @@ void performance_monitor_exception(struct pt_regs *regs)
#ifdef CONFIG_8xx
void SoftwareEmulation(struct pt_regs *regs)
{
- extern int do_mathemu(struct pt_regs *);
- extern int Soft_emulate_8xx(struct pt_regs *);
-#if defined(CONFIG_MATH_EMULATION) || defined(CONFIG_8XX_MINIMAL_FPEMU)
- int errcode;
-#endif
-
CHECK_FULL_REGS(regs);
if (!user_mode(regs)) {
@@ -1398,48 +1468,10 @@ void SoftwareEmulation(struct pt_regs *regs)
die("Kernel Mode Software FPU Emulation", regs, SIGFPE);
}
-#ifdef CONFIG_MATH_EMULATION
- errcode = do_mathemu(regs);
- if (errcode >= 0)
- PPC_WARN_EMULATED(math, regs);
-
- switch (errcode) {
- case 0:
- emulate_single_step(regs);
+ if (!emulate_math(regs))
return;
- case 1: {
- int code = 0;
- code = __parse_fpscr(current->thread.fpscr.val);
- _exception(SIGFPE, regs, code, regs->nip);
- return;
- }
- case -EFAULT:
- _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
- return;
- default:
- _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
- return;
- }
-
-#elif defined(CONFIG_8XX_MINIMAL_FPEMU)
- errcode = Soft_emulate_8xx(regs);
- if (errcode >= 0)
- PPC_WARN_EMULATED(8xx, regs);
- switch (errcode) {
- case 0:
- emulate_single_step(regs);
- return;
- case 1:
- _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
- return;
- case -EFAULT:
- _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
- return;
- }
-#else
_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
-#endif
}
#endif /* CONFIG_8xx */
@@ -1786,8 +1818,6 @@ struct ppc_emulated ppc_emulated = {
WARN_EMULATED_SETUP(unaligned),
#ifdef CONFIG_MATH_EMULATION
WARN_EMULATED_SETUP(math),
-#elif defined(CONFIG_8XX_MINIMAL_FPEMU)
- WARN_EMULATED_SETUP(8xx),
#endif
#ifdef CONFIG_VSX
WARN_EMULATED_SETUP(vsx),
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index 9d3fdcd66290..a15837519dca 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -50,7 +50,7 @@ void __init udbg_early_init(void)
udbg_init_debug_beat();
#elif defined(CONFIG_PPC_EARLY_DEBUG_PAS_REALMODE)
udbg_init_pas_realmode();
-#elif defined(CONFIG_BOOTX_TEXT)
+#elif defined(CONFIG_PPC_EARLY_DEBUG_BOOTX)
udbg_init_btext();
#elif defined(CONFIG_PPC_EARLY_DEBUG_44x)
/* PPC44x debug */
diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c
index 6837f839ab78..75702e207b29 100644
--- a/arch/powerpc/kernel/udbg_16550.c
+++ b/arch/powerpc/kernel/udbg_16550.c
@@ -18,23 +18,19 @@ extern void real_writeb(u8 data, volatile u8 __iomem *addr);
extern u8 real_205_readb(volatile u8 __iomem *addr);
extern void real_205_writeb(u8 data, volatile u8 __iomem *addr);
-struct NS16550 {
- /* this struct must be packed */
- unsigned char rbr; /* 0 */
- unsigned char ier; /* 1 */
- unsigned char fcr; /* 2 */
- unsigned char lcr; /* 3 */
- unsigned char mcr; /* 4 */
- unsigned char lsr; /* 5 */
- unsigned char msr; /* 6 */
- unsigned char scr; /* 7 */
-};
-
-#define thr rbr
-#define iir fcr
-#define dll rbr
-#define dlm ier
-#define dlab lcr
+#define UART_RBR 0
+#define UART_IER 1
+#define UART_FCR 2
+#define UART_LCR 3
+#define UART_MCR 4
+#define UART_LSR 5
+#define UART_MSR 6
+#define UART_SCR 7
+#define UART_THR UART_RBR
+#define UART_IIR UART_FCR
+#define UART_DLL UART_RBR
+#define UART_DLM UART_IER
+#define UART_DLAB UART_LCR
#define LSR_DR 0x01 /* Data ready */
#define LSR_OE 0x02 /* Overrun */
@@ -47,52 +43,62 @@ struct NS16550 {
#define LCR_DLAB 0x80
-static struct NS16550 __iomem *udbg_comport;
+static u8 (*udbg_uart_in)(unsigned int reg);
+static void (*udbg_uart_out)(unsigned int reg, u8 data);
-static void udbg_550_flush(void)
+static void udbg_uart_flush(void)
{
- if (udbg_comport) {
- while ((in_8(&udbg_comport->lsr) & LSR_THRE) == 0)
- /* wait for idle */;
- }
+ if (!udbg_uart_in)
+ return;
+
+ /* wait for idle */
+ while ((udbg_uart_in(UART_LSR) & LSR_THRE) == 0)
+ cpu_relax();
}
-static void udbg_550_putc(char c)
+static void udbg_uart_putc(char c)
{
- if (udbg_comport) {
- if (c == '\n')
- udbg_550_putc('\r');
- udbg_550_flush();
- out_8(&udbg_comport->thr, c);
- }
+ if (!udbg_uart_out)
+ return;
+
+ if (c == '\n')
+ udbg_uart_putc('\r');
+ udbg_uart_flush();
+ udbg_uart_out(UART_THR, c);
}
-static int udbg_550_getc_poll(void)
+static int udbg_uart_getc_poll(void)
{
- if (udbg_comport) {
- if ((in_8(&udbg_comport->lsr) & LSR_DR) != 0)
- return in_8(&udbg_comport->rbr);
- else
- return -1;
- }
+ if (!udbg_uart_in || !(udbg_uart_in(UART_LSR) & LSR_DR))
+ return udbg_uart_in(UART_RBR);
return -1;
}
-static int udbg_550_getc(void)
+static int udbg_uart_getc(void)
{
- if (udbg_comport) {
- while ((in_8(&udbg_comport->lsr) & LSR_DR) == 0)
- /* wait for char */;
- return in_8(&udbg_comport->rbr);
- }
- return -1;
+ if (!udbg_uart_in)
+ return -1;
+ /* wait for char */
+ while (!(udbg_uart_in(UART_LSR) & LSR_DR))
+ cpu_relax();
+ return udbg_uart_in(UART_RBR);
+}
+
+static void udbg_use_uart(void)
+{
+ udbg_putc = udbg_uart_putc;
+ udbg_flush = udbg_uart_flush;
+ udbg_getc = udbg_uart_getc;
+ udbg_getc_poll = udbg_uart_getc_poll;
}
-void udbg_init_uart(void __iomem *comport, unsigned int speed,
- unsigned int clock)
+void udbg_uart_setup(unsigned int speed, unsigned int clock)
{
unsigned int dll, base_bauds;
+ if (!udbg_uart_out)
+ return;
+
if (clock == 0)
clock = 1843200;
if (speed == 0)
@@ -101,51 +107,43 @@ void udbg_init_uart(void __iomem *comport, unsigned int speed,
base_bauds = clock / 16;
dll = base_bauds / speed;
- if (comport) {
- udbg_comport = (struct NS16550 __iomem *)comport;
- out_8(&udbg_comport->lcr, 0x00);
- out_8(&udbg_comport->ier, 0xff);
- out_8(&udbg_comport->ier, 0x00);
- out_8(&udbg_comport->lcr, LCR_DLAB);
- out_8(&udbg_comport->dll, dll & 0xff);
- out_8(&udbg_comport->dlm, dll >> 8);
- /* 8 data, 1 stop, no parity */
- out_8(&udbg_comport->lcr, 0x03);
- /* RTS/DTR */
- out_8(&udbg_comport->mcr, 0x03);
- /* Clear & enable FIFOs */
- out_8(&udbg_comport->fcr ,0x07);
- udbg_putc = udbg_550_putc;
- udbg_flush = udbg_550_flush;
- udbg_getc = udbg_550_getc;
- udbg_getc_poll = udbg_550_getc_poll;
- }
+ udbg_uart_out(UART_LCR, 0x00);
+ udbg_uart_out(UART_IER, 0xff);
+ udbg_uart_out(UART_IER, 0x00);
+ udbg_uart_out(UART_LCR, LCR_DLAB);
+ udbg_uart_out(UART_DLL, dll & 0xff);
+ udbg_uart_out(UART_DLM, dll >> 8);
+ /* 8 data, 1 stop, no parity */
+ udbg_uart_out(UART_LCR, 0x3);
+ /* RTS/DTR */
+ udbg_uart_out(UART_MCR, 0x3);
+ /* Clear & enable FIFOs */
+ udbg_uart_out(UART_FCR, 0x7);
}
-unsigned int udbg_probe_uart_speed(void __iomem *comport, unsigned int clock)
+unsigned int udbg_probe_uart_speed(unsigned int clock)
{
unsigned int dll, dlm, divisor, prescaler, speed;
u8 old_lcr;
- struct NS16550 __iomem *port = comport;
- old_lcr = in_8(&port->lcr);
+ old_lcr = udbg_uart_in(UART_LCR);
/* select divisor latch registers. */
- out_8(&port->lcr, LCR_DLAB);
+ udbg_uart_out(UART_LCR, old_lcr | LCR_DLAB);
/* now, read the divisor */
- dll = in_8(&port->dll);
- dlm = in_8(&port->dlm);
+ dll = udbg_uart_in(UART_DLL);
+ dlm = udbg_uart_in(UART_DLM);
divisor = dlm << 8 | dll;
/* check prescaling */
- if (in_8(&port->mcr) & 0x80)
+ if (udbg_uart_in(UART_MCR) & 0x80)
prescaler = 4;
else
prescaler = 1;
/* restore the LCR */
- out_8(&port->lcr, old_lcr);
+ udbg_uart_out(UART_LCR, old_lcr);
/* calculate speed */
speed = (clock / prescaler) / (divisor * 16);
@@ -157,195 +155,155 @@ unsigned int udbg_probe_uart_speed(void __iomem *comport, unsigned int clock)
return speed;
}
-#ifdef CONFIG_PPC_MAPLE
-void udbg_maple_real_flush(void)
+static union {
+ unsigned char __iomem *mmio_base;
+ unsigned long pio_base;
+} udbg_uart;
+
+static unsigned int udbg_uart_stride = 1;
+
+static u8 udbg_uart_in_pio(unsigned int reg)
{
- if (udbg_comport) {
- while ((real_readb(&udbg_comport->lsr) & LSR_THRE) == 0)
- /* wait for idle */;
- }
+ return inb(udbg_uart.pio_base + (reg * udbg_uart_stride));
}
-void udbg_maple_real_putc(char c)
+static void udbg_uart_out_pio(unsigned int reg, u8 data)
{
- if (udbg_comport) {
- if (c == '\n')
- udbg_maple_real_putc('\r');
- udbg_maple_real_flush();
- real_writeb(c, &udbg_comport->thr); eieio();
- }
+ outb(data, udbg_uart.pio_base + (reg * udbg_uart_stride));
}
-void __init udbg_init_maple_realmode(void)
+void udbg_uart_init_pio(unsigned long port, unsigned int stride)
{
- udbg_comport = (struct NS16550 __iomem *)0xf40003f8;
-
- udbg_putc = udbg_maple_real_putc;
- udbg_flush = udbg_maple_real_flush;
- udbg_getc = NULL;
- udbg_getc_poll = NULL;
+ if (!port)
+ return;
+ udbg_uart.pio_base = port;
+ udbg_uart_stride = stride;
+ udbg_uart_in = udbg_uart_in_pio;
+ udbg_uart_out = udbg_uart_out_pio;
+ udbg_use_uart();
}
-#endif /* CONFIG_PPC_MAPLE */
-#ifdef CONFIG_PPC_PASEMI
-void udbg_pas_real_flush(void)
+static u8 udbg_uart_in_mmio(unsigned int reg)
{
- if (udbg_comport) {
- while ((real_205_readb(&udbg_comport->lsr) & LSR_THRE) == 0)
- /* wait for idle */;
- }
+ return in_8(udbg_uart.mmio_base + (reg * udbg_uart_stride));
}
-void udbg_pas_real_putc(char c)
+static void udbg_uart_out_mmio(unsigned int reg, u8 data)
{
- if (udbg_comport) {
- if (c == '\n')
- udbg_pas_real_putc('\r');
- udbg_pas_real_flush();
- real_205_writeb(c, &udbg_comport->thr); eieio();
- }
+ out_8(udbg_uart.mmio_base + (reg * udbg_uart_stride), data);
}
-void udbg_init_pas_realmode(void)
-{
- udbg_comport = (struct NS16550 __iomem *)0xfcff03f8UL;
- udbg_putc = udbg_pas_real_putc;
- udbg_flush = udbg_pas_real_flush;
- udbg_getc = NULL;
- udbg_getc_poll = NULL;
+void udbg_uart_init_mmio(void __iomem *addr, unsigned int stride)
+{
+ if (!addr)
+ return;
+ udbg_uart.mmio_base = addr;
+ udbg_uart_stride = stride;
+ udbg_uart_in = udbg_uart_in_mmio;
+ udbg_uart_out = udbg_uart_out_mmio;
+ udbg_use_uart();
}
-#endif /* CONFIG_PPC_MAPLE */
-#ifdef CONFIG_PPC_EARLY_DEBUG_44x
-#include <platforms/44x/44x.h>
+#ifdef CONFIG_PPC_MAPLE
+
+#define UDBG_UART_MAPLE_ADDR ((void __iomem *)0xf40003f8)
-static void udbg_44x_as1_flush(void)
+static u8 udbg_uart_in_maple(unsigned int reg)
{
- if (udbg_comport) {
- while ((as1_readb(&udbg_comport->lsr) & LSR_THRE) == 0)
- /* wait for idle */;
- }
+ return real_readb(UDBG_UART_MAPLE_ADDR + reg);
}
-static void udbg_44x_as1_putc(char c)
+static void udbg_uart_out_maple(unsigned int reg, u8 val)
{
- if (udbg_comport) {
- if (c == '\n')
- udbg_44x_as1_putc('\r');
- udbg_44x_as1_flush();
- as1_writeb(c, &udbg_comport->thr); eieio();
- }
+ real_writeb(val, UDBG_UART_MAPLE_ADDR + reg);
}
-static int udbg_44x_as1_getc(void)
+void __init udbg_init_maple_realmode(void)
{
- if (udbg_comport) {
- while ((as1_readb(&udbg_comport->lsr) & LSR_DR) == 0)
- ; /* wait for char */
- return as1_readb(&udbg_comport->rbr);
- }
- return -1;
+ udbg_uart_in = udbg_uart_in_maple;
+ udbg_uart_out = udbg_uart_out_maple;
+ udbg_use_uart();
}
-void __init udbg_init_44x_as1(void)
-{
- udbg_comport =
- (struct NS16550 __iomem *)PPC44x_EARLY_DEBUG_VIRTADDR;
+#endif /* CONFIG_PPC_MAPLE */
- udbg_putc = udbg_44x_as1_putc;
- udbg_flush = udbg_44x_as1_flush;
- udbg_getc = udbg_44x_as1_getc;
-}
-#endif /* CONFIG_PPC_EARLY_DEBUG_44x */
+#ifdef CONFIG_PPC_PASEMI
-#ifdef CONFIG_PPC_EARLY_DEBUG_40x
-static void udbg_40x_real_flush(void)
+#define UDBG_UART_PAS_ADDR ((void __iomem *)0xfcff03f8UL)
+
+static u8 udbg_uart_in_pas(unsigned int reg)
{
- if (udbg_comport) {
- while ((real_readb(&udbg_comport->lsr) & LSR_THRE) == 0)
- /* wait for idle */;
- }
+ return real_205_readb(UDBG_UART_PAS_ADDR + reg);
}
-static void udbg_40x_real_putc(char c)
+static void udbg_uart_out_pas(unsigned int reg, u8 val)
{
- if (udbg_comport) {
- if (c == '\n')
- udbg_40x_real_putc('\r');
- udbg_40x_real_flush();
- real_writeb(c, &udbg_comport->thr); eieio();
- }
+ real_205_writeb(val, UDBG_UART_PAS_ADDR + reg);
}
-static int udbg_40x_real_getc(void)
+void __init udbg_init_pas_realmode(void)
{
- if (udbg_comport) {
- while ((real_readb(&udbg_comport->lsr) & LSR_DR) == 0)
- ; /* wait for char */
- return real_readb(&udbg_comport->rbr);
- }
- return -1;
+ udbg_uart_in = udbg_uart_in_pas;
+ udbg_uart_out = udbg_uart_out_pas;
+ udbg_use_uart();
}
-void __init udbg_init_40x_realmode(void)
-{
- udbg_comport = (struct NS16550 __iomem *)
- CONFIG_PPC_EARLY_DEBUG_40x_PHYSADDR;
+#endif /* CONFIG_PPC_PASEMI */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_44x
- udbg_putc = udbg_40x_real_putc;
- udbg_flush = udbg_40x_real_flush;
- udbg_getc = udbg_40x_real_getc;
- udbg_getc_poll = NULL;
+#include <platforms/44x/44x.h>
+
+static u8 udbg_uart_in_44x_as1(unsigned int reg)
+{
+ return as1_readb((void __iomem *)PPC44x_EARLY_DEBUG_VIRTADDR + reg);
}
-#endif /* CONFIG_PPC_EARLY_DEBUG_40x */
-#ifdef CONFIG_PPC_EARLY_DEBUG_WSP
-static void udbg_wsp_flush(void)
+static void udbg_uart_out_44x_as1(unsigned int reg, u8 val)
{
- if (udbg_comport) {
- while ((readb(&udbg_comport->lsr) & LSR_THRE) == 0)
- /* wait for idle */;
- }
+ as1_writeb(val, (void __iomem *)PPC44x_EARLY_DEBUG_VIRTADDR + reg);
}
-static void udbg_wsp_putc(char c)
+void __init udbg_init_44x_as1(void)
{
- if (udbg_comport) {
- if (c == '\n')
- udbg_wsp_putc('\r');
- udbg_wsp_flush();
- writeb(c, &udbg_comport->thr); eieio();
- }
+ udbg_uart_in = udbg_uart_in_44x_as1;
+ udbg_uart_out = udbg_uart_out_44x_as1;
+ udbg_use_uart();
}
-static int udbg_wsp_getc(void)
+#endif /* CONFIG_PPC_EARLY_DEBUG_44x */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_40x
+
+static u8 udbg_uart_in_40x(unsigned int reg)
{
- if (udbg_comport) {
- while ((readb(&udbg_comport->lsr) & LSR_DR) == 0)
- ; /* wait for char */
- return readb(&udbg_comport->rbr);
- }
- return -1;
+ return real_readb((void __iomem *)CONFIG_PPC_EARLY_DEBUG_40x_PHYSADDR
+ + reg);
}
-static int udbg_wsp_getc_poll(void)
+static void udbg_uart_out_40x(unsigned int reg, u8 val)
{
- if (udbg_comport)
- if (readb(&udbg_comport->lsr) & LSR_DR)
- return readb(&udbg_comport->rbr);
- return -1;
+ real_writeb(val, (void __iomem *)CONFIG_PPC_EARLY_DEBUG_40x_PHYSADDR
+ + reg);
}
-void __init udbg_init_wsp(void)
+void __init udbg_init_40x_realmode(void)
{
- udbg_comport = (struct NS16550 __iomem *)WSP_UART_VIRT;
+ udbg_uart_in = udbg_uart_in_40x;
+ udbg_uart_out = udbg_uart_out_40x;
+ udbg_use_uart();
+}
- udbg_init_uart(udbg_comport, 57600, 50000000);
+#endif /* CONFIG_PPC_EARLY_DEBUG_40x */
+
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_WSP
- udbg_putc = udbg_wsp_putc;
- udbg_flush = udbg_wsp_flush;
- udbg_getc = udbg_wsp_getc;
- udbg_getc_poll = udbg_wsp_getc_poll;
+void __init udbg_init_wsp(void)
+{
+ udbg_uart_init_mmio((void *)WSP_UART_VIRT, 1);
+ udbg_uart_setup(57600, 50000000);
}
+
#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index d4f463ac65b1..1d9c92621b36 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -711,7 +711,7 @@ static void __init vdso_setup_syscall_map(void)
}
#ifdef CONFIG_PPC64
-int __cpuinit vdso_getcpu_init(void)
+int vdso_getcpu_init(void)
{
unsigned long cpu, node, val;
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index 27e2f623210b..6b1f2a6d5517 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -232,9 +232,9 @@ __do_get_tspec:
lwz r6,(CFG_TB_ORIG_STAMP+4)(r9)
/* Get a stable TB value */
-2: mftbu r3
- mftbl r4
- mftbu r0
+2: mfspr r3, SPRN_TBRU
+ mfspr r4, SPRN_TBRL
+ mfspr r0, SPRN_TBRU
cmplw cr0,r3,r0
bne- 2b
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index 536016d792ba..d38cc08b16c7 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -1153,7 +1153,7 @@ EXPORT_SYMBOL(vio_h_cop_sync);
static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
{
- const unsigned char *dma_window;
+ const __be32 *dma_window;
struct iommu_table *tbl;
unsigned long offset, size;
@@ -1312,8 +1312,7 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
{
struct vio_dev *viodev;
struct device_node *parent_node;
- const unsigned int *unit_address;
- const unsigned int *pfo_resid = NULL;
+ const __be32 *prop;
enum vio_dev_family family;
const char *of_node_name = of_node->name ? of_node->name : "<unknown>";
@@ -1360,6 +1359,8 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
/* we need the 'device_type' property, in order to match with drivers */
viodev->family = family;
if (viodev->family == VDEVICE) {
+ unsigned int unit_address;
+
if (of_node->type != NULL)
viodev->type = of_node->type;
else {
@@ -1368,24 +1369,24 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
goto out;
}
- unit_address = of_get_property(of_node, "reg", NULL);
- if (unit_address == NULL) {
+ prop = of_get_property(of_node, "reg", NULL);
+ if (prop == NULL) {
pr_warn("%s: node %s missing 'reg'\n",
__func__, of_node_name);
goto out;
}
- dev_set_name(&viodev->dev, "%x", *unit_address);
+ unit_address = of_read_number(prop, 1);
+ dev_set_name(&viodev->dev, "%x", unit_address);
viodev->irq = irq_of_parse_and_map(of_node, 0);
- viodev->unit_address = *unit_address;
+ viodev->unit_address = unit_address;
} else {
/* PFO devices need their resource_id for submitting COP_OPs
* This is an optional field for devices, but is required when
* performing synchronous ops */
- pfo_resid = of_get_property(of_node, "ibm,resource-id", NULL);
- if (pfo_resid != NULL)
- viodev->resource_id = *pfo_resid;
+ prop = of_get_property(of_node, "ibm,resource-id", NULL);
+ if (prop != NULL)
+ viodev->resource_id = of_read_number(prop, 1);
- unit_address = NULL;
dev_set_name(&viodev->dev, "%s", of_node_name);
viodev->type = of_node_name;
viodev->irq = 0;
@@ -1529,11 +1530,15 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
const char *cp;
dn = dev->of_node;
- if (!dn)
- return -ENODEV;
+ if (!dn) {
+ strcat(buf, "\n");
+ return strlen(buf);
+ }
cp = of_get_property(dn, "compatible", NULL);
- if (!cp)
- return -ENODEV;
+ if (!cp) {
+ strcat(buf, "\n");
+ return strlen(buf);
+ }
return sprintf(buf, "vio:T%sS%s\n", vio_dev->type, cp);
}
@@ -1622,7 +1627,6 @@ static struct vio_dev *vio_find_name(const char *name)
*/
struct vio_dev *vio_find_node(struct device_node *vnode)
{
- const uint32_t *unit_address;
char kobj_name[20];
struct device_node *vnode_parent;
const char *dev_type;
@@ -1638,10 +1642,13 @@ struct vio_dev *vio_find_node(struct device_node *vnode)
/* construct the kobject name from the device node */
if (!strcmp(dev_type, "vdevice")) {
- unit_address = of_get_property(vnode, "reg", NULL);
- if (!unit_address)
+ const __be32 *prop;
+
+ prop = of_get_property(vnode, "reg", NULL);
+ if (!prop)
return NULL;
- snprintf(kobj_name, sizeof(kobj_name), "%x", *unit_address);
+ snprintf(kobj_name, sizeof(kobj_name), "%x",
+ (uint32_t)of_read_number(prop, 1));
} else if (!strcmp(dev_type, "ibm,platform-facilities"))
snprintf(kobj_name, sizeof(kobj_name), "%s", vnode->name);
else
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 654e479802f2..f096e72262f4 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -38,9 +38,6 @@ jiffies = jiffies_64 + 4;
#endif
SECTIONS
{
- . = 0;
- reloc_start = .;
-
. = KERNELBASE;
/*