From dee4102a9a5882b4f7d5cc165ba29e8cc63cf92e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 11 Jan 2009 00:29:15 -0800 Subject: sparseirq: use kstat_irqs_cpu instead Impact: build fix Ingo Molnar wrote: > tip/arch/blackfin/kernel/irqchip.c: In function 'show_interrupts': > tip/arch/blackfin/kernel/irqchip.c:85: error: 'struct kernel_stat' has no member named 'irqs' > make[2]: *** [arch/blackfin/kernel/irqchip.o] Error 1 > make[2]: *** Waiting for unfinished jobs.... > So could move kstat_irqs array to irq_desc struct. (s390, m68k, sparc) are not touched yet, because they don't support genirq Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/irq.c | 2 +- arch/powerpc/platforms/cell/interrupt.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 23b8b5e36f98..17efb7118db1 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -190,7 +190,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, "%3d: ", i); #ifdef CONFIG_SMP for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); + seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); #else seq_printf(p, "%10u ", kstat_irqs(i)); #endif /* CONFIG_SMP */ diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 28c04dab2633..1f0d774ad928 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -254,7 +254,7 @@ static void handle_iic_irq(unsigned int irq, struct irq_desc *desc) goto out_eoi; } - kstat_cpu(cpu).irqs[irq]++; + kstat_incr_irqs_this_cpu(irq, desc); /* Mark the IRQ currently in progress.*/ desc->status |= IRQ_INPROGRESS; -- cgit v1.2.3-58-ga151 From e65e49d0f3714f4a6a42f6f6a19926ba33fcda75 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 12 Jan 2009 15:27:13 -0800 Subject: irq: update all arches for new irq_desc Impact: cleanup, update to new cpumask API Irq_desc.affinity and irq_desc.pending_mask are now cpumask_var_t's so access to them should be using the new cpumask API. Signed-off-by: Mike Travis --- arch/alpha/kernel/irq.c | 2 +- arch/arm/kernel/irq.c | 18 ++++++++++++------ arch/arm/oprofile/op_model_mpcore.c | 2 +- arch/blackfin/kernel/irqchip.c | 5 +++++ arch/ia64/kernel/iosapic.c | 2 +- arch/ia64/kernel/irq.c | 4 ++-- arch/ia64/kernel/msi_ia64.c | 4 ++-- arch/ia64/sn/kernel/msi_sn.c | 2 +- arch/mips/include/asm/irq.h | 2 +- arch/mips/kernel/irq-gic.c | 2 +- arch/mips/kernel/smtc.c | 2 +- arch/mips/mti-malta/malta-smtc.c | 5 +++-- arch/parisc/kernel/irq.c | 8 ++++---- arch/powerpc/kernel/irq.c | 2 +- arch/powerpc/platforms/pseries/xics.c | 5 +++-- arch/powerpc/sysdev/mpic.c | 3 ++- arch/sparc/kernel/irq_64.c | 5 +++-- 17 files changed, 44 insertions(+), 29 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index 703731accda6..7bc7489223f3 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -55,7 +55,7 @@ int irq_select_affinity(unsigned int irq) cpu = (cpu < (NR_CPUS-1) ? cpu + 1 : 0); last_cpu = cpu; - irq_desc[irq].affinity = cpumask_of_cpu(cpu); + cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu)); irq_desc[irq].chip->set_affinity(irq, cpumask_of(cpu)); return 0; } diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 7141cee1fab7..4bb723eadad1 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -104,6 +104,11 @@ static struct irq_desc bad_irq_desc = { .lock = SPIN_LOCK_UNLOCKED }; +#ifdef CONFIG_CPUMASK_OFFSTACK +/* We are not allocating bad_irq_desc.affinity or .pending_mask */ +#error "ARM architecture does not support CONFIG_CPUMASK_OFFSTACK." +#endif + /* * do_IRQ handles all hardware IRQ's. Decoded IRQs should not * come via this function. Instead, they should provide their @@ -161,7 +166,7 @@ void __init init_IRQ(void) irq_desc[irq].status |= IRQ_NOREQUEST | IRQ_NOPROBE; #ifdef CONFIG_SMP - bad_irq_desc.affinity = CPU_MASK_ALL; + cpumask_setall(bad_irq_desc.affinity); bad_irq_desc.cpu = smp_processor_id(); #endif init_arch_irq(); @@ -191,15 +196,16 @@ void migrate_irqs(void) struct irq_desc *desc = irq_desc + i; if (desc->cpu == cpu) { - unsigned int newcpu = any_online_cpu(desc->affinity); - - if (newcpu == NR_CPUS) { + unsigned int newcpu = cpumask_any_and(desc->affinity, + cpu_online_mask); + if (newcpu >= nr_cpu_ids) { if (printk_ratelimit()) printk(KERN_INFO "IRQ%u no longer affine to CPU%u\n", i, cpu); - cpus_setall(desc->affinity); - newcpu = any_online_cpu(desc->affinity); + cpumask_setall(desc->affinity); + newcpu = cpumask_any_and(desc->affinity, + cpu_online_mask); } route_irq(desc, i, newcpu); diff --git a/arch/arm/oprofile/op_model_mpcore.c b/arch/arm/oprofile/op_model_mpcore.c index 6d6bd5899240..853d42bb8682 100644 --- a/arch/arm/oprofile/op_model_mpcore.c +++ b/arch/arm/oprofile/op_model_mpcore.c @@ -263,7 +263,7 @@ static void em_route_irq(int irq, unsigned int cpu) const struct cpumask *mask = cpumask_of(cpu); spin_lock_irq(&desc->lock); - desc->affinity = *mask; + cpumask_copy(desc->affinity, mask); desc->chip->set_affinity(irq, mask); spin_unlock_irq(&desc->lock); } diff --git a/arch/blackfin/kernel/irqchip.c b/arch/blackfin/kernel/irqchip.c index ab8209cbbad0..5780d6df1542 100644 --- a/arch/blackfin/kernel/irqchip.c +++ b/arch/blackfin/kernel/irqchip.c @@ -69,6 +69,11 @@ static struct irq_desc bad_irq_desc = { #endif }; +#ifdef CONFIG_CPUMASK_OFFSTACK +/* We are not allocating a variable-sized bad_irq_desc.affinity */ +#error "Blackfin architecture does not support CONFIG_CPUMASK_OFFSTACK." +#endif + int show_interrupts(struct seq_file *p, void *v) { int i = *(loff_t *) v, j; diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 5cfd3d91001a..006ad366a454 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -880,7 +880,7 @@ iosapic_unregister_intr (unsigned int gsi) if (iosapic_intr_info[irq].count == 0) { #ifdef CONFIG_SMP /* Clear affinity */ - cpus_setall(idesc->affinity); + cpumask_setall(idesc->affinity); #endif /* Clear the interrupt information */ iosapic_intr_info[irq].dest = 0; diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index a58f64ca9f0e..226233a6fa19 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -103,7 +103,7 @@ static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 }; void set_irq_affinity_info (unsigned int irq, int hwid, int redir) { if (irq < NR_IRQS) { - cpumask_copy(&irq_desc[irq].affinity, + cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu_logical_id(hwid))); irq_redir[irq] = (char) (redir & 0xff); } @@ -148,7 +148,7 @@ static void migrate_irqs(void) if (desc->status == IRQ_PER_CPU) continue; - if (cpumask_any_and(&irq_desc[irq].affinity, cpu_online_mask) + if (cpumask_any_and(irq_desc[irq].affinity, cpu_online_mask) >= nr_cpu_ids) { /* * Save it for phase 2 processing diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c index 890339339035..dcb6b7c51ea7 100644 --- a/arch/ia64/kernel/msi_ia64.c +++ b/arch/ia64/kernel/msi_ia64.c @@ -75,7 +75,7 @@ static void ia64_set_msi_irq_affinity(unsigned int irq, msg.data = data; write_msi_msg(irq, &msg); - irq_desc[irq].affinity = cpumask_of_cpu(cpu); + cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu)); } #endif /* CONFIG_SMP */ @@ -187,7 +187,7 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu)); dmar_msi_write(irq, &msg); - irq_desc[irq].affinity = *mask; + cpumask_copy(irq_desc[irq].affinity, mask); } #endif /* CONFIG_SMP */ diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c index ca553b0429ce..81e428943d73 100644 --- a/arch/ia64/sn/kernel/msi_sn.c +++ b/arch/ia64/sn/kernel/msi_sn.c @@ -205,7 +205,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq, msg.address_lo = (u32)(bus_addr & 0x00000000ffffffff); write_msi_msg(irq, &msg); - irq_desc[irq].affinity = *cpu_mask; + cpumask_copy(irq_desc[irq].affinity, cpu_mask); } #endif /* CONFIG_SMP */ diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h index abc62aa744ac..3214ade02d10 100644 --- a/arch/mips/include/asm/irq.h +++ b/arch/mips/include/asm/irq.h @@ -66,7 +66,7 @@ extern void smtc_forward_irq(unsigned int irq); */ #define IRQ_AFFINITY_HOOK(irq) \ do { \ - if (!cpu_isset(smp_processor_id(), irq_desc[irq].affinity)) { \ + if (!cpumask_test_cpu(smp_processor_id(), irq_desc[irq].affinity)) {\ smtc_forward_irq(irq); \ irq_exit(); \ return; \ diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c index 494a49a317e9..87deb8f6c458 100644 --- a/arch/mips/kernel/irq-gic.c +++ b/arch/mips/kernel/irq-gic.c @@ -187,7 +187,7 @@ static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask) set_bit(irq, pcpu_masks[first_cpu(tmp)].pcpu_mask); } - irq_desc[irq].affinity = *cpumask; + cpumask_copy(irq_desc[irq].affinity, cpumask); spin_unlock_irqrestore(&gic_lock, flags); } diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c index b6cca01ff82b..d2c1ab12425a 100644 --- a/arch/mips/kernel/smtc.c +++ b/arch/mips/kernel/smtc.c @@ -686,7 +686,7 @@ void smtc_forward_irq(unsigned int irq) * and efficiency, we just pick the easiest one to find. */ - target = first_cpu(irq_desc[irq].affinity); + target = cpumask_first(irq_desc[irq].affinity); /* * We depend on the platform code to have correctly processed diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c index aabd7274507b..5ba31888fefb 100644 --- a/arch/mips/mti-malta/malta-smtc.c +++ b/arch/mips/mti-malta/malta-smtc.c @@ -116,7 +116,7 @@ struct plat_smp_ops msmtc_smp_ops = { void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) { - cpumask_t tmask = *affinity; + cpumask_t tmask; int cpu = 0; void smtc_set_irq_affinity(unsigned int irq, cpumask_t aff); @@ -139,11 +139,12 @@ void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) * be made to forward to an offline "CPU". */ + cpumask_copy(&tmask, affinity); for_each_cpu(cpu, affinity) { if ((cpu_data[cpu].vpe_id != 0) || !cpu_online(cpu)) cpu_clear(cpu, tmask); } - irq_desc[irq].affinity = tmask; + cpumask_copy(irq_desc[irq].affinity, &tmask); if (cpus_empty(tmask)) /* diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index ac2c822928c7..49482806863f 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -120,7 +120,7 @@ int cpu_check_affinity(unsigned int irq, cpumask_t *dest) if (CHECK_IRQ_PER_CPU(irq)) { /* Bad linux design decision. The mask has already * been set; we must reset it */ - irq_desc[irq].affinity = CPU_MASK_ALL; + cpumask_setall(irq_desc[irq].affinity); return -EINVAL; } @@ -136,7 +136,7 @@ static void cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest) if (cpu_check_affinity(irq, dest)) return; - irq_desc[irq].affinity = *dest; + cpumask_copy(irq_desc[irq].affinity, dest); } #endif @@ -295,7 +295,7 @@ int txn_alloc_irq(unsigned int bits_wide) unsigned long txn_affinity_addr(unsigned int irq, int cpu) { #ifdef CONFIG_SMP - irq_desc[irq].affinity = cpumask_of_cpu(cpu); + cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu)); #endif return per_cpu(cpu_data, cpu).txn_addr; @@ -352,7 +352,7 @@ void do_cpu_irq_mask(struct pt_regs *regs) irq = eirr_to_irq(eirr_val); #ifdef CONFIG_SMP - dest = irq_desc[irq].affinity; + cpumask_copy(&dest, irq_desc[irq].affinity); if (CHECK_IRQ_PER_CPU(irq_desc[irq].status) && !cpu_isset(smp_processor_id(), dest)) { int cpu = first_cpu(dest); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 23b8b5e36f98..ad1e5ac721d8 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -231,7 +231,7 @@ void fixup_irqs(cpumask_t map) if (irq_desc[irq].status & IRQ_PER_CPU) continue; - cpus_and(mask, irq_desc[irq].affinity, map); + cpumask_and(&mask, irq_desc[irq].affinity, &map); if (any_online_cpu(mask) == NR_CPUS) { printk("Breaking affinity for irq %i\n", irq); mask = map; diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index 84e058f1e1cc..80b513449f4c 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c @@ -153,9 +153,10 @@ static int get_irq_server(unsigned int virq, unsigned int strict_check) { int server; /* For the moment only implement delivery to all cpus or one cpu */ - cpumask_t cpumask = irq_desc[virq].affinity; + cpumask_t cpumask; cpumask_t tmp = CPU_MASK_NONE; + cpumask_copy(&cpumask, irq_desc[virq].affinity); if (!distribute_irqs) return default_server; @@ -869,7 +870,7 @@ void xics_migrate_irqs_away(void) virq, cpu); /* Reset affinity to all cpus */ - irq_desc[virq].affinity = CPU_MASK_ALL; + cpumask_setall(irq_desc[virq].affinity); desc->chip->set_affinity(virq, cpu_all_mask); unlock: spin_unlock_irqrestore(&desc->lock, flags); diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 3e0d89dcdba2..0afd21f9a222 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -566,9 +566,10 @@ static void __init mpic_scan_ht_pics(struct mpic *mpic) #ifdef CONFIG_SMP static int irq_choose_cpu(unsigned int virt_irq) { - cpumask_t mask = irq_desc[virt_irq].affinity; + cpumask_t mask; int cpuid; + cpumask_copy(&mask, irq_desc[virt_irq].affinity); if (cpus_equal(mask, CPU_MASK_ALL)) { static int irq_rover; static DEFINE_SPINLOCK(irq_rover_lock); diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index cab8e0286871..4ac5c651e00d 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -247,9 +247,10 @@ struct irq_handler_data { #ifdef CONFIG_SMP static int irq_choose_cpu(unsigned int virt_irq) { - cpumask_t mask = irq_desc[virt_irq].affinity; + cpumask_t mask; int cpuid; + cpumask_copy(&mask, irq_desc[virt_irq].affinity); if (cpus_equal(mask, CPU_MASK_ALL)) { static int irq_rover; static DEFINE_SPINLOCK(irq_rover_lock); @@ -854,7 +855,7 @@ void fixup_irqs(void) !(irq_desc[irq].status & IRQ_PER_CPU)) { if (irq_desc[irq].chip->set_affinity) irq_desc[irq].chip->set_affinity(irq, - &irq_desc[irq].affinity); + irq_desc[irq].affinity); } spin_unlock_irqrestore(&irq_desc[irq].lock, flags); } -- cgit v1.2.3-58-ga151 From 74e7904559a10cbb9fbf9139c5c42fc87c0f62a4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 17 Jan 2009 15:26:32 +0900 Subject: linker script: add missing .data.percpu.page_aligned arm, arm/mach-integrator and powerpc were missing .data.percpu.page_aligned in their percpu output section definitions. Add it. Signed-off-by: Tejun Heo --- arch/arm/kernel/vmlinux.lds.S | 1 + arch/ia64/kernel/vmlinux.lds.S | 1 + arch/powerpc/kernel/vmlinux.lds.S | 1 + 3 files changed, 3 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 00216071eaf7..85598f7da407 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -65,6 +65,7 @@ SECTIONS #endif . = ALIGN(4096); __per_cpu_start = .; + *(.data.percpu.page_aligned) *(.data.percpu) *(.data.percpu.shared_aligned) __per_cpu_end = .; diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 10a7d47e8510..f45e4e508eca 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -219,6 +219,7 @@ SECTIONS .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET) { __per_cpu_start = .; + *(.data.percpu.page_aligned) *(.data.percpu) *(.data.percpu.shared_aligned) __per_cpu_end = .; diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 47bf15cd2c9e..04e8ecea9b40 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -182,6 +182,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { __per_cpu_start = .; + *(.data.percpu.page_aligned) *(.data.percpu) *(.data.percpu.shared_aligned) __per_cpu_end = .; -- cgit v1.2.3-58-ga151 From 4be2c7ff4f362e41706e84a3d6726bdcda9ab65f Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 31 Jan 2009 11:41:04 +0530 Subject: headers_check fix: powerpc, bootx.h fix the following 'make headers_check' warnings: usr/include/asm-powerpc/bootx.h:12: include of is preferred over usr/include/asm-powerpc/bootx.h:57: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- arch/powerpc/include/asm/bootx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/bootx.h b/arch/powerpc/include/asm/bootx.h index 57b82e3f89ce..60a3c9ef3017 100644 --- a/arch/powerpc/include/asm/bootx.h +++ b/arch/powerpc/include/asm/bootx.h @@ -9,7 +9,7 @@ #ifndef __ASM_BOOTX_H__ #define __ASM_BOOTX_H__ -#include +#include #ifdef macintosh #include -- cgit v1.2.3-58-ga151 From 785857f5f0b7fbeed934e39af24edec471637375 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 31 Jan 2009 11:42:29 +0530 Subject: headers_check fix: powerpc, elf.h fix the following 'make headers_check' warning: usr/include/asm-powerpc/elf.h:5: include of is preferred over Signed-off-by: Jaswinder Singh Rajput --- arch/powerpc/include/asm/elf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index cd46f023ec6d..b5600ce6055e 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -7,7 +7,7 @@ #include #endif -#include +#include #include #include #include -- cgit v1.2.3-58-ga151 From 9f2cd967b7f029ebe2c74969709ff9c745344dba Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 31 Jan 2009 11:44:45 +0530 Subject: headers_check fix: powerpc, kvm.h fix the following 'make headers_check' warnings: usr/include/asm-powerpc/kvm.h:23: include of is preferred over usr/include/asm-powerpc/kvm.h:26: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- arch/powerpc/include/asm/kvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index f993e4198d5c..4e0cf65f7f5a 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -20,7 +20,7 @@ #ifndef __LINUX_KVM_POWERPC_H #define __LINUX_KVM_POWERPC_H -#include +#include struct kvm_regs { __u64 pc; -- cgit v1.2.3-58-ga151 From 122bb2207b8107ce117d10fcfd3a2f6c3804a362 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 31 Jan 2009 11:46:23 +0530 Subject: headers_check fix: powerpc, ps3fb.h fix the following 'make headers_check' warning: usr/include/asm-powerpc/ps3fb.h:33: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- arch/powerpc/include/asm/ps3fb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/ps3fb.h b/arch/powerpc/include/asm/ps3fb.h index 3f121fe4010d..e7233a849680 100644 --- a/arch/powerpc/include/asm/ps3fb.h +++ b/arch/powerpc/include/asm/ps3fb.h @@ -19,6 +19,7 @@ #ifndef _ASM_POWERPC_PS3FB_H_ #define _ASM_POWERPC_PS3FB_H_ +#include #include /* ioctl */ -- cgit v1.2.3-58-ga151 From 1a16bc4590fcc94630571c541c8fef7a0845c2c0 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 31 Jan 2009 11:52:05 +0530 Subject: headers_check fix: powerpc, spu_info.h fix the following 'make headers_check' warning: usr/include/asm-powerpc/spu_info.h:27: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- arch/powerpc/include/asm/spu_info.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/spu_info.h b/arch/powerpc/include/asm/spu_info.h index 3545efbf9891..1286c823f0d8 100644 --- a/arch/powerpc/include/asm/spu_info.h +++ b/arch/powerpc/include/asm/spu_info.h @@ -23,9 +23,10 @@ #ifndef _SPU_INFO_H #define _SPU_INFO_H +#include + #ifdef __KERNEL__ #include -#include #else struct mfc_cq_sr { __u64 mfc_cq_data0_RW; -- cgit v1.2.3-58-ga151 From 48109870bab7e66f30f933cd218258368024cd9f Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 31 Jan 2009 11:54:05 +0530 Subject: headers_check fix: powerpc, swab.h fix the following 'make headers_check' warning: usr/include/asm-powerpc/swab.h:11: include of is preferred over Signed-off-by: Jaswinder Singh Rajput --- arch/powerpc/include/asm/swab.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/swab.h b/arch/powerpc/include/asm/swab.h index ef824ae4b79c..c581e3ef73ed 100644 --- a/arch/powerpc/include/asm/swab.h +++ b/arch/powerpc/include/asm/swab.h @@ -8,7 +8,7 @@ * 2 of the License, or (at your option) any later version. */ -#include +#include #include #ifdef __GNUC__ -- cgit v1.2.3-58-ga151 From cb9eff097831007afb30d64373f29d99825d0068 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Thu, 12 Feb 2009 05:03:36 +0000 Subject: net: new user space API for time stamping of incoming and outgoing packets User space can request hardware and/or software time stamping. Reporting of the result(s) via a new control message is enabled separately for each field in the message because some of the fields may require additional computation and thus cause overhead. User space can tell the different kinds of time stamps apart and choose what suits its needs. When a TX timestamp operation is requested, the TX skb will be cloned and the clone will be time stamped (in hardware or software) and added to the socket error queue of the skb, if the skb has a socket associated with it. The actual TX timestamp will reach userspace as a RX timestamp on the cloned packet. If timestamping is requested and no timestamping is done in the device driver (potentially this may use hardware timestamping), it will be done in software after the device's start_hard_xmit routine. Signed-off-by: Patrick Ohly Signed-off-by: David S. Miller --- Documentation/networking/timestamping.txt | 178 +++++++ Documentation/networking/timestamping/.gitignore | 1 + Documentation/networking/timestamping/Makefile | 6 + .../networking/timestamping/timestamping.c | 533 +++++++++++++++++++++ arch/alpha/include/asm/socket.h | 3 + arch/arm/include/asm/socket.h | 3 + arch/avr32/include/asm/socket.h | 3 + arch/blackfin/include/asm/socket.h | 3 + arch/cris/include/asm/socket.h | 3 + arch/h8300/include/asm/socket.h | 3 + arch/ia64/include/asm/socket.h | 3 + arch/m68k/include/asm/socket.h | 3 + arch/mips/include/asm/socket.h | 3 + arch/parisc/include/asm/socket.h | 3 + arch/powerpc/include/asm/socket.h | 3 + arch/s390/include/asm/socket.h | 3 + arch/sh/include/asm/socket.h | 3 + arch/sparc/include/asm/socket.h | 3 + arch/x86/include/asm/socket.h | 3 + arch/xtensa/include/asm/socket.h | 3 + include/asm-frv/socket.h | 3 + include/asm-m32r/socket.h | 3 + include/asm-mn10300/socket.h | 3 + include/linux/errqueue.h | 1 + include/linux/net_tstamp.h | 104 ++++ include/linux/sockios.h | 3 + 26 files changed, 883 insertions(+) create mode 100644 Documentation/networking/timestamping.txt create mode 100644 Documentation/networking/timestamping/.gitignore create mode 100644 Documentation/networking/timestamping/Makefile create mode 100644 Documentation/networking/timestamping/timestamping.c create mode 100644 include/linux/net_tstamp.h (limited to 'arch/powerpc') diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt new file mode 100644 index 000000000000..a681a65b5bc7 --- /dev/null +++ b/Documentation/networking/timestamping.txt @@ -0,0 +1,178 @@ +The existing interfaces for getting network packages time stamped are: + +* SO_TIMESTAMP + Generate time stamp for each incoming packet using the (not necessarily + monotonous!) system time. Result is returned via recv_msg() in a + control message as timeval (usec resolution). + +* SO_TIMESTAMPNS + Same time stamping mechanism as SO_TIMESTAMP, but returns result as + timespec (nsec resolution). + +* IP_MULTICAST_LOOP + SO_TIMESTAMP[NS] + Only for multicasts: approximate send time stamp by receiving the looped + packet and using its receive time stamp. + +The following interface complements the existing ones: receive time +stamps can be generated and returned for arbitrary packets and much +closer to the point where the packet is really sent. Time stamps can +be generated in software (as before) or in hardware (if the hardware +has such a feature). + +SO_TIMESTAMPING: + +Instructs the socket layer which kind of information is wanted. The +parameter is an integer with some of the following bits set. Setting +other bits is an error and doesn't change the current state. + +SOF_TIMESTAMPING_TX_HARDWARE: try to obtain send time stamp in hardware +SOF_TIMESTAMPING_TX_SOFTWARE: if SOF_TIMESTAMPING_TX_HARDWARE is off or + fails, then do it in software +SOF_TIMESTAMPING_RX_HARDWARE: return the original, unmodified time stamp + as generated by the hardware +SOF_TIMESTAMPING_RX_SOFTWARE: if SOF_TIMESTAMPING_RX_HARDWARE is off or + fails, then do it in software +SOF_TIMESTAMPING_RAW_HARDWARE: return original raw hardware time stamp +SOF_TIMESTAMPING_SYS_HARDWARE: return hardware time stamp transformed to + the system time base +SOF_TIMESTAMPING_SOFTWARE: return system time stamp generated in + software + +SOF_TIMESTAMPING_TX/RX determine how time stamps are generated. +SOF_TIMESTAMPING_RAW/SYS determine how they are reported in the +following control message: + struct scm_timestamping { + struct timespec systime; + struct timespec hwtimetrans; + struct timespec hwtimeraw; + }; + +recvmsg() can be used to get this control message for regular incoming +packets. For send time stamps the outgoing packet is looped back to +the socket's error queue with the send time stamp(s) attached. It can +be received with recvmsg(flags=MSG_ERRQUEUE). The call returns the +original outgoing packet data including all headers preprended down to +and including the link layer, the scm_timestamping control message and +a sock_extended_err control message with ee_errno==ENOMSG and +ee_origin==SO_EE_ORIGIN_TIMESTAMPING. A socket with such a pending +bounced packet is ready for reading as far as select() is concerned. + +All three values correspond to the same event in time, but were +generated in different ways. Each of these values may be empty (= all +zero), in which case no such value was available. If the application +is not interested in some of these values, they can be left blank to +avoid the potential overhead of calculating them. + +systime is the value of the system time at that moment. This +corresponds to the value also returned via SO_TIMESTAMP[NS]. If the +time stamp was generated by hardware, then this field is +empty. Otherwise it is filled in if SOF_TIMESTAMPING_SOFTWARE is +set. + +hwtimeraw is the original hardware time stamp. Filled in if +SOF_TIMESTAMPING_RAW_HARDWARE is set. No assumptions about its +relation to system time should be made. + +hwtimetrans is the hardware time stamp transformed so that it +corresponds as good as possible to system time. This correlation is +not perfect; as a consequence, sorting packets received via different +NICs by their hwtimetrans may differ from the order in which they were +received. hwtimetrans may be non-monotonic even for the same NIC. +Filled in if SOF_TIMESTAMPING_SYS_HARDWARE is set. Requires support +by the network device and will be empty without that support. + + +SIOCSHWTSTAMP: + +Hardware time stamping must also be initialized for each device driver +that is expected to do hardware time stamping. The parameter is: + +struct hwtstamp_config { + int flags; /* no flags defined right now, must be zero */ + int tx_type; /* HWTSTAMP_TX_* */ + int rx_filter; /* HWTSTAMP_FILTER_* */ +}; + +Desired behavior is passed into the kernel and to a specific device by +calling ioctl(SIOCSHWTSTAMP) with a pointer to a struct ifreq whose +ifr_data points to a struct hwtstamp_config. The tx_type and +rx_filter are hints to the driver what it is expected to do. If +the requested fine-grained filtering for incoming packets is not +supported, the driver may time stamp more than just the requested types +of packets. + +A driver which supports hardware time stamping shall update the struct +with the actual, possibly more permissive configuration. If the +requested packets cannot be time stamped, then nothing should be +changed and ERANGE shall be returned (in contrast to EINVAL, which +indicates that SIOCSHWTSTAMP is not supported at all). + +Only a processes with admin rights may change the configuration. User +space is responsible to ensure that multiple processes don't interfere +with each other and that the settings are reset. + +/* possible values for hwtstamp_config->tx_type */ +enum { + /* + * no outgoing packet will need hardware time stamping; + * should a packet arrive which asks for it, no hardware + * time stamping will be done + */ + HWTSTAMP_TX_OFF, + + /* + * enables hardware time stamping for outgoing packets; + * the sender of the packet decides which are to be + * time stamped by setting SOF_TIMESTAMPING_TX_SOFTWARE + * before sending the packet + */ + HWTSTAMP_TX_ON, +}; + +/* possible values for hwtstamp_config->rx_filter */ +enum { + /* time stamp no incoming packet at all */ + HWTSTAMP_FILTER_NONE, + + /* time stamp any incoming packet */ + HWTSTAMP_FILTER_ALL, + + /* return value: time stamp all packets requested plus some others */ + HWTSTAMP_FILTER_SOME, + + /* PTP v1, UDP, any kind of event packet */ + HWTSTAMP_FILTER_PTP_V1_L4_EVENT, + + ... +}; + + +DEVICE IMPLEMENTATION + +A driver which supports hardware time stamping must support the +SIOCSHWTSTAMP ioctl. Time stamps for received packets must be stored +in the skb with skb_hwtstamp_set(). + +Time stamps for outgoing packets are to be generated as follows: +- In hard_start_xmit(), check if skb_hwtstamp_check_tx_hardware() + returns non-zero. If yes, then the driver is expected + to do hardware time stamping. +- If this is possible for the skb and requested, then declare + that the driver is doing the time stamping by calling + skb_hwtstamp_tx_in_progress(). A driver not supporting + hardware time stamping doesn't do that. A driver must never + touch sk_buff::tstamp! It is used to store how time stamping + for an outgoing packets is to be done. +- As soon as the driver has sent the packet and/or obtained a + hardware time stamp for it, it passes the time stamp back by + calling skb_hwtstamp_tx() with the original skb, the raw + hardware time stamp and a handle to the device (necessary + to convert the hardware time stamp to system time). If obtaining + the hardware time stamp somehow fails, then the driver should + not fall back to software time stamping. The rationale is that + this would occur at a later time in the processing pipeline + than other software time stamping and therefore could lead + to unexpected deltas between time stamps. +- If the driver did not call skb_hwtstamp_tx_in_progress(), then + dev_hard_start_xmit() checks whether software time stamping + is wanted as fallback and potentially generates the time stamp. diff --git a/Documentation/networking/timestamping/.gitignore b/Documentation/networking/timestamping/.gitignore new file mode 100644 index 000000000000..71e81eb2e22f --- /dev/null +++ b/Documentation/networking/timestamping/.gitignore @@ -0,0 +1 @@ +timestamping diff --git a/Documentation/networking/timestamping/Makefile b/Documentation/networking/timestamping/Makefile new file mode 100644 index 000000000000..2a1489fdc036 --- /dev/null +++ b/Documentation/networking/timestamping/Makefile @@ -0,0 +1,6 @@ +CPPFLAGS = -I../../../include + +timestamping: timestamping.c + +clean: + rm -f timestamping diff --git a/Documentation/networking/timestamping/timestamping.c b/Documentation/networking/timestamping/timestamping.c new file mode 100644 index 000000000000..43d143104210 --- /dev/null +++ b/Documentation/networking/timestamping/timestamping.c @@ -0,0 +1,533 @@ +/* + * This program demonstrates how the various time stamping features in + * the Linux kernel work. It emulates the behavior of a PTP + * implementation in stand-alone master mode by sending PTPv1 Sync + * multicasts once every second. It looks for similar packets, but + * beyond that doesn't actually implement PTP. + * + * Outgoing packets are time stamped with SO_TIMESTAMPING with or + * without hardware support. + * + * Incoming packets are time stamped with SO_TIMESTAMPING with or + * without hardware support, SIOCGSTAMP[NS] (per-socket time stamp) and + * SO_TIMESTAMP[NS]. + * + * Copyright (C) 2009 Intel Corporation. + * Author: Patrick Ohly + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "asm/types.h" +#include "linux/net_tstamp.h" +#include "linux/errqueue.h" + +#ifndef SO_TIMESTAMPING +# define SO_TIMESTAMPING 37 +# define SCM_TIMESTAMPING SO_TIMESTAMPING +#endif + +#ifndef SO_TIMESTAMPNS +# define SO_TIMESTAMPNS 35 +#endif + +#ifndef SIOCGSTAMPNS +# define SIOCGSTAMPNS 0x8907 +#endif + +#ifndef SIOCSHWTSTAMP +# define SIOCSHWTSTAMP 0x89b0 +#endif + +static void usage(const char *error) +{ + if (error) + printf("invalid option: %s\n", error); + printf("timestamping interface option*\n\n" + "Options:\n" + " IP_MULTICAST_LOOP - looping outgoing multicasts\n" + " SO_TIMESTAMP - normal software time stamping, ms resolution\n" + " SO_TIMESTAMPNS - more accurate software time stamping\n" + " SOF_TIMESTAMPING_TX_HARDWARE - hardware time stamping of outgoing packets\n" + " SOF_TIMESTAMPING_TX_SOFTWARE - software fallback for outgoing packets\n" + " SOF_TIMESTAMPING_RX_HARDWARE - hardware time stamping of incoming packets\n" + " SOF_TIMESTAMPING_RX_SOFTWARE - software fallback for incoming packets\n" + " SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n" + " SOF_TIMESTAMPING_SYS_HARDWARE - request reporting of transformed HW time stamps\n" + " SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n" + " SIOCGSTAMP - check last socket time stamp\n" + " SIOCGSTAMPNS - more accurate socket time stamp\n"); + exit(1); +} + +static void bail(const char *error) +{ + printf("%s: %s\n", error, strerror(errno)); + exit(1); +} + +static const unsigned char sync[] = { + 0x00, 0x01, 0x00, 0x01, + 0x5f, 0x44, 0x46, 0x4c, + 0x54, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x01, 0x01, + + /* fake uuid */ + 0x00, 0x01, + 0x02, 0x03, 0x04, 0x05, + + 0x00, 0x01, 0x00, 0x37, + 0x00, 0x00, 0x00, 0x08, + 0x00, 0x00, 0x00, 0x00, + 0x49, 0x05, 0xcd, 0x01, + 0x29, 0xb1, 0x8d, 0xb0, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, + + /* fake uuid */ + 0x00, 0x01, + 0x02, 0x03, 0x04, 0x05, + + 0x00, 0x00, 0x00, 0x37, + 0x00, 0x00, 0x00, 0x04, + 0x44, 0x46, 0x4c, 0x54, + 0x00, 0x00, 0xf0, 0x60, + 0x00, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0xf0, 0x60, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x04, + 0x44, 0x46, 0x4c, 0x54, + 0x00, 0x01, + + /* fake uuid */ + 0x00, 0x01, + 0x02, 0x03, 0x04, 0x05, + + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 +}; + +static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len) +{ + struct timeval now; + int res; + + res = sendto(sock, sync, sizeof(sync), 0, + addr, addr_len); + gettimeofday(&now, 0); + if (res < 0) + printf("%s: %s\n", "send", strerror(errno)); + else + printf("%ld.%06ld: sent %d bytes\n", + (long)now.tv_sec, (long)now.tv_usec, + res); +} + +static void printpacket(struct msghdr *msg, int res, + char *data, + int sock, int recvmsg_flags, + int siocgstamp, int siocgstampns) +{ + struct sockaddr_in *from_addr = (struct sockaddr_in *)msg->msg_name; + struct cmsghdr *cmsg; + struct timeval tv; + struct timespec ts; + struct timeval now; + + gettimeofday(&now, 0); + + printf("%ld.%06ld: received %s data, %d bytes from %s, %d bytes control messages\n", + (long)now.tv_sec, (long)now.tv_usec, + (recvmsg_flags & MSG_ERRQUEUE) ? "error" : "regular", + res, + inet_ntoa(from_addr->sin_addr), + msg->msg_controllen); + for (cmsg = CMSG_FIRSTHDR(msg); + cmsg; + cmsg = CMSG_NXTHDR(msg, cmsg)) { + printf(" cmsg len %d: ", cmsg->cmsg_len); + switch (cmsg->cmsg_level) { + case SOL_SOCKET: + printf("SOL_SOCKET "); + switch (cmsg->cmsg_type) { + case SO_TIMESTAMP: { + struct timeval *stamp = + (struct timeval *)CMSG_DATA(cmsg); + printf("SO_TIMESTAMP %ld.%06ld", + (long)stamp->tv_sec, + (long)stamp->tv_usec); + break; + } + case SO_TIMESTAMPNS: { + struct timespec *stamp = + (struct timespec *)CMSG_DATA(cmsg); + printf("SO_TIMESTAMPNS %ld.%09ld", + (long)stamp->tv_sec, + (long)stamp->tv_nsec); + break; + } + case SO_TIMESTAMPING: { + struct timespec *stamp = + (struct timespec *)CMSG_DATA(cmsg); + printf("SO_TIMESTAMPING "); + printf("SW %ld.%09ld ", + (long)stamp->tv_sec, + (long)stamp->tv_nsec); + stamp++; + printf("HW transformed %ld.%09ld ", + (long)stamp->tv_sec, + (long)stamp->tv_nsec); + stamp++; + printf("HW raw %ld.%09ld", + (long)stamp->tv_sec, + (long)stamp->tv_nsec); + break; + } + default: + printf("type %d", cmsg->cmsg_type); + break; + } + break; + case IPPROTO_IP: + printf("IPPROTO_IP "); + switch (cmsg->cmsg_type) { + case IP_RECVERR: { + struct sock_extended_err *err = + (struct sock_extended_err *)CMSG_DATA(cmsg); + printf("IP_RECVERR ee_errno '%s' ee_origin %d => %s", + strerror(err->ee_errno), + err->ee_origin, +#ifdef SO_EE_ORIGIN_TIMESTAMPING + err->ee_origin == SO_EE_ORIGIN_TIMESTAMPING ? + "bounced packet" : "unexpected origin" +#else + "probably SO_EE_ORIGIN_TIMESTAMPING" +#endif + ); + if (res < sizeof(sync)) + printf(" => truncated data?!"); + else if (!memcmp(sync, data + res - sizeof(sync), + sizeof(sync))) + printf(" => GOT OUR DATA BACK (HURRAY!)"); + break; + } + case IP_PKTINFO: { + struct in_pktinfo *pktinfo = + (struct in_pktinfo *)CMSG_DATA(cmsg); + printf("IP_PKTINFO interface index %u", + pktinfo->ipi_ifindex); + break; + } + default: + printf("type %d", cmsg->cmsg_type); + break; + } + break; + default: + printf("level %d type %d", + cmsg->cmsg_level, + cmsg->cmsg_type); + break; + } + printf("\n"); + } + + if (siocgstamp) { + if (ioctl(sock, SIOCGSTAMP, &tv)) + printf(" %s: %s\n", "SIOCGSTAMP", strerror(errno)); + else + printf("SIOCGSTAMP %ld.%06ld\n", + (long)tv.tv_sec, + (long)tv.tv_usec); + } + if (siocgstampns) { + if (ioctl(sock, SIOCGSTAMPNS, &ts)) + printf(" %s: %s\n", "SIOCGSTAMPNS", strerror(errno)); + else + printf("SIOCGSTAMPNS %ld.%09ld\n", + (long)ts.tv_sec, + (long)ts.tv_nsec); + } +} + +static void recvpacket(int sock, int recvmsg_flags, + int siocgstamp, int siocgstampns) +{ + char data[256]; + struct msghdr msg; + struct iovec entry; + struct sockaddr_in from_addr; + struct { + struct cmsghdr cm; + char control[512]; + } control; + int res; + + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = &entry; + msg.msg_iovlen = 1; + entry.iov_base = data; + entry.iov_len = sizeof(data); + msg.msg_name = (caddr_t)&from_addr; + msg.msg_namelen = sizeof(from_addr); + msg.msg_control = &control; + msg.msg_controllen = sizeof(control); + + res = recvmsg(sock, &msg, recvmsg_flags|MSG_DONTWAIT); + if (res < 0) { + printf("%s %s: %s\n", + "recvmsg", + (recvmsg_flags & MSG_ERRQUEUE) ? "error" : "regular", + strerror(errno)); + } else { + printpacket(&msg, res, data, + sock, recvmsg_flags, + siocgstamp, siocgstampns); + } +} + +int main(int argc, char **argv) +{ + int so_timestamping_flags = 0; + int so_timestamp = 0; + int so_timestampns = 0; + int siocgstamp = 0; + int siocgstampns = 0; + int ip_multicast_loop = 0; + char *interface; + int i; + int enabled = 1; + int sock; + struct ifreq device; + struct ifreq hwtstamp; + struct hwtstamp_config hwconfig, hwconfig_requested; + struct sockaddr_in addr; + struct ip_mreq imr; + struct in_addr iaddr; + int val; + socklen_t len; + struct timeval next; + + if (argc < 2) + usage(0); + interface = argv[1]; + + for (i = 2; i < argc; i++) { + if (!strcasecmp(argv[i], "SO_TIMESTAMP")) + so_timestamp = 1; + else if (!strcasecmp(argv[i], "SO_TIMESTAMPNS")) + so_timestampns = 1; + else if (!strcasecmp(argv[i], "SIOCGSTAMP")) + siocgstamp = 1; + else if (!strcasecmp(argv[i], "SIOCGSTAMPNS")) + siocgstampns = 1; + else if (!strcasecmp(argv[i], "IP_MULTICAST_LOOP")) + ip_multicast_loop = 1; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_HARDWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_TX_HARDWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_SOFTWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_TX_SOFTWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_HARDWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_RX_HARDWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_SOFTWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_RX_SOFTWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SOFTWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_SOFTWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SYS_HARDWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_SYS_HARDWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RAW_HARDWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_RAW_HARDWARE; + else + usage(argv[i]); + } + + sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP); + if (socket < 0) + bail("socket"); + + memset(&device, 0, sizeof(device)); + strncpy(device.ifr_name, interface, sizeof(device.ifr_name)); + if (ioctl(sock, SIOCGIFADDR, &device) < 0) + bail("getting interface IP address"); + + memset(&hwtstamp, 0, sizeof(hwtstamp)); + strncpy(hwtstamp.ifr_name, interface, sizeof(hwtstamp.ifr_name)); + hwtstamp.ifr_data = (void *)&hwconfig; + memset(&hwconfig, 0, sizeof(&hwconfig)); + hwconfig.tx_type = + (so_timestamping_flags & SOF_TIMESTAMPING_TX_HARDWARE) ? + HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; + hwconfig.rx_filter = + (so_timestamping_flags & SOF_TIMESTAMPING_RX_HARDWARE) ? + HWTSTAMP_FILTER_PTP_V1_L4_SYNC : HWTSTAMP_FILTER_NONE; + hwconfig_requested = hwconfig; + if (ioctl(sock, SIOCSHWTSTAMP, &hwtstamp) < 0) { + if ((errno == EINVAL || errno == ENOTSUP) && + hwconfig_requested.tx_type == HWTSTAMP_TX_OFF && + hwconfig_requested.rx_filter == HWTSTAMP_FILTER_NONE) + printf("SIOCSHWTSTAMP: disabling hardware time stamping not possible\n"); + else + bail("SIOCSHWTSTAMP"); + } + printf("SIOCSHWTSTAMP: tx_type %d requested, got %d; rx_filter %d requested, got %d\n", + hwconfig_requested.tx_type, hwconfig.tx_type, + hwconfig_requested.rx_filter, hwconfig.rx_filter); + + /* bind to PTP port */ + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(INADDR_ANY); + addr.sin_port = htons(319 /* PTP event port */); + if (bind(sock, + (struct sockaddr *)&addr, + sizeof(struct sockaddr_in)) < 0) + bail("bind"); + + /* set multicast group for outgoing packets */ + inet_aton("224.0.1.130", &iaddr); /* alternate PTP domain 1 */ + addr.sin_addr = iaddr; + imr.imr_multiaddr.s_addr = iaddr.s_addr; + imr.imr_interface.s_addr = + ((struct sockaddr_in *)&device.ifr_addr)->sin_addr.s_addr; + if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_IF, + &imr.imr_interface.s_addr, sizeof(struct in_addr)) < 0) + bail("set multicast"); + + /* join multicast group, loop our own packet */ + if (setsockopt(sock, IPPROTO_IP, IP_ADD_MEMBERSHIP, + &imr, sizeof(struct ip_mreq)) < 0) + bail("join multicast group"); + + if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_LOOP, + &ip_multicast_loop, sizeof(enabled)) < 0) { + bail("loop multicast"); + } + + /* set socket options for time stamping */ + if (so_timestamp && + setsockopt(sock, SOL_SOCKET, SO_TIMESTAMP, + &enabled, sizeof(enabled)) < 0) + bail("setsockopt SO_TIMESTAMP"); + + if (so_timestampns && + setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS, + &enabled, sizeof(enabled)) < 0) + bail("setsockopt SO_TIMESTAMPNS"); + + if (so_timestamping_flags && + setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, + &so_timestamping_flags, + sizeof(so_timestamping_flags)) < 0) + bail("setsockopt SO_TIMESTAMPING"); + + /* request IP_PKTINFO for debugging purposes */ + if (setsockopt(sock, SOL_IP, IP_PKTINFO, + &enabled, sizeof(enabled)) < 0) + printf("%s: %s\n", "setsockopt IP_PKTINFO", strerror(errno)); + + /* verify socket options */ + len = sizeof(val); + if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMP, &val, &len) < 0) + printf("%s: %s\n", "getsockopt SO_TIMESTAMP", strerror(errno)); + else + printf("SO_TIMESTAMP %d\n", val); + + if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS, &val, &len) < 0) + printf("%s: %s\n", "getsockopt SO_TIMESTAMPNS", + strerror(errno)); + else + printf("SO_TIMESTAMPNS %d\n", val); + + if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &val, &len) < 0) { + printf("%s: %s\n", "getsockopt SO_TIMESTAMPING", + strerror(errno)); + } else { + printf("SO_TIMESTAMPING %d\n", val); + if (val != so_timestamping_flags) + printf(" not the expected value %d\n", + so_timestamping_flags); + } + + /* send packets forever every five seconds */ + gettimeofday(&next, 0); + next.tv_sec = (next.tv_sec + 1) / 5 * 5; + next.tv_usec = 0; + while (1) { + struct timeval now; + struct timeval delta; + long delta_us; + int res; + fd_set readfs, errorfs; + + gettimeofday(&now, 0); + delta_us = (long)(next.tv_sec - now.tv_sec) * 1000000 + + (long)(next.tv_usec - now.tv_usec); + if (delta_us > 0) { + /* continue waiting for timeout or data */ + delta.tv_sec = delta_us / 1000000; + delta.tv_usec = delta_us % 1000000; + + FD_ZERO(&readfs); + FD_ZERO(&errorfs); + FD_SET(sock, &readfs); + FD_SET(sock, &errorfs); + printf("%ld.%06ld: select %ldus\n", + (long)now.tv_sec, (long)now.tv_usec, + delta_us); + res = select(sock + 1, &readfs, 0, &errorfs, &delta); + gettimeofday(&now, 0); + printf("%ld.%06ld: select returned: %d, %s\n", + (long)now.tv_sec, (long)now.tv_usec, + res, + res < 0 ? strerror(errno) : "success"); + if (res > 0) { + if (FD_ISSET(sock, &readfs)) + printf("ready for reading\n"); + if (FD_ISSET(sock, &errorfs)) + printf("has error\n"); + recvpacket(sock, 0, + siocgstamp, + siocgstampns); + recvpacket(sock, MSG_ERRQUEUE, + siocgstamp, + siocgstampns); + } + } else { + /* write one packet */ + sendpacket(sock, + (struct sockaddr *)&addr, + sizeof(addr)); + next.tv_sec += 5; + continue; + } + } + + return 0; +} diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h index a1057c2d95e7..3641ec1452f4 100644 --- a/arch/alpha/include/asm/socket.h +++ b/arch/alpha/include/asm/socket.h @@ -62,6 +62,9 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + /* O_NONBLOCK clashes with the bits used for socket types. Therefore we * have to define SOCK_NONBLOCK to a different value here. */ diff --git a/arch/arm/include/asm/socket.h b/arch/arm/include/asm/socket.h index 6817be9573a6..537de4e0ef50 100644 --- a/arch/arm/include/asm/socket.h +++ b/arch/arm/include/asm/socket.h @@ -54,4 +54,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_SOCKET_H */ diff --git a/arch/avr32/include/asm/socket.h b/arch/avr32/include/asm/socket.h index 35863f260929..04c860619700 100644 --- a/arch/avr32/include/asm/socket.h +++ b/arch/avr32/include/asm/socket.h @@ -54,4 +54,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* __ASM_AVR32_SOCKET_H */ diff --git a/arch/blackfin/include/asm/socket.h b/arch/blackfin/include/asm/socket.h index 2ca702e44d47..fac7fe9e1f8a 100644 --- a/arch/blackfin/include/asm/socket.h +++ b/arch/blackfin/include/asm/socket.h @@ -53,4 +53,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_SOCKET_H */ diff --git a/arch/cris/include/asm/socket.h b/arch/cris/include/asm/socket.h index 9df0ca82f5de..d5cf74005408 100644 --- a/arch/cris/include/asm/socket.h +++ b/arch/cris/include/asm/socket.h @@ -56,6 +56,9 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_SOCKET_H */ diff --git a/arch/h8300/include/asm/socket.h b/arch/h8300/include/asm/socket.h index da2520dbf254..602518a70a1a 100644 --- a/arch/h8300/include/asm/socket.h +++ b/arch/h8300/include/asm/socket.h @@ -54,4 +54,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/include/asm/socket.h b/arch/ia64/include/asm/socket.h index d5ef0aa3e312..745421225ec6 100644 --- a/arch/ia64/include/asm/socket.h +++ b/arch/ia64/include/asm/socket.h @@ -63,4 +63,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/m68k/include/asm/socket.h b/arch/m68k/include/asm/socket.h index dbc64e92c41a..ca87f938b03f 100644 --- a/arch/m68k/include/asm/socket.h +++ b/arch/m68k/include/asm/socket.h @@ -54,4 +54,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_SOCKET_H */ diff --git a/arch/mips/include/asm/socket.h b/arch/mips/include/asm/socket.h index facc2d7a87ca..2abca1780169 100644 --- a/arch/mips/include/asm/socket.h +++ b/arch/mips/include/asm/socket.h @@ -75,6 +75,9 @@ To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #ifdef __KERNEL__ /** sock_type - Socket types diff --git a/arch/parisc/include/asm/socket.h b/arch/parisc/include/asm/socket.h index fba402c95ac2..885472bf7b78 100644 --- a/arch/parisc/include/asm/socket.h +++ b/arch/parisc/include/asm/socket.h @@ -54,6 +54,9 @@ #define SO_MARK 0x401f +#define SO_TIMESTAMPING 0x4020 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + /* O_NONBLOCK clashes with the bits used for socket types. Therefore we * have to define SOCK_NONBLOCK to a different value here. */ diff --git a/arch/powerpc/include/asm/socket.h b/arch/powerpc/include/asm/socket.h index f5a4e168e498..1e5cfad0e3f7 100644 --- a/arch/powerpc/include/asm/socket.h +++ b/arch/powerpc/include/asm/socket.h @@ -61,4 +61,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/s390/include/asm/socket.h b/arch/s390/include/asm/socket.h index c786ab623b2d..02330c50241b 100644 --- a/arch/s390/include/asm/socket.h +++ b/arch/s390/include/asm/socket.h @@ -62,4 +62,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_SOCKET_H */ diff --git a/arch/sh/include/asm/socket.h b/arch/sh/include/asm/socket.h index 6d4bf6512959..345653b96826 100644 --- a/arch/sh/include/asm/socket.h +++ b/arch/sh/include/asm/socket.h @@ -54,4 +54,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* __ASM_SH_SOCKET_H */ diff --git a/arch/sparc/include/asm/socket.h b/arch/sparc/include/asm/socket.h index bf50d0c2d583..982a12f959f4 100644 --- a/arch/sparc/include/asm/socket.h +++ b/arch/sparc/include/asm/socket.h @@ -50,6 +50,9 @@ #define SO_MARK 0x0022 +#define SO_TIMESTAMPING 0x0023 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 diff --git a/arch/x86/include/asm/socket.h b/arch/x86/include/asm/socket.h index 8ab9cc8b2ecc..ca8bf2cd0ba9 100644 --- a/arch/x86/include/asm/socket.h +++ b/arch/x86/include/asm/socket.h @@ -54,4 +54,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_X86_SOCKET_H */ diff --git a/arch/xtensa/include/asm/socket.h b/arch/xtensa/include/asm/socket.h index 6100682b1da2..dd1a7a4a1cea 100644 --- a/arch/xtensa/include/asm/socket.h +++ b/arch/xtensa/include/asm/socket.h @@ -65,4 +65,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _XTENSA_SOCKET_H */ diff --git a/include/asm-frv/socket.h b/include/asm-frv/socket.h index e51ca67b9356..57c3d4054e8b 100644 --- a/include/asm-frv/socket.h +++ b/include/asm-frv/socket.h @@ -54,5 +54,8 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-m32r/socket.h b/include/asm-m32r/socket.h index 9a0e20012224..be7ed589af5c 100644 --- a/include/asm-m32r/socket.h +++ b/include/asm-m32r/socket.h @@ -54,4 +54,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_M32R_SOCKET_H */ diff --git a/include/asm-mn10300/socket.h b/include/asm-mn10300/socket.h index 80af9c4ccad7..fb5daf438ec9 100644 --- a/include/asm-mn10300/socket.h +++ b/include/asm-mn10300/socket.h @@ -54,4 +54,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_SOCKET_H */ diff --git a/include/linux/errqueue.h b/include/linux/errqueue.h index ceb1454b6977..ec12cc74366f 100644 --- a/include/linux/errqueue.h +++ b/include/linux/errqueue.h @@ -18,6 +18,7 @@ struct sock_extended_err #define SO_EE_ORIGIN_LOCAL 1 #define SO_EE_ORIGIN_ICMP 2 #define SO_EE_ORIGIN_ICMP6 3 +#define SO_EE_ORIGIN_TIMESTAMPING 4 #define SO_EE_OFFENDER(ee) ((struct sockaddr*)((ee)+1)) diff --git a/include/linux/net_tstamp.h b/include/linux/net_tstamp.h new file mode 100644 index 000000000000..a3b8546354ac --- /dev/null +++ b/include/linux/net_tstamp.h @@ -0,0 +1,104 @@ +/* + * Userspace API for hardware time stamping of network packets + * + * Copyright (C) 2008,2009 Intel Corporation + * Author: Patrick Ohly + * + */ + +#ifndef _NET_TIMESTAMPING_H +#define _NET_TIMESTAMPING_H + +#include /* for SO_TIMESTAMPING */ + +/* SO_TIMESTAMPING gets an integer bit field comprised of these values */ +enum { + SOF_TIMESTAMPING_TX_HARDWARE = (1<<0), + SOF_TIMESTAMPING_TX_SOFTWARE = (1<<1), + SOF_TIMESTAMPING_RX_HARDWARE = (1<<2), + SOF_TIMESTAMPING_RX_SOFTWARE = (1<<3), + SOF_TIMESTAMPING_SOFTWARE = (1<<4), + SOF_TIMESTAMPING_SYS_HARDWARE = (1<<5), + SOF_TIMESTAMPING_RAW_HARDWARE = (1<<6), + SOF_TIMESTAMPING_MASK = + (SOF_TIMESTAMPING_RAW_HARDWARE - 1) | + SOF_TIMESTAMPING_RAW_HARDWARE +}; + +/** + * struct hwtstamp_config - %SIOCSHWTSTAMP parameter + * + * @flags: no flags defined right now, must be zero + * @tx_type: one of HWTSTAMP_TX_* + * @rx_type: one of one of HWTSTAMP_FILTER_* + * + * %SIOCSHWTSTAMP expects a &struct ifreq with a ifr_data pointer to + * this structure. dev_ifsioc() in the kernel takes care of the + * translation between 32 bit userspace and 64 bit kernel. The + * structure is intentionally chosen so that it has the same layout on + * 32 and 64 bit systems, don't break this! + */ +struct hwtstamp_config { + int flags; + int tx_type; + int rx_filter; +}; + +/* possible values for hwtstamp_config->tx_type */ +enum { + /* + * No outgoing packet will need hardware time stamping; + * should a packet arrive which asks for it, no hardware + * time stamping will be done. + */ + HWTSTAMP_TX_OFF, + + /* + * Enables hardware time stamping for outgoing packets; + * the sender of the packet decides which are to be + * time stamped by setting %SOF_TIMESTAMPING_TX_SOFTWARE + * before sending the packet. + */ + HWTSTAMP_TX_ON, +}; + +/* possible values for hwtstamp_config->rx_filter */ +enum { + /* time stamp no incoming packet at all */ + HWTSTAMP_FILTER_NONE, + + /* time stamp any incoming packet */ + HWTSTAMP_FILTER_ALL, + + /* return value: time stamp all packets requested plus some others */ + HWTSTAMP_FILTER_SOME, + + /* PTP v1, UDP, any kind of event packet */ + HWTSTAMP_FILTER_PTP_V1_L4_EVENT, + /* PTP v1, UDP, Sync packet */ + HWTSTAMP_FILTER_PTP_V1_L4_SYNC, + /* PTP v1, UDP, Delay_req packet */ + HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ, + /* PTP v2, UDP, any kind of event packet */ + HWTSTAMP_FILTER_PTP_V2_L4_EVENT, + /* PTP v2, UDP, Sync packet */ + HWTSTAMP_FILTER_PTP_V2_L4_SYNC, + /* PTP v2, UDP, Delay_req packet */ + HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ, + + /* 802.AS1, Ethernet, any kind of event packet */ + HWTSTAMP_FILTER_PTP_V2_L2_EVENT, + /* 802.AS1, Ethernet, Sync packet */ + HWTSTAMP_FILTER_PTP_V2_L2_SYNC, + /* 802.AS1, Ethernet, Delay_req packet */ + HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ, + + /* PTP v2/802.AS1, any layer, any kind of event packet */ + HWTSTAMP_FILTER_PTP_V2_EVENT, + /* PTP v2/802.AS1, any layer, Sync packet */ + HWTSTAMP_FILTER_PTP_V2_SYNC, + /* PTP v2/802.AS1, any layer, Delay_req packet */ + HWTSTAMP_FILTER_PTP_V2_DELAY_REQ, +}; + +#endif /* _NET_TIMESTAMPING_H */ diff --git a/include/linux/sockios.h b/include/linux/sockios.h index abef7596655a..241f179347d9 100644 --- a/include/linux/sockios.h +++ b/include/linux/sockios.h @@ -122,6 +122,9 @@ #define SIOCBRADDIF 0x89a2 /* add interface to bridge */ #define SIOCBRDELIF 0x89a3 /* remove interface from bridge */ +/* hardware time stamping: parameters in linux/net_tstamp.h */ +#define SIOCSHWTSTAMP 0x89b0 + /* Device private ioctl calls */ /* -- cgit v1.2.3-58-ga151 From 049359d655277c382683a6030ae0bac485568ffc Mon Sep 17 00:00:00 2001 From: James Hsiao Date: Thu, 5 Feb 2009 16:18:13 +1100 Subject: crypto: amcc - Add crypt4xx driver This patch adds support for AMCC ppc4xx security device driver. This is the initial release that includes the driver framework with AES and SHA1 algorithms support. The remaining algorithms will be released in the near future. Signed-off-by: James Hsiao Signed-off-by: Herbert Xu --- arch/powerpc/boot/dts/canyonlands.dts | 7 + arch/powerpc/boot/dts/kilauea.dts | 7 + drivers/crypto/Kconfig | 9 + drivers/crypto/Makefile | 1 + drivers/crypto/amcc/Makefile | 2 + drivers/crypto/amcc/crypto4xx_alg.c | 293 +++++++ drivers/crypto/amcc/crypto4xx_core.c | 1310 +++++++++++++++++++++++++++++++ drivers/crypto/amcc/crypto4xx_core.h | 177 +++++ drivers/crypto/amcc/crypto4xx_reg_def.h | 284 +++++++ drivers/crypto/amcc/crypto4xx_sa.c | 108 +++ drivers/crypto/amcc/crypto4xx_sa.h | 243 ++++++ 11 files changed, 2441 insertions(+) create mode 100644 drivers/crypto/amcc/Makefile create mode 100644 drivers/crypto/amcc/crypto4xx_alg.c create mode 100644 drivers/crypto/amcc/crypto4xx_core.c create mode 100644 drivers/crypto/amcc/crypto4xx_core.h create mode 100644 drivers/crypto/amcc/crypto4xx_reg_def.h create mode 100644 drivers/crypto/amcc/crypto4xx_sa.c create mode 100644 drivers/crypto/amcc/crypto4xx_sa.h (limited to 'arch/powerpc') diff --git a/arch/powerpc/boot/dts/canyonlands.dts b/arch/powerpc/boot/dts/canyonlands.dts index 8b5ba8261a36..4447def69dc5 100644 --- a/arch/powerpc/boot/dts/canyonlands.dts +++ b/arch/powerpc/boot/dts/canyonlands.dts @@ -127,6 +127,13 @@ dcr-reg = <0x010 0x002>; }; + CRYPTO: crypto@180000 { + compatible = "amcc,ppc460ex-crypto", "amcc,ppc4xx-crypto"; + reg = <4 0x00180000 0x80400>; + interrupt-parent = <&UIC0>; + interrupts = <0x1d 0x4>; + }; + MAL0: mcmal { compatible = "ibm,mcmal-460ex", "ibm,mcmal2"; dcr-reg = <0x180 0x062>; diff --git a/arch/powerpc/boot/dts/kilauea.dts b/arch/powerpc/boot/dts/kilauea.dts index 2804444812e5..5e6b08ff6f67 100644 --- a/arch/powerpc/boot/dts/kilauea.dts +++ b/arch/powerpc/boot/dts/kilauea.dts @@ -97,6 +97,13 @@ 0x6 0x4>; /* ECC SEC Error */ }; + CRYPTO: crypto@ef700000 { + compatible = "amcc,ppc405ex-crypto", "amcc,ppc4xx-crypto"; + reg = <0xef700000 0x80400>; + interrupt-parent = <&UIC0>; + interrupts = <0x17 0x2>; + }; + MAL0: mcmal { compatible = "ibm,mcmal-405ex", "ibm,mcmal2"; dcr-reg = <0x180 0x062>; diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 5adbae71454f..01afd758072f 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -200,4 +200,13 @@ config CRYPTO_DEV_IXP4XX help Driver for the IXP4xx NPE crypto engine. +config CRYPTO_DEV_PPC4XX + tristate "Driver AMCC PPC4xx crypto accelerator" + depends on PPC && 4xx + select CRYPTO_HASH + select CRYPTO_ALGAPI + select CRYPTO_BLKCIPHER + help + This option allows you to have support for AMCC crypto acceleration. + endif # CRYPTO_HW diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 73557b2968d3..9bf4a2bc8846 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -4,3 +4,4 @@ obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o +obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += amcc/ diff --git a/drivers/crypto/amcc/Makefile b/drivers/crypto/amcc/Makefile new file mode 100644 index 000000000000..aa376e8d5ed5 --- /dev/null +++ b/drivers/crypto/amcc/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += crypto4xx.o +crypto4xx-objs := crypto4xx_core.o crypto4xx_alg.o crypto4xx_sa.o diff --git a/drivers/crypto/amcc/crypto4xx_alg.c b/drivers/crypto/amcc/crypto4xx_alg.c new file mode 100644 index 000000000000..61b6e1bec8c6 --- /dev/null +++ b/drivers/crypto/amcc/crypto4xx_alg.c @@ -0,0 +1,293 @@ +/** + * AMCC SoC PPC4xx Crypto Driver + * + * Copyright (c) 2008 Applied Micro Circuits Corporation. + * All rights reserved. James Hsiao + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * This file implements the Linux crypto algorithms. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "crypto4xx_reg_def.h" +#include "crypto4xx_sa.h" +#include "crypto4xx_core.h" + +void set_dynamic_sa_command_0(struct dynamic_sa_ctl *sa, u32 save_h, + u32 save_iv, u32 ld_h, u32 ld_iv, u32 hdr_proc, + u32 h, u32 c, u32 pad_type, u32 op_grp, u32 op, + u32 dir) +{ + sa->sa_command_0.w = 0; + sa->sa_command_0.bf.save_hash_state = save_h; + sa->sa_command_0.bf.save_iv = save_iv; + sa->sa_command_0.bf.load_hash_state = ld_h; + sa->sa_command_0.bf.load_iv = ld_iv; + sa->sa_command_0.bf.hdr_proc = hdr_proc; + sa->sa_command_0.bf.hash_alg = h; + sa->sa_command_0.bf.cipher_alg = c; + sa->sa_command_0.bf.pad_type = pad_type & 3; + sa->sa_command_0.bf.extend_pad = pad_type >> 2; + sa->sa_command_0.bf.op_group = op_grp; + sa->sa_command_0.bf.opcode = op; + sa->sa_command_0.bf.dir = dir; +} + +void set_dynamic_sa_command_1(struct dynamic_sa_ctl *sa, u32 cm, u32 hmac_mc, + u32 cfb, u32 esn, u32 sn_mask, u32 mute, + u32 cp_pad, u32 cp_pay, u32 cp_hdr) +{ + sa->sa_command_1.w = 0; + sa->sa_command_1.bf.crypto_mode31 = (cm & 4) >> 2; + sa->sa_command_1.bf.crypto_mode9_8 = cm & 3; + sa->sa_command_1.bf.feedback_mode = cfb, + sa->sa_command_1.bf.sa_rev = 1; + sa->sa_command_1.bf.extended_seq_num = esn; + sa->sa_command_1.bf.seq_num_mask = sn_mask; + sa->sa_command_1.bf.mutable_bit_proc = mute; + sa->sa_command_1.bf.copy_pad = cp_pad; + sa->sa_command_1.bf.copy_payload = cp_pay; + sa->sa_command_1.bf.copy_hdr = cp_hdr; +} + +int crypto4xx_encrypt(struct ablkcipher_request *req) +{ + struct crypto4xx_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + + ctx->direction = DIR_OUTBOUND; + ctx->hash_final = 0; + ctx->is_hash = 0; + ctx->pd_ctl = 0x1; + + return crypto4xx_build_pd(&req->base, ctx, req->src, req->dst, + req->nbytes, req->info, + get_dynamic_sa_iv_size(ctx)); +} + +int crypto4xx_decrypt(struct ablkcipher_request *req) +{ + struct crypto4xx_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + + ctx->direction = DIR_INBOUND; + ctx->hash_final = 0; + ctx->is_hash = 0; + ctx->pd_ctl = 1; + + return crypto4xx_build_pd(&req->base, ctx, req->src, req->dst, + req->nbytes, req->info, + get_dynamic_sa_iv_size(ctx)); +} + +/** + * AES Functions + */ +static int crypto4xx_setkey_aes(struct crypto_ablkcipher *cipher, + const u8 *key, + unsigned int keylen, + unsigned char cm, + u8 fb) +{ + struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher); + struct crypto4xx_ctx *ctx = crypto_tfm_ctx(tfm); + struct dynamic_sa_ctl *sa; + int rc; + + if (keylen != AES_KEYSIZE_256 && + keylen != AES_KEYSIZE_192 && keylen != AES_KEYSIZE_128) { + crypto_ablkcipher_set_flags(cipher, + CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + /* Create SA */ + if (ctx->sa_in_dma_addr || ctx->sa_out_dma_addr) + crypto4xx_free_sa(ctx); + + rc = crypto4xx_alloc_sa(ctx, SA_AES128_LEN + (keylen-16) / 4); + if (rc) + return rc; + + if (ctx->state_record_dma_addr == 0) { + rc = crypto4xx_alloc_state_record(ctx); + if (rc) { + crypto4xx_free_sa(ctx); + return rc; + } + } + /* Setup SA */ + sa = (struct dynamic_sa_ctl *) ctx->sa_in; + ctx->hash_final = 0; + + set_dynamic_sa_command_0(sa, SA_NOT_SAVE_HASH, SA_NOT_SAVE_IV, + SA_LOAD_HASH_FROM_SA, SA_LOAD_IV_FROM_STATE, + SA_NO_HEADER_PROC, SA_HASH_ALG_NULL, + SA_CIPHER_ALG_AES, SA_PAD_TYPE_ZERO, + SA_OP_GROUP_BASIC, SA_OPCODE_DECRYPT, + DIR_INBOUND); + + set_dynamic_sa_command_1(sa, cm, SA_HASH_MODE_HASH, + fb, SA_EXTENDED_SN_OFF, + SA_SEQ_MASK_OFF, SA_MC_ENABLE, + SA_NOT_COPY_PAD, SA_NOT_COPY_PAYLOAD, + SA_NOT_COPY_HDR); + crypto4xx_memcpy_le(ctx->sa_in + get_dynamic_sa_offset_key_field(ctx), + key, keylen); + sa->sa_contents = SA_AES_CONTENTS | (keylen << 2); + sa->sa_command_1.bf.key_len = keylen >> 3; + ctx->is_hash = 0; + ctx->direction = DIR_INBOUND; + memcpy(ctx->sa_in + get_dynamic_sa_offset_state_ptr_field(ctx), + (void *)&ctx->state_record_dma_addr, 4); + ctx->offset_to_sr_ptr = get_dynamic_sa_offset_state_ptr_field(ctx); + + memcpy(ctx->sa_out, ctx->sa_in, ctx->sa_len * 4); + sa = (struct dynamic_sa_ctl *) ctx->sa_out; + sa->sa_command_0.bf.dir = DIR_OUTBOUND; + + return 0; +} + +int crypto4xx_setkey_aes_cbc(struct crypto_ablkcipher *cipher, + const u8 *key, unsigned int keylen) +{ + return crypto4xx_setkey_aes(cipher, key, keylen, CRYPTO_MODE_CBC, + CRYPTO_FEEDBACK_MODE_NO_FB); +} + +/** + * HASH SHA1 Functions + */ +static int crypto4xx_hash_alg_init(struct crypto_tfm *tfm, + unsigned int sa_len, + unsigned char ha, + unsigned char hm) +{ + struct crypto_alg *alg = tfm->__crt_alg; + struct crypto4xx_alg *my_alg = crypto_alg_to_crypto4xx_alg(alg); + struct crypto4xx_ctx *ctx = crypto_tfm_ctx(tfm); + struct dynamic_sa_ctl *sa; + struct dynamic_sa_hash160 *sa_in; + int rc; + + ctx->dev = my_alg->dev; + ctx->is_hash = 1; + ctx->hash_final = 0; + + /* Create SA */ + if (ctx->sa_in_dma_addr || ctx->sa_out_dma_addr) + crypto4xx_free_sa(ctx); + + rc = crypto4xx_alloc_sa(ctx, sa_len); + if (rc) + return rc; + + if (ctx->state_record_dma_addr == 0) { + crypto4xx_alloc_state_record(ctx); + if (!ctx->state_record_dma_addr) { + crypto4xx_free_sa(ctx); + return -ENOMEM; + } + } + + tfm->crt_ahash.reqsize = sizeof(struct crypto4xx_ctx); + sa = (struct dynamic_sa_ctl *) ctx->sa_in; + set_dynamic_sa_command_0(sa, SA_SAVE_HASH, SA_NOT_SAVE_IV, + SA_NOT_LOAD_HASH, SA_LOAD_IV_FROM_SA, + SA_NO_HEADER_PROC, ha, SA_CIPHER_ALG_NULL, + SA_PAD_TYPE_ZERO, SA_OP_GROUP_BASIC, + SA_OPCODE_HASH, DIR_INBOUND); + set_dynamic_sa_command_1(sa, 0, SA_HASH_MODE_HASH, + CRYPTO_FEEDBACK_MODE_NO_FB, SA_EXTENDED_SN_OFF, + SA_SEQ_MASK_OFF, SA_MC_ENABLE, + SA_NOT_COPY_PAD, SA_NOT_COPY_PAYLOAD, + SA_NOT_COPY_HDR); + ctx->direction = DIR_INBOUND; + sa->sa_contents = SA_HASH160_CONTENTS; + sa_in = (struct dynamic_sa_hash160 *) ctx->sa_in; + /* Need to zero hash digest in SA */ + memset(sa_in->inner_digest, 0, sizeof(sa_in->inner_digest)); + memset(sa_in->outer_digest, 0, sizeof(sa_in->outer_digest)); + sa_in->state_ptr = ctx->state_record_dma_addr; + ctx->offset_to_sr_ptr = get_dynamic_sa_offset_state_ptr_field(ctx); + + return 0; +} + +int crypto4xx_hash_init(struct ahash_request *req) +{ + struct crypto4xx_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + int ds; + struct dynamic_sa_ctl *sa; + + sa = (struct dynamic_sa_ctl *) ctx->sa_in; + ds = crypto_ahash_digestsize( + __crypto_ahash_cast(req->base.tfm)); + sa->sa_command_0.bf.digest_len = ds >> 2; + sa->sa_command_0.bf.load_hash_state = SA_LOAD_HASH_FROM_SA; + ctx->is_hash = 1; + ctx->direction = DIR_INBOUND; + + return 0; +} + +int crypto4xx_hash_update(struct ahash_request *req) +{ + struct crypto4xx_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + + ctx->is_hash = 1; + ctx->hash_final = 0; + ctx->pd_ctl = 0x11; + ctx->direction = DIR_INBOUND; + + return crypto4xx_build_pd(&req->base, ctx, req->src, + (struct scatterlist *) req->result, + req->nbytes, NULL, 0); +} + +int crypto4xx_hash_final(struct ahash_request *req) +{ + return 0; +} + +int crypto4xx_hash_digest(struct ahash_request *req) +{ + struct crypto4xx_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + + ctx->hash_final = 1; + ctx->pd_ctl = 0x11; + ctx->direction = DIR_INBOUND; + + return crypto4xx_build_pd(&req->base, ctx, req->src, + (struct scatterlist *) req->result, + req->nbytes, NULL, 0); +} + +/** + * SHA1 Algorithm + */ +int crypto4xx_sha1_alg_init(struct crypto_tfm *tfm) +{ + return crypto4xx_hash_alg_init(tfm, SA_HASH160_LEN, SA_HASH_ALG_SHA1, + SA_HASH_MODE_HASH); +} + + diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c new file mode 100644 index 000000000000..4c0dfb2b872e --- /dev/null +++ b/drivers/crypto/amcc/crypto4xx_core.c @@ -0,0 +1,1310 @@ +/** + * AMCC SoC PPC4xx Crypto Driver + * + * Copyright (c) 2008 Applied Micro Circuits Corporation. + * All rights reserved. James Hsiao + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * This file implements AMCC crypto offload Linux device driver for use with + * Linux CryptoAPI. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "crypto4xx_reg_def.h" +#include "crypto4xx_core.h" +#include "crypto4xx_sa.h" + +#define PPC4XX_SEC_VERSION_STR "0.5" + +/** + * PPC4xx Crypto Engine Initialization Routine + */ +static void crypto4xx_hw_init(struct crypto4xx_device *dev) +{ + union ce_ring_size ring_size; + union ce_ring_contol ring_ctrl; + union ce_part_ring_size part_ring_size; + union ce_io_threshold io_threshold; + u32 rand_num; + union ce_pe_dma_cfg pe_dma_cfg; + + writel(PPC4XX_BYTE_ORDER, dev->ce_base + CRYPTO4XX_BYTE_ORDER_CFG); + /* setup pe dma, include reset sg, pdr and pe, then release reset */ + pe_dma_cfg.w = 0; + pe_dma_cfg.bf.bo_sgpd_en = 1; + pe_dma_cfg.bf.bo_data_en = 0; + pe_dma_cfg.bf.bo_sa_en = 1; + pe_dma_cfg.bf.bo_pd_en = 1; + pe_dma_cfg.bf.dynamic_sa_en = 1; + pe_dma_cfg.bf.reset_sg = 1; + pe_dma_cfg.bf.reset_pdr = 1; + pe_dma_cfg.bf.reset_pe = 1; + writel(pe_dma_cfg.w, dev->ce_base + CRYPTO4XX_PE_DMA_CFG); + /* un reset pe,sg and pdr */ + pe_dma_cfg.bf.pe_mode = 0; + pe_dma_cfg.bf.reset_sg = 0; + pe_dma_cfg.bf.reset_pdr = 0; + pe_dma_cfg.bf.reset_pe = 0; + pe_dma_cfg.bf.bo_td_en = 0; + writel(pe_dma_cfg.w, dev->ce_base + CRYPTO4XX_PE_DMA_CFG); + writel(dev->pdr_pa, dev->ce_base + CRYPTO4XX_PDR_BASE); + writel(dev->pdr_pa, dev->ce_base + CRYPTO4XX_RDR_BASE); + writel(PPC4XX_PRNG_CTRL_AUTO_EN, dev->ce_base + CRYPTO4XX_PRNG_CTRL); + get_random_bytes(&rand_num, sizeof(rand_num)); + writel(rand_num, dev->ce_base + CRYPTO4XX_PRNG_SEED_L); + get_random_bytes(&rand_num, sizeof(rand_num)); + writel(rand_num, dev->ce_base + CRYPTO4XX_PRNG_SEED_H); + ring_size.w = 0; + ring_size.bf.ring_offset = PPC4XX_PD_SIZE; + ring_size.bf.ring_size = PPC4XX_NUM_PD; + writel(ring_size.w, dev->ce_base + CRYPTO4XX_RING_SIZE); + ring_ctrl.w = 0; + writel(ring_ctrl.w, dev->ce_base + CRYPTO4XX_RING_CTRL); + writel(PPC4XX_DC_3DES_EN, dev->ce_base + CRYPTO4XX_DEVICE_CTRL); + writel(dev->gdr_pa, dev->ce_base + CRYPTO4XX_GATH_RING_BASE); + writel(dev->sdr_pa, dev->ce_base + CRYPTO4XX_SCAT_RING_BASE); + part_ring_size.w = 0; + part_ring_size.bf.sdr_size = PPC4XX_SDR_SIZE; + part_ring_size.bf.gdr_size = PPC4XX_GDR_SIZE; + writel(part_ring_size.w, dev->ce_base + CRYPTO4XX_PART_RING_SIZE); + writel(PPC4XX_SD_BUFFER_SIZE, dev->ce_base + CRYPTO4XX_PART_RING_CFG); + io_threshold.w = 0; + io_threshold.bf.output_threshold = PPC4XX_OUTPUT_THRESHOLD; + io_threshold.bf.input_threshold = PPC4XX_INPUT_THRESHOLD; + writel(io_threshold.w, dev->ce_base + CRYPTO4XX_IO_THRESHOLD); + writel(0, dev->ce_base + CRYPTO4XX_PDR_BASE_UADDR); + writel(0, dev->ce_base + CRYPTO4XX_RDR_BASE_UADDR); + writel(0, dev->ce_base + CRYPTO4XX_PKT_SRC_UADDR); + writel(0, dev->ce_base + CRYPTO4XX_PKT_DEST_UADDR); + writel(0, dev->ce_base + CRYPTO4XX_SA_UADDR); + writel(0, dev->ce_base + CRYPTO4XX_GATH_RING_BASE_UADDR); + writel(0, dev->ce_base + CRYPTO4XX_SCAT_RING_BASE_UADDR); + /* un reset pe,sg and pdr */ + pe_dma_cfg.bf.pe_mode = 1; + pe_dma_cfg.bf.reset_sg = 0; + pe_dma_cfg.bf.reset_pdr = 0; + pe_dma_cfg.bf.reset_pe = 0; + pe_dma_cfg.bf.bo_td_en = 0; + writel(pe_dma_cfg.w, dev->ce_base + CRYPTO4XX_PE_DMA_CFG); + /*clear all pending interrupt*/ + writel(PPC4XX_INTERRUPT_CLR, dev->ce_base + CRYPTO4XX_INT_CLR); + writel(PPC4XX_INT_DESCR_CNT, dev->ce_base + CRYPTO4XX_INT_DESCR_CNT); + writel(PPC4XX_INT_DESCR_CNT, dev->ce_base + CRYPTO4XX_INT_DESCR_CNT); + writel(PPC4XX_INT_CFG, dev->ce_base + CRYPTO4XX_INT_CFG); + writel(PPC4XX_PD_DONE_INT, dev->ce_base + CRYPTO4XX_INT_EN); +} + +int crypto4xx_alloc_sa(struct crypto4xx_ctx *ctx, u32 size) +{ + ctx->sa_in = dma_alloc_coherent(ctx->dev->core_dev->device, size * 4, + &ctx->sa_in_dma_addr, GFP_ATOMIC); + if (ctx->sa_in == NULL) + return -ENOMEM; + + ctx->sa_out = dma_alloc_coherent(ctx->dev->core_dev->device, size * 4, + &ctx->sa_out_dma_addr, GFP_ATOMIC); + if (ctx->sa_out == NULL) { + dma_free_coherent(ctx->dev->core_dev->device, + ctx->sa_len * 4, + ctx->sa_in, ctx->sa_in_dma_addr); + return -ENOMEM; + } + + memset(ctx->sa_in, 0, size * 4); + memset(ctx->sa_out, 0, size * 4); + ctx->sa_len = size; + + return 0; +} + +void crypto4xx_free_sa(struct crypto4xx_ctx *ctx) +{ + if (ctx->sa_in != NULL) + dma_free_coherent(ctx->dev->core_dev->device, ctx->sa_len * 4, + ctx->sa_in, ctx->sa_in_dma_addr); + if (ctx->sa_out != NULL) + dma_free_coherent(ctx->dev->core_dev->device, ctx->sa_len * 4, + ctx->sa_out, ctx->sa_out_dma_addr); + + ctx->sa_in_dma_addr = 0; + ctx->sa_out_dma_addr = 0; + ctx->sa_len = 0; +} + +u32 crypto4xx_alloc_state_record(struct crypto4xx_ctx *ctx) +{ + ctx->state_record = dma_alloc_coherent(ctx->dev->core_dev->device, + sizeof(struct sa_state_record), + &ctx->state_record_dma_addr, GFP_ATOMIC); + if (!ctx->state_record_dma_addr) + return -ENOMEM; + memset(ctx->state_record, 0, sizeof(struct sa_state_record)); + + return 0; +} + +void crypto4xx_free_state_record(struct crypto4xx_ctx *ctx) +{ + if (ctx->state_record != NULL) + dma_free_coherent(ctx->dev->core_dev->device, + sizeof(struct sa_state_record), + ctx->state_record, + ctx->state_record_dma_addr); + ctx->state_record_dma_addr = 0; +} + +/** + * alloc memory for the gather ring + * no need to alloc buf for the ring + * gdr_tail, gdr_head and gdr_count are initialized by this function + */ +static u32 crypto4xx_build_pdr(struct crypto4xx_device *dev) +{ + int i; + struct pd_uinfo *pd_uinfo; + dev->pdr = dma_alloc_coherent(dev->core_dev->device, + sizeof(struct ce_pd) * PPC4XX_NUM_PD, + &dev->pdr_pa, GFP_ATOMIC); + if (!dev->pdr) + return -ENOMEM; + + dev->pdr_uinfo = kzalloc(sizeof(struct pd_uinfo) * PPC4XX_NUM_PD, + GFP_KERNEL); + if (!dev->pdr_uinfo) { + dma_free_coherent(dev->core_dev->device, + sizeof(struct ce_pd) * PPC4XX_NUM_PD, + dev->pdr, + dev->pdr_pa); + return -ENOMEM; + } + memset(dev->pdr, 0, sizeof(struct ce_pd) * PPC4XX_NUM_PD); + dev->shadow_sa_pool = dma_alloc_coherent(dev->core_dev->device, + 256 * PPC4XX_NUM_PD, + &dev->shadow_sa_pool_pa, + GFP_ATOMIC); + if (!dev->shadow_sa_pool) + return -ENOMEM; + + dev->shadow_sr_pool = dma_alloc_coherent(dev->core_dev->device, + sizeof(struct sa_state_record) * PPC4XX_NUM_PD, + &dev->shadow_sr_pool_pa, GFP_ATOMIC); + if (!dev->shadow_sr_pool) + return -ENOMEM; + for (i = 0; i < PPC4XX_NUM_PD; i++) { + pd_uinfo = (struct pd_uinfo *) (dev->pdr_uinfo + + sizeof(struct pd_uinfo) * i); + + /* alloc 256 bytes which is enough for any kind of dynamic sa */ + pd_uinfo->sa_va = dev->shadow_sa_pool + 256 * i; + pd_uinfo->sa_pa = dev->shadow_sa_pool_pa + 256 * i; + + /* alloc state record */ + pd_uinfo->sr_va = dev->shadow_sr_pool + + sizeof(struct sa_state_record) * i; + pd_uinfo->sr_pa = dev->shadow_sr_pool_pa + + sizeof(struct sa_state_record) * i; + } + + return 0; +} + +static void crypto4xx_destroy_pdr(struct crypto4xx_device *dev) +{ + if (dev->pdr != NULL) + dma_free_coherent(dev->core_dev->device, + sizeof(struct ce_pd) * PPC4XX_NUM_PD, + dev->pdr, dev->pdr_pa); + if (dev->shadow_sa_pool) + dma_free_coherent(dev->core_dev->device, 256 * PPC4XX_NUM_PD, + dev->shadow_sa_pool, dev->shadow_sa_pool_pa); + if (dev->shadow_sr_pool) + dma_free_coherent(dev->core_dev->device, + sizeof(struct sa_state_record) * PPC4XX_NUM_PD, + dev->shadow_sr_pool, dev->shadow_sr_pool_pa); + + kfree(dev->pdr_uinfo); +} + +static u32 crypto4xx_get_pd_from_pdr_nolock(struct crypto4xx_device *dev) +{ + u32 retval; + u32 tmp; + + retval = dev->pdr_head; + tmp = (dev->pdr_head + 1) % PPC4XX_NUM_PD; + + if (tmp == dev->pdr_tail) + return ERING_WAS_FULL; + + dev->pdr_head = tmp; + + return retval; +} + +static u32 crypto4xx_put_pd_to_pdr(struct crypto4xx_device *dev, u32 idx) +{ + struct pd_uinfo *pd_uinfo; + unsigned long flags; + + pd_uinfo = (struct pd_uinfo *)(dev->pdr_uinfo + + sizeof(struct pd_uinfo) * idx); + spin_lock_irqsave(&dev->core_dev->lock, flags); + if (dev->pdr_tail != PPC4XX_LAST_PD) + dev->pdr_tail++; + else + dev->pdr_tail = 0; + pd_uinfo->state = PD_ENTRY_FREE; + spin_unlock_irqrestore(&dev->core_dev->lock, flags); + + return 0; +} + +static struct ce_pd *crypto4xx_get_pdp(struct crypto4xx_device *dev, + dma_addr_t *pd_dma, u32 idx) +{ + *pd_dma = dev->pdr_pa + sizeof(struct ce_pd) * idx; + + return dev->pdr + sizeof(struct ce_pd) * idx; +} + +/** + * alloc memory for the gather ring + * no need to alloc buf for the ring + * gdr_tail, gdr_head and gdr_count are initialized by this function + */ +static u32 crypto4xx_build_gdr(struct crypto4xx_device *dev) +{ + dev->gdr = dma_alloc_coherent(dev->core_dev->device, + sizeof(struct ce_gd) * PPC4XX_NUM_GD, + &dev->gdr_pa, GFP_ATOMIC); + if (!dev->gdr) + return -ENOMEM; + + memset(dev->gdr, 0, sizeof(struct ce_gd) * PPC4XX_NUM_GD); + + return 0; +} + +static inline void crypto4xx_destroy_gdr(struct crypto4xx_device *dev) +{ + dma_free_coherent(dev->core_dev->device, + sizeof(struct ce_gd) * PPC4XX_NUM_GD, + dev->gdr, dev->gdr_pa); +} + +/* + * when this function is called. + * preemption or interrupt must be disabled + */ +u32 crypto4xx_get_n_gd(struct crypto4xx_device *dev, int n) +{ + u32 retval; + u32 tmp; + if (n >= PPC4XX_NUM_GD) + return ERING_WAS_FULL; + + retval = dev->gdr_head; + tmp = (dev->gdr_head + n) % PPC4XX_NUM_GD; + if (dev->gdr_head > dev->gdr_tail) { + if (tmp < dev->gdr_head && tmp >= dev->gdr_tail) + return ERING_WAS_FULL; + } else if (dev->gdr_head < dev->gdr_tail) { + if (tmp < dev->gdr_head || tmp >= dev->gdr_tail) + return ERING_WAS_FULL; + } + dev->gdr_head = tmp; + + return retval; +} + +static u32 crypto4xx_put_gd_to_gdr(struct crypto4xx_device *dev) +{ + unsigned long flags; + + spin_lock_irqsave(&dev->core_dev->lock, flags); + if (dev->gdr_tail == dev->gdr_head) { + spin_unlock_irqrestore(&dev->core_dev->lock, flags); + return 0; + } + + if (dev->gdr_tail != PPC4XX_LAST_GD) + dev->gdr_tail++; + else + dev->gdr_tail = 0; + + spin_unlock_irqrestore(&dev->core_dev->lock, flags); + + return 0; +} + +static inline struct ce_gd *crypto4xx_get_gdp(struct crypto4xx_device *dev, + dma_addr_t *gd_dma, u32 idx) +{ + *gd_dma = dev->gdr_pa + sizeof(struct ce_gd) * idx; + + return (struct ce_gd *) (dev->gdr + sizeof(struct ce_gd) * idx); +} + +/** + * alloc memory for the scatter ring + * need to alloc buf for the ring + * sdr_tail, sdr_head and sdr_count are initialized by this function + */ +static u32 crypto4xx_build_sdr(struct crypto4xx_device *dev) +{ + int i; + struct ce_sd *sd_array; + + /* alloc memory for scatter descriptor ring */ + dev->sdr = dma_alloc_coherent(dev->core_dev->device, + sizeof(struct ce_sd) * PPC4XX_NUM_SD, + &dev->sdr_pa, GFP_ATOMIC); + if (!dev->sdr) + return -ENOMEM; + + dev->scatter_buffer_size = PPC4XX_SD_BUFFER_SIZE; + dev->scatter_buffer_va = + dma_alloc_coherent(dev->core_dev->device, + dev->scatter_buffer_size * PPC4XX_NUM_SD, + &dev->scatter_buffer_pa, GFP_ATOMIC); + if (!dev->scatter_buffer_va) { + dma_free_coherent(dev->core_dev->device, + sizeof(struct ce_sd) * PPC4XX_NUM_SD, + dev->sdr, dev->sdr_pa); + return -ENOMEM; + } + + sd_array = dev->sdr; + + for (i = 0; i < PPC4XX_NUM_SD; i++) { + sd_array[i].ptr = dev->scatter_buffer_pa + + dev->scatter_buffer_size * i; + } + + return 0; +} + +static void crypto4xx_destroy_sdr(struct crypto4xx_device *dev) +{ + if (dev->sdr != NULL) + dma_free_coherent(dev->core_dev->device, + sizeof(struct ce_sd) * PPC4XX_NUM_SD, + dev->sdr, dev->sdr_pa); + + if (dev->scatter_buffer_va != NULL) + dma_free_coherent(dev->core_dev->device, + dev->scatter_buffer_size * PPC4XX_NUM_SD, + dev->scatter_buffer_va, + dev->scatter_buffer_pa); +} + +/* + * when this function is called. + * preemption or interrupt must be disabled + */ +static u32 crypto4xx_get_n_sd(struct crypto4xx_device *dev, int n) +{ + u32 retval; + u32 tmp; + + if (n >= PPC4XX_NUM_SD) + return ERING_WAS_FULL; + + retval = dev->sdr_head; + tmp = (dev->sdr_head + n) % PPC4XX_NUM_SD; + if (dev->sdr_head > dev->gdr_tail) { + if (tmp < dev->sdr_head && tmp >= dev->sdr_tail) + return ERING_WAS_FULL; + } else if (dev->sdr_head < dev->sdr_tail) { + if (tmp < dev->sdr_head || tmp >= dev->sdr_tail) + return ERING_WAS_FULL; + } /* the head = tail, or empty case is already take cared */ + dev->sdr_head = tmp; + + return retval; +} + +static u32 crypto4xx_put_sd_to_sdr(struct crypto4xx_device *dev) +{ + unsigned long flags; + + spin_lock_irqsave(&dev->core_dev->lock, flags); + if (dev->sdr_tail == dev->sdr_head) { + spin_unlock_irqrestore(&dev->core_dev->lock, flags); + return 0; + } + if (dev->sdr_tail != PPC4XX_LAST_SD) + dev->sdr_tail++; + else + dev->sdr_tail = 0; + spin_unlock_irqrestore(&dev->core_dev->lock, flags); + + return 0; +} + +static inline struct ce_sd *crypto4xx_get_sdp(struct crypto4xx_device *dev, + dma_addr_t *sd_dma, u32 idx) +{ + *sd_dma = dev->sdr_pa + sizeof(struct ce_sd) * idx; + + return (struct ce_sd *)(dev->sdr + sizeof(struct ce_sd) * idx); +} + +static u32 crypto4xx_fill_one_page(struct crypto4xx_device *dev, + dma_addr_t *addr, u32 *length, + u32 *idx, u32 *offset, u32 *nbytes) +{ + u32 len; + + if (*length > dev->scatter_buffer_size) { + memcpy(phys_to_virt(*addr), + dev->scatter_buffer_va + + *idx * dev->scatter_buffer_size + *offset, + dev->scatter_buffer_size); + *offset = 0; + *length -= dev->scatter_buffer_size; + *nbytes -= dev->scatter_buffer_size; + if (*idx == PPC4XX_LAST_SD) + *idx = 0; + else + (*idx)++; + *addr = *addr + dev->scatter_buffer_size; + return 1; + } else if (*length < dev->scatter_buffer_size) { + memcpy(phys_to_virt(*addr), + dev->scatter_buffer_va + + *idx * dev->scatter_buffer_size + *offset, *length); + if ((*offset + *length) == dev->scatter_buffer_size) { + if (*idx == PPC4XX_LAST_SD) + *idx = 0; + else + (*idx)++; + *nbytes -= *length; + *offset = 0; + } else { + *nbytes -= *length; + *offset += *length; + } + + return 0; + } else { + len = (*nbytes <= dev->scatter_buffer_size) ? + (*nbytes) : dev->scatter_buffer_size; + memcpy(phys_to_virt(*addr), + dev->scatter_buffer_va + + *idx * dev->scatter_buffer_size + *offset, + len); + *offset = 0; + *nbytes -= len; + + if (*idx == PPC4XX_LAST_SD) + *idx = 0; + else + (*idx)++; + + return 0; + } +} + +static void crypto4xx_copy_pkt_to_dst(struct crypto4xx_device *dev, + struct ce_pd *pd, + struct pd_uinfo *pd_uinfo, + u32 nbytes, + struct scatterlist *dst) +{ + dma_addr_t addr; + u32 this_sd; + u32 offset; + u32 len; + u32 i; + u32 sg_len; + struct scatterlist *sg; + + this_sd = pd_uinfo->first_sd; + offset = 0; + i = 0; + + while (nbytes) { + sg = &dst[i]; + sg_len = sg->length; + addr = dma_map_page(dev->core_dev->device, sg_page(sg), + sg->offset, sg->length, DMA_TO_DEVICE); + + if (offset == 0) { + len = (nbytes <= sg->length) ? nbytes : sg->length; + while (crypto4xx_fill_one_page(dev, &addr, &len, + &this_sd, &offset, &nbytes)) + ; + if (!nbytes) + return; + i++; + } else { + len = (nbytes <= (dev->scatter_buffer_size - offset)) ? + nbytes : (dev->scatter_buffer_size - offset); + len = (sg->length < len) ? sg->length : len; + while (crypto4xx_fill_one_page(dev, &addr, &len, + &this_sd, &offset, &nbytes)) + ; + if (!nbytes) + return; + sg_len -= len; + if (sg_len) { + addr += len; + while (crypto4xx_fill_one_page(dev, &addr, + &sg_len, &this_sd, &offset, &nbytes)) + ; + } + i++; + } + } +} + +static u32 crypto4xx_copy_digest_to_dst(struct pd_uinfo *pd_uinfo, + struct crypto4xx_ctx *ctx) +{ + struct dynamic_sa_ctl *sa = (struct dynamic_sa_ctl *) ctx->sa_in; + struct sa_state_record *state_record = + (struct sa_state_record *) pd_uinfo->sr_va; + + if (sa->sa_command_0.bf.hash_alg == SA_HASH_ALG_SHA1) { + memcpy((void *) pd_uinfo->dest_va, state_record->save_digest, + SA_HASH_ALG_SHA1_DIGEST_SIZE); + } + + return 0; +} + +static void crypto4xx_ret_sg_desc(struct crypto4xx_device *dev, + struct pd_uinfo *pd_uinfo) +{ + int i; + if (pd_uinfo->num_gd) { + for (i = 0; i < pd_uinfo->num_gd; i++) + crypto4xx_put_gd_to_gdr(dev); + pd_uinfo->first_gd = 0xffffffff; + pd_uinfo->num_gd = 0; + } + if (pd_uinfo->num_sd) { + for (i = 0; i < pd_uinfo->num_sd; i++) + crypto4xx_put_sd_to_sdr(dev); + + pd_uinfo->first_sd = 0xffffffff; + pd_uinfo->num_sd = 0; + } +} + +static u32 crypto4xx_ablkcipher_done(struct crypto4xx_device *dev, + struct pd_uinfo *pd_uinfo, + struct ce_pd *pd) +{ + struct crypto4xx_ctx *ctx; + struct ablkcipher_request *ablk_req; + struct scatterlist *dst; + dma_addr_t addr; + + ablk_req = ablkcipher_request_cast(pd_uinfo->async_req); + ctx = crypto_tfm_ctx(ablk_req->base.tfm); + + if (pd_uinfo->using_sd) { + crypto4xx_copy_pkt_to_dst(dev, pd, pd_uinfo, ablk_req->nbytes, + ablk_req->dst); + } else { + dst = pd_uinfo->dest_va; + addr = dma_map_page(dev->core_dev->device, sg_page(dst), + dst->offset, dst->length, DMA_FROM_DEVICE); + } + crypto4xx_ret_sg_desc(dev, pd_uinfo); + if (ablk_req->base.complete != NULL) + ablk_req->base.complete(&ablk_req->base, 0); + + return 0; +} + +static u32 crypto4xx_ahash_done(struct crypto4xx_device *dev, + struct pd_uinfo *pd_uinfo) +{ + struct crypto4xx_ctx *ctx; + struct ahash_request *ahash_req; + + ahash_req = ahash_request_cast(pd_uinfo->async_req); + ctx = crypto_tfm_ctx(ahash_req->base.tfm); + + crypto4xx_copy_digest_to_dst(pd_uinfo, + crypto_tfm_ctx(ahash_req->base.tfm)); + crypto4xx_ret_sg_desc(dev, pd_uinfo); + /* call user provided callback function x */ + if (ahash_req->base.complete != NULL) + ahash_req->base.complete(&ahash_req->base, 0); + + return 0; +} + +static u32 crypto4xx_pd_done(struct crypto4xx_device *dev, u32 idx) +{ + struct ce_pd *pd; + struct pd_uinfo *pd_uinfo; + + pd = dev->pdr + sizeof(struct ce_pd)*idx; + pd_uinfo = dev->pdr_uinfo + sizeof(struct pd_uinfo)*idx; + if (crypto_tfm_alg_type(pd_uinfo->async_req->tfm) == + CRYPTO_ALG_TYPE_ABLKCIPHER) + return crypto4xx_ablkcipher_done(dev, pd_uinfo, pd); + else + return crypto4xx_ahash_done(dev, pd_uinfo); +} + +/** + * Note: Only use this function to copy items that is word aligned. + */ +void crypto4xx_memcpy_le(unsigned int *dst, + const unsigned char *buf, + int len) +{ + u8 *tmp; + for (; len >= 4; buf += 4, len -= 4) + *dst++ = cpu_to_le32(*(unsigned int *) buf); + + tmp = (u8 *)dst; + switch (len) { + case 3: + *tmp++ = 0; + *tmp++ = *(buf+2); + *tmp++ = *(buf+1); + *tmp++ = *buf; + break; + case 2: + *tmp++ = 0; + *tmp++ = 0; + *tmp++ = *(buf+1); + *tmp++ = *buf; + break; + case 1: + *tmp++ = 0; + *tmp++ = 0; + *tmp++ = 0; + *tmp++ = *buf; + break; + default: + break; + } +} + +static void crypto4xx_stop_all(struct crypto4xx_core_device *core_dev) +{ + crypto4xx_destroy_pdr(core_dev->dev); + crypto4xx_destroy_gdr(core_dev->dev); + crypto4xx_destroy_sdr(core_dev->dev); + dev_set_drvdata(core_dev->device, NULL); + iounmap(core_dev->dev->ce_base); + kfree(core_dev->dev); + kfree(core_dev); +} + +void crypto4xx_return_pd(struct crypto4xx_device *dev, + u32 pd_entry, struct ce_pd *pd, + struct pd_uinfo *pd_uinfo) +{ + /* irq should be already disabled */ + dev->pdr_head = pd_entry; + pd->pd_ctl.w = 0; + pd->pd_ctl_len.w = 0; + pd_uinfo->state = PD_ENTRY_FREE; +} + +/* + * derive number of elements in scatterlist + * Shamlessly copy from talitos.c + */ +static int get_sg_count(struct scatterlist *sg_list, int nbytes) +{ + struct scatterlist *sg = sg_list; + int sg_nents = 0; + + while (nbytes) { + sg_nents++; + if (sg->length > nbytes) + break; + nbytes -= sg->length; + sg = sg_next(sg); + } + + return sg_nents; +} + +static u32 get_next_gd(u32 current) +{ + if (current != PPC4XX_LAST_GD) + return current + 1; + else + return 0; +} + +static u32 get_next_sd(u32 current) +{ + if (current != PPC4XX_LAST_SD) + return current + 1; + else + return 0; +} + +u32 crypto4xx_build_pd(struct crypto_async_request *req, + struct crypto4xx_ctx *ctx, + struct scatterlist *src, + struct scatterlist *dst, + unsigned int datalen, + void *iv, u32 iv_len) +{ + struct crypto4xx_device *dev = ctx->dev; + dma_addr_t addr, pd_dma, sd_dma, gd_dma; + struct dynamic_sa_ctl *sa; + struct scatterlist *sg; + struct ce_gd *gd; + struct ce_pd *pd; + u32 num_gd, num_sd; + u32 fst_gd = 0xffffffff; + u32 fst_sd = 0xffffffff; + u32 pd_entry; + unsigned long flags; + struct pd_uinfo *pd_uinfo = NULL; + unsigned int nbytes = datalen, idx; + unsigned int ivlen = 0; + u32 gd_idx = 0; + + /* figure how many gd is needed */ + num_gd = get_sg_count(src, datalen); + if (num_gd == 1) + num_gd = 0; + + /* figure how many sd is needed */ + if (sg_is_last(dst) || ctx->is_hash) { + num_sd = 0; + } else { + if (datalen > PPC4XX_SD_BUFFER_SIZE) { + num_sd = datalen / PPC4XX_SD_BUFFER_SIZE; + if (datalen % PPC4XX_SD_BUFFER_SIZE) + num_sd++; + } else { + num_sd = 1; + } + } + + /* + * The follow section of code needs to be protected + * The gather ring and scatter ring needs to be consecutive + * In case of run out of any kind of descriptor, the descriptor + * already got must be return the original place. + */ + spin_lock_irqsave(&dev->core_dev->lock, flags); + if (num_gd) { + fst_gd = crypto4xx_get_n_gd(dev, num_gd); + if (fst_gd == ERING_WAS_FULL) { + spin_unlock_irqrestore(&dev->core_dev->lock, flags); + return -EAGAIN; + } + } + if (num_sd) { + fst_sd = crypto4xx_get_n_sd(dev, num_sd); + if (fst_sd == ERING_WAS_FULL) { + if (num_gd) + dev->gdr_head = fst_gd; + spin_unlock_irqrestore(&dev->core_dev->lock, flags); + return -EAGAIN; + } + } + pd_entry = crypto4xx_get_pd_from_pdr_nolock(dev); + if (pd_entry == ERING_WAS_FULL) { + if (num_gd) + dev->gdr_head = fst_gd; + if (num_sd) + dev->sdr_head = fst_sd; + spin_unlock_irqrestore(&dev->core_dev->lock, flags); + return -EAGAIN; + } + spin_unlock_irqrestore(&dev->core_dev->lock, flags); + + pd_uinfo = (struct pd_uinfo *)(dev->pdr_uinfo + + sizeof(struct pd_uinfo) * pd_entry); + pd = crypto4xx_get_pdp(dev, &pd_dma, pd_entry); + pd_uinfo->async_req = req; + pd_uinfo->num_gd = num_gd; + pd_uinfo->num_sd = num_sd; + + if (iv_len || ctx->is_hash) { + ivlen = iv_len; + pd->sa = pd_uinfo->sa_pa; + sa = (struct dynamic_sa_ctl *) pd_uinfo->sa_va; + if (ctx->direction == DIR_INBOUND) + memcpy(sa, ctx->sa_in, ctx->sa_len * 4); + else + memcpy(sa, ctx->sa_out, ctx->sa_len * 4); + + memcpy((void *) sa + ctx->offset_to_sr_ptr, + &pd_uinfo->sr_pa, 4); + + if (iv_len) + crypto4xx_memcpy_le(pd_uinfo->sr_va, iv, iv_len); + } else { + if (ctx->direction == DIR_INBOUND) { + pd->sa = ctx->sa_in_dma_addr; + sa = (struct dynamic_sa_ctl *) ctx->sa_in; + } else { + pd->sa = ctx->sa_out_dma_addr; + sa = (struct dynamic_sa_ctl *) ctx->sa_out; + } + } + pd->sa_len = ctx->sa_len; + if (num_gd) { + /* get first gd we are going to use */ + gd_idx = fst_gd; + pd_uinfo->first_gd = fst_gd; + pd_uinfo->num_gd = num_gd; + gd = crypto4xx_get_gdp(dev, &gd_dma, gd_idx); + pd->src = gd_dma; + /* enable gather */ + sa->sa_command_0.bf.gather = 1; + idx = 0; + src = &src[0]; + /* walk the sg, and setup gather array */ + while (nbytes) { + sg = &src[idx]; + addr = dma_map_page(dev->core_dev->device, sg_page(sg), + sg->offset, sg->length, DMA_TO_DEVICE); + gd->ptr = addr; + gd->ctl_len.len = sg->length; + gd->ctl_len.done = 0; + gd->ctl_len.ready = 1; + if (sg->length >= nbytes) + break; + nbytes -= sg->length; + gd_idx = get_next_gd(gd_idx); + gd = crypto4xx_get_gdp(dev, &gd_dma, gd_idx); + idx++; + } + } else { + pd->src = (u32)dma_map_page(dev->core_dev->device, sg_page(src), + src->offset, src->length, DMA_TO_DEVICE); + /* + * Disable gather in sa command + */ + sa->sa_command_0.bf.gather = 0; + /* + * Indicate gather array is not used + */ + pd_uinfo->first_gd = 0xffffffff; + pd_uinfo->num_gd = 0; + } + if (ctx->is_hash || sg_is_last(dst)) { + /* + * we know application give us dst a whole piece of memory + * no need to use scatter ring. + * In case of is_hash, the icv is always at end of src data. + */ + pd_uinfo->using_sd = 0; + pd_uinfo->first_sd = 0xffffffff; + pd_uinfo->num_sd = 0; + pd_uinfo->dest_va = dst; + sa->sa_command_0.bf.scatter = 0; + if (ctx->is_hash) + pd->dest = virt_to_phys((void *)dst); + else + pd->dest = (u32)dma_map_page(dev->core_dev->device, + sg_page(dst), dst->offset, + dst->length, DMA_TO_DEVICE); + } else { + struct ce_sd *sd = NULL; + u32 sd_idx = fst_sd; + nbytes = datalen; + sa->sa_command_0.bf.scatter = 1; + pd_uinfo->using_sd = 1; + pd_uinfo->dest_va = dst; + pd_uinfo->first_sd = fst_sd; + pd_uinfo->num_sd = num_sd; + sd = crypto4xx_get_sdp(dev, &sd_dma, sd_idx); + pd->dest = sd_dma; + /* setup scatter descriptor */ + sd->ctl.done = 0; + sd->ctl.rdy = 1; + /* sd->ptr should be setup by sd_init routine*/ + idx = 0; + if (nbytes >= PPC4XX_SD_BUFFER_SIZE) + nbytes -= PPC4XX_SD_BUFFER_SIZE; + else + nbytes = 0; + while (nbytes) { + sd_idx = get_next_sd(sd_idx); + sd = crypto4xx_get_sdp(dev, &sd_dma, sd_idx); + /* setup scatter descriptor */ + sd->ctl.done = 0; + sd->ctl.rdy = 1; + if (nbytes >= PPC4XX_SD_BUFFER_SIZE) + nbytes -= PPC4XX_SD_BUFFER_SIZE; + else + /* + * SD entry can hold PPC4XX_SD_BUFFER_SIZE, + * which is more than nbytes, so done. + */ + nbytes = 0; + } + } + + sa->sa_command_1.bf.hash_crypto_offset = 0; + pd->pd_ctl.w = ctx->pd_ctl; + pd->pd_ctl_len.w = 0x00400000 | (ctx->bypass << 24) | datalen; + pd_uinfo->state = PD_ENTRY_INUSE; + wmb(); + /* write any value to push engine to read a pd */ + writel(1, dev->ce_base + CRYPTO4XX_INT_DESCR_RD); + return -EINPROGRESS; +} + +/** + * Algorithm Registration Functions + */ +static int crypto4xx_alg_init(struct crypto_tfm *tfm) +{ + struct crypto_alg *alg = tfm->__crt_alg; + struct crypto4xx_alg *amcc_alg = crypto_alg_to_crypto4xx_alg(alg); + struct crypto4xx_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->dev = amcc_alg->dev; + ctx->sa_in = NULL; + ctx->sa_out = NULL; + ctx->sa_in_dma_addr = 0; + ctx->sa_out_dma_addr = 0; + ctx->sa_len = 0; + + if (alg->cra_type == &crypto_ablkcipher_type) + tfm->crt_ablkcipher.reqsize = sizeof(struct crypto4xx_ctx); + else if (alg->cra_type == &crypto_ahash_type) + tfm->crt_ahash.reqsize = sizeof(struct crypto4xx_ctx); + + return 0; +} + +static void crypto4xx_alg_exit(struct crypto_tfm *tfm) +{ + struct crypto4xx_ctx *ctx = crypto_tfm_ctx(tfm); + + crypto4xx_free_sa(ctx); + crypto4xx_free_state_record(ctx); +} + +int crypto4xx_register_alg(struct crypto4xx_device *sec_dev, + struct crypto_alg *crypto_alg, int array_size) +{ + struct crypto4xx_alg *alg; + int i; + int rc = 0; + + for (i = 0; i < array_size; i++) { + alg = kzalloc(sizeof(struct crypto4xx_alg), GFP_KERNEL); + if (!alg) + return -ENOMEM; + + alg->alg = crypto_alg[i]; + INIT_LIST_HEAD(&alg->alg.cra_list); + if (alg->alg.cra_init == NULL) + alg->alg.cra_init = crypto4xx_alg_init; + if (alg->alg.cra_exit == NULL) + alg->alg.cra_exit = crypto4xx_alg_exit; + alg->dev = sec_dev; + rc = crypto_register_alg(&alg->alg); + if (rc) { + list_del(&alg->entry); + kfree(alg); + } else { + list_add_tail(&alg->entry, &sec_dev->alg_list); + } + } + + return 0; +} + +static void crypto4xx_unregister_alg(struct crypto4xx_device *sec_dev) +{ + struct crypto4xx_alg *alg, *tmp; + + list_for_each_entry_safe(alg, tmp, &sec_dev->alg_list, entry) { + list_del(&alg->entry); + crypto_unregister_alg(&alg->alg); + kfree(alg); + } +} + +static void crypto4xx_bh_tasklet_cb(unsigned long data) +{ + struct device *dev = (struct device *)data; + struct crypto4xx_core_device *core_dev = dev_get_drvdata(dev); + struct pd_uinfo *pd_uinfo; + struct ce_pd *pd; + u32 tail; + + while (core_dev->dev->pdr_head != core_dev->dev->pdr_tail) { + tail = core_dev->dev->pdr_tail; + pd_uinfo = core_dev->dev->pdr_uinfo + + sizeof(struct pd_uinfo)*tail; + pd = core_dev->dev->pdr + sizeof(struct ce_pd) * tail; + if ((pd_uinfo->state == PD_ENTRY_INUSE) && + pd->pd_ctl.bf.pe_done && + !pd->pd_ctl.bf.host_ready) { + pd->pd_ctl.bf.pe_done = 0; + crypto4xx_pd_done(core_dev->dev, tail); + crypto4xx_put_pd_to_pdr(core_dev->dev, tail); + pd_uinfo->state = PD_ENTRY_FREE; + } else { + /* if tail not done, break */ + break; + } + } +} + +/** + * Top Half of isr. + */ +static irqreturn_t crypto4xx_ce_interrupt_handler(int irq, void *data) +{ + struct device *dev = (struct device *)data; + struct crypto4xx_core_device *core_dev = dev_get_drvdata(dev); + + if (core_dev->dev->ce_base == 0) + return 0; + + writel(PPC4XX_INTERRUPT_CLR, + core_dev->dev->ce_base + CRYPTO4XX_INT_CLR); + tasklet_schedule(&core_dev->tasklet); + + return IRQ_HANDLED; +} + +/** + * Supported Crypto Algorithms + */ +struct crypto_alg crypto4xx_alg[] = { + /* Crypto AES modes */ + { + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-ppc4xx", + .cra_priority = CRYPTO4XX_CRYPTO_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto4xx_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_IV_SIZE, + .setkey = crypto4xx_setkey_aes_cbc, + .encrypt = crypto4xx_encrypt, + .decrypt = crypto4xx_decrypt, + } + } + }, + /* Hash SHA1 */ + { + .cra_name = "sha1", + .cra_driver_name = "sha1-ppc4xx", + .cra_priority = CRYPTO4XX_CRYPTO_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto4xx_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ahash_type, + .cra_init = crypto4xx_sha1_alg_init, + .cra_module = THIS_MODULE, + .cra_u = { + .ahash = { + .digestsize = SHA1_DIGEST_SIZE, + .init = crypto4xx_hash_init, + .update = crypto4xx_hash_update, + .final = crypto4xx_hash_final, + .digest = crypto4xx_hash_digest, + } + } + }, +}; + +/** + * Module Initialization Routine + */ +static int __init crypto4xx_probe(struct of_device *ofdev, + const struct of_device_id *match) +{ + int rc; + struct resource res; + struct device *dev = &ofdev->dev; + struct crypto4xx_core_device *core_dev; + + rc = of_address_to_resource(ofdev->node, 0, &res); + if (rc) + return -ENODEV; + + if (of_find_compatible_node(NULL, NULL, "amcc,ppc460ex-crypto")) { + mtdcri(SDR0, PPC460EX_SDR0_SRST, + mfdcri(SDR0, PPC460EX_SDR0_SRST) | PPC460EX_CE_RESET); + mtdcri(SDR0, PPC460EX_SDR0_SRST, + mfdcri(SDR0, PPC460EX_SDR0_SRST) & ~PPC460EX_CE_RESET); + } else if (of_find_compatible_node(NULL, NULL, + "amcc,ppc405ex-crypto")) { + mtdcri(SDR0, PPC405EX_SDR0_SRST, + mfdcri(SDR0, PPC405EX_SDR0_SRST) | PPC405EX_CE_RESET); + mtdcri(SDR0, PPC405EX_SDR0_SRST, + mfdcri(SDR0, PPC405EX_SDR0_SRST) & ~PPC405EX_CE_RESET); + } else if (of_find_compatible_node(NULL, NULL, + "amcc,ppc460sx-crypto")) { + mtdcri(SDR0, PPC460SX_SDR0_SRST, + mfdcri(SDR0, PPC460SX_SDR0_SRST) | PPC460SX_CE_RESET); + mtdcri(SDR0, PPC460SX_SDR0_SRST, + mfdcri(SDR0, PPC460SX_SDR0_SRST) & ~PPC460SX_CE_RESET); + } else { + printk(KERN_ERR "Crypto Function Not supported!\n"); + return -EINVAL; + } + + core_dev = kzalloc(sizeof(struct crypto4xx_core_device), GFP_KERNEL); + if (!core_dev) + return -ENOMEM; + + dev_set_drvdata(dev, core_dev); + core_dev->ofdev = ofdev; + core_dev->dev = kzalloc(sizeof(struct crypto4xx_device), GFP_KERNEL); + if (!core_dev->dev) + goto err_alloc_dev; + + core_dev->dev->core_dev = core_dev; + core_dev->device = dev; + spin_lock_init(&core_dev->lock); + INIT_LIST_HEAD(&core_dev->dev->alg_list); + rc = crypto4xx_build_pdr(core_dev->dev); + if (rc) + goto err_build_pdr; + + rc = crypto4xx_build_gdr(core_dev->dev); + if (rc) + goto err_build_gdr; + + rc = crypto4xx_build_sdr(core_dev->dev); + if (rc) + goto err_build_sdr; + + /* Init tasklet for bottom half processing */ + tasklet_init(&core_dev->tasklet, crypto4xx_bh_tasklet_cb, + (unsigned long) dev); + + /* Register for Crypto isr, Crypto Engine IRQ */ + core_dev->irq = irq_of_parse_and_map(ofdev->node, 0); + rc = request_irq(core_dev->irq, crypto4xx_ce_interrupt_handler, 0, + core_dev->dev->name, dev); + if (rc) + goto err_request_irq; + + core_dev->dev->ce_base = of_iomap(ofdev->node, 0); + if (!core_dev->dev->ce_base) { + dev_err(dev, "failed to of_iomap\n"); + goto err_iomap; + } + + /* need to setup pdr, rdr, gdr and sdr before this */ + crypto4xx_hw_init(core_dev->dev); + + /* Register security algorithms with Linux CryptoAPI */ + rc = crypto4xx_register_alg(core_dev->dev, crypto4xx_alg, + ARRAY_SIZE(crypto4xx_alg)); + if (rc) + goto err_start_dev; + + return 0; + +err_start_dev: + iounmap(core_dev->dev->ce_base); +err_iomap: + free_irq(core_dev->irq, dev); + irq_dispose_mapping(core_dev->irq); + tasklet_kill(&core_dev->tasklet); +err_request_irq: + crypto4xx_destroy_sdr(core_dev->dev); +err_build_sdr: + crypto4xx_destroy_gdr(core_dev->dev); +err_build_gdr: + crypto4xx_destroy_pdr(core_dev->dev); +err_build_pdr: + kfree(core_dev->dev); +err_alloc_dev: + kfree(core_dev); + + return rc; +} + +static int __exit crypto4xx_remove(struct of_device *ofdev) +{ + struct device *dev = &ofdev->dev; + struct crypto4xx_core_device *core_dev = dev_get_drvdata(dev); + + free_irq(core_dev->irq, dev); + irq_dispose_mapping(core_dev->irq); + + tasklet_kill(&core_dev->tasklet); + /* Un-register with Linux CryptoAPI */ + crypto4xx_unregister_alg(core_dev->dev); + /* Free all allocated memory */ + crypto4xx_stop_all(core_dev); + + return 0; +} + +static struct of_device_id crypto4xx_match[] = { + { .compatible = "amcc,ppc4xx-crypto",}, + { }, +}; + +static struct of_platform_driver crypto4xx_driver = { + .name = "crypto4xx", + .match_table = crypto4xx_match, + .probe = crypto4xx_probe, + .remove = crypto4xx_remove, +}; + +static int __init crypto4xx_init(void) +{ + return of_register_platform_driver(&crypto4xx_driver); +} + +static void __exit crypto4xx_exit(void) +{ + of_unregister_platform_driver(&crypto4xx_driver); +} + +module_init(crypto4xx_init); +module_exit(crypto4xx_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("James Hsiao "); +MODULE_DESCRIPTION("Driver for AMCC PPC4xx crypto accelerator"); + diff --git a/drivers/crypto/amcc/crypto4xx_core.h b/drivers/crypto/amcc/crypto4xx_core.h new file mode 100644 index 000000000000..1ef103449364 --- /dev/null +++ b/drivers/crypto/amcc/crypto4xx_core.h @@ -0,0 +1,177 @@ +/** + * AMCC SoC PPC4xx Crypto Driver + * + * Copyright (c) 2008 Applied Micro Circuits Corporation. + * All rights reserved. James Hsiao + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * This is the header file for AMCC Crypto offload Linux device driver for + * use with Linux CryptoAPI. + + */ + +#ifndef __CRYPTO4XX_CORE_H__ +#define __CRYPTO4XX_CORE_H__ + +#define PPC460SX_SDR0_SRST 0x201 +#define PPC405EX_SDR0_SRST 0x200 +#define PPC460EX_SDR0_SRST 0x201 +#define PPC460EX_CE_RESET 0x08000000 +#define PPC460SX_CE_RESET 0x20000000 +#define PPC405EX_CE_RESET 0x00000008 + +#define CRYPTO4XX_CRYPTO_PRIORITY 300 +#define PPC4XX_LAST_PD 63 +#define PPC4XX_NUM_PD 64 +#define PPC4XX_LAST_GD 1023 +#define PPC4XX_NUM_GD 1024 +#define PPC4XX_LAST_SD 63 +#define PPC4XX_NUM_SD 64 +#define PPC4XX_SD_BUFFER_SIZE 2048 + +#define PD_ENTRY_INUSE 1 +#define PD_ENTRY_FREE 0 +#define ERING_WAS_FULL 0xffffffff + +struct crypto4xx_device; + +struct pd_uinfo { + struct crypto4xx_device *dev; + u32 state; + u32 using_sd; + u32 first_gd; /* first gather discriptor + used by this packet */ + u32 num_gd; /* number of gather discriptor + used by this packet */ + u32 first_sd; /* first scatter discriptor + used by this packet */ + u32 num_sd; /* number of scatter discriptors + used by this packet */ + void *sa_va; /* shadow sa, when using cp from ctx->sa */ + u32 sa_pa; + void *sr_va; /* state record for shadow sa */ + u32 sr_pa; + struct scatterlist *dest_va; + struct crypto_async_request *async_req; /* base crypto request + for this packet */ +}; + +struct crypto4xx_device { + struct crypto4xx_core_device *core_dev; + char *name; + u64 ce_phy_address; + void __iomem *ce_base; + + void *pdr; /* base address of packet + descriptor ring */ + dma_addr_t pdr_pa; /* physical address used to + program ce pdr_base_register */ + void *gdr; /* gather descriptor ring */ + dma_addr_t gdr_pa; /* physical address used to + program ce gdr_base_register */ + void *sdr; /* scatter descriptor ring */ + dma_addr_t sdr_pa; /* physical address used to + program ce sdr_base_register */ + void *scatter_buffer_va; + dma_addr_t scatter_buffer_pa; + u32 scatter_buffer_size; + + void *shadow_sa_pool; /* pool of memory for sa in pd_uinfo */ + dma_addr_t shadow_sa_pool_pa; + void *shadow_sr_pool; /* pool of memory for sr in pd_uinfo */ + dma_addr_t shadow_sr_pool_pa; + u32 pdr_tail; + u32 pdr_head; + u32 gdr_tail; + u32 gdr_head; + u32 sdr_tail; + u32 sdr_head; + void *pdr_uinfo; + struct list_head alg_list; /* List of algorithm supported + by this device */ +}; + +struct crypto4xx_core_device { + struct device *device; + struct of_device *ofdev; + struct crypto4xx_device *dev; + u32 int_status; + u32 irq; + struct tasklet_struct tasklet; + spinlock_t lock; +}; + +struct crypto4xx_ctx { + struct crypto4xx_device *dev; + void *sa_in; + dma_addr_t sa_in_dma_addr; + void *sa_out; + dma_addr_t sa_out_dma_addr; + void *state_record; + dma_addr_t state_record_dma_addr; + u32 sa_len; + u32 offset_to_sr_ptr; /* offset to state ptr, in dynamic sa */ + u32 direction; + u32 next_hdr; + u32 save_iv; + u32 pd_ctl_len; + u32 pd_ctl; + u32 bypass; + u32 is_hash; + u32 hash_final; +}; + +struct crypto4xx_req_ctx { + struct crypto4xx_device *dev; /* Device in which + operation to send to */ + void *sa; + u32 sa_dma_addr; + u16 sa_len; +}; + +struct crypto4xx_alg { + struct list_head entry; + struct crypto_alg alg; + struct crypto4xx_device *dev; +}; + +#define crypto_alg_to_crypto4xx_alg(x) \ + container_of(x, struct crypto4xx_alg, alg) + +extern int crypto4xx_alloc_sa(struct crypto4xx_ctx *ctx, u32 size); +extern void crypto4xx_free_sa(struct crypto4xx_ctx *ctx); +extern u32 crypto4xx_alloc_sa_rctx(struct crypto4xx_ctx *ctx, + struct crypto4xx_ctx *rctx); +extern void crypto4xx_free_sa_rctx(struct crypto4xx_ctx *rctx); +extern void crypto4xx_free_ctx(struct crypto4xx_ctx *ctx); +extern u32 crypto4xx_alloc_state_record(struct crypto4xx_ctx *ctx); +extern u32 get_dynamic_sa_offset_state_ptr_field(struct crypto4xx_ctx *ctx); +extern u32 get_dynamic_sa_offset_key_field(struct crypto4xx_ctx *ctx); +extern u32 get_dynamic_sa_iv_size(struct crypto4xx_ctx *ctx); +extern void crypto4xx_memcpy_le(unsigned int *dst, + const unsigned char *buf, int len); +extern u32 crypto4xx_build_pd(struct crypto_async_request *req, + struct crypto4xx_ctx *ctx, + struct scatterlist *src, + struct scatterlist *dst, + unsigned int datalen, + void *iv, u32 iv_len); +extern int crypto4xx_setkey_aes_cbc(struct crypto_ablkcipher *cipher, + const u8 *key, unsigned int keylen); +extern int crypto4xx_encrypt(struct ablkcipher_request *req); +extern int crypto4xx_decrypt(struct ablkcipher_request *req); +extern int crypto4xx_sha1_alg_init(struct crypto_tfm *tfm); +extern int crypto4xx_hash_digest(struct ahash_request *req); +extern int crypto4xx_hash_final(struct ahash_request *req); +extern int crypto4xx_hash_update(struct ahash_request *req); +extern int crypto4xx_hash_init(struct ahash_request *req); +#endif diff --git a/drivers/crypto/amcc/crypto4xx_reg_def.h b/drivers/crypto/amcc/crypto4xx_reg_def.h new file mode 100644 index 000000000000..7d4edb002619 --- /dev/null +++ b/drivers/crypto/amcc/crypto4xx_reg_def.h @@ -0,0 +1,284 @@ +/** + * AMCC SoC PPC4xx Crypto Driver + * + * Copyright (c) 2008 Applied Micro Circuits Corporation. + * All rights reserved. James Hsiao + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * This filr defines the register set for Security Subsystem + */ + +#ifndef __CRYPTO4XX_REG_DEF_H__ +#define __CRYPTO4XX_REG_DEF_H__ + +/* CRYPTO4XX Register offset */ +#define CRYPTO4XX_DESCRIPTOR 0x00000000 +#define CRYPTO4XX_CTRL_STAT 0x00000000 +#define CRYPTO4XX_SOURCE 0x00000004 +#define CRYPTO4XX_DEST 0x00000008 +#define CRYPTO4XX_SA 0x0000000C +#define CRYPTO4XX_SA_LENGTH 0x00000010 +#define CRYPTO4XX_LENGTH 0x00000014 + +#define CRYPTO4XX_PE_DMA_CFG 0x00000040 +#define CRYPTO4XX_PE_DMA_STAT 0x00000044 +#define CRYPTO4XX_PDR_BASE 0x00000048 +#define CRYPTO4XX_RDR_BASE 0x0000004c +#define CRYPTO4XX_RING_SIZE 0x00000050 +#define CRYPTO4XX_RING_CTRL 0x00000054 +#define CRYPTO4XX_INT_RING_STAT 0x00000058 +#define CRYPTO4XX_EXT_RING_STAT 0x0000005c +#define CRYPTO4XX_IO_THRESHOLD 0x00000060 +#define CRYPTO4XX_GATH_RING_BASE 0x00000064 +#define CRYPTO4XX_SCAT_RING_BASE 0x00000068 +#define CRYPTO4XX_PART_RING_SIZE 0x0000006c +#define CRYPTO4XX_PART_RING_CFG 0x00000070 + +#define CRYPTO4XX_PDR_BASE_UADDR 0x00000080 +#define CRYPTO4XX_RDR_BASE_UADDR 0x00000084 +#define CRYPTO4XX_PKT_SRC_UADDR 0x00000088 +#define CRYPTO4XX_PKT_DEST_UADDR 0x0000008c +#define CRYPTO4XX_SA_UADDR 0x00000090 +#define CRYPTO4XX_GATH_RING_BASE_UADDR 0x000000A0 +#define CRYPTO4XX_SCAT_RING_BASE_UADDR 0x000000A4 + +#define CRYPTO4XX_SEQ_RD 0x00000408 +#define CRYPTO4XX_SEQ_MASK_RD 0x0000040C + +#define CRYPTO4XX_SA_CMD_0 0x00010600 +#define CRYPTO4XX_SA_CMD_1 0x00010604 + +#define CRYPTO4XX_STATE_PTR 0x000106dc +#define CRYPTO4XX_STATE_IV 0x00010700 +#define CRYPTO4XX_STATE_HASH_BYTE_CNT_0 0x00010710 +#define CRYPTO4XX_STATE_HASH_BYTE_CNT_1 0x00010714 + +#define CRYPTO4XX_STATE_IDIGEST_0 0x00010718 +#define CRYPTO4XX_STATE_IDIGEST_1 0x0001071c + +#define CRYPTO4XX_DATA_IN 0x00018000 +#define CRYPTO4XX_DATA_OUT 0x0001c000 + +#define CRYPTO4XX_INT_UNMASK_STAT 0x000500a0 +#define CRYPTO4XX_INT_MASK_STAT 0x000500a4 +#define CRYPTO4XX_INT_CLR 0x000500a4 +#define CRYPTO4XX_INT_EN 0x000500a8 + +#define CRYPTO4XX_INT_PKA 0x00000002 +#define CRYPTO4XX_INT_PDR_DONE 0x00008000 +#define CRYPTO4XX_INT_MA_WR_ERR 0x00020000 +#define CRYPTO4XX_INT_MA_RD_ERR 0x00010000 +#define CRYPTO4XX_INT_PE_ERR 0x00000200 +#define CRYPTO4XX_INT_USER_DMA_ERR 0x00000040 +#define CRYPTO4XX_INT_SLAVE_ERR 0x00000010 +#define CRYPTO4XX_INT_MASTER_ERR 0x00000008 +#define CRYPTO4XX_INT_ERROR 0x00030258 + +#define CRYPTO4XX_INT_CFG 0x000500ac +#define CRYPTO4XX_INT_DESCR_RD 0x000500b0 +#define CRYPTO4XX_INT_DESCR_CNT 0x000500b4 +#define CRYPTO4XX_INT_TIMEOUT_CNT 0x000500b8 + +#define CRYPTO4XX_DEVICE_CTRL 0x00060080 +#define CRYPTO4XX_DEVICE_ID 0x00060084 +#define CRYPTO4XX_DEVICE_INFO 0x00060088 +#define CRYPTO4XX_DMA_USER_SRC 0x00060094 +#define CRYPTO4XX_DMA_USER_DEST 0x00060098 +#define CRYPTO4XX_DMA_USER_CMD 0x0006009C + +#define CRYPTO4XX_DMA_CFG 0x000600d4 +#define CRYPTO4XX_BYTE_ORDER_CFG 0x000600d8 +#define CRYPTO4XX_ENDIAN_CFG 0x000600d8 + +#define CRYPTO4XX_PRNG_STAT 0x00070000 +#define CRYPTO4XX_PRNG_CTRL 0x00070004 +#define CRYPTO4XX_PRNG_SEED_L 0x00070008 +#define CRYPTO4XX_PRNG_SEED_H 0x0007000c + +#define CRYPTO4XX_PRNG_RES_0 0x00070020 +#define CRYPTO4XX_PRNG_RES_1 0x00070024 +#define CRYPTO4XX_PRNG_RES_2 0x00070028 +#define CRYPTO4XX_PRNG_RES_3 0x0007002C + +#define CRYPTO4XX_PRNG_LFSR_L 0x00070030 +#define CRYPTO4XX_PRNG_LFSR_H 0x00070034 + +/** + * Initilize CRYPTO ENGINE registers, and memory bases. + */ +#define PPC4XX_PDR_POLL 0x3ff +#define PPC4XX_OUTPUT_THRESHOLD 2 +#define PPC4XX_INPUT_THRESHOLD 2 +#define PPC4XX_PD_SIZE 6 +#define PPC4XX_CTX_DONE_INT 0x2000 +#define PPC4XX_PD_DONE_INT 0x8000 +#define PPC4XX_BYTE_ORDER 0x22222 +#define PPC4XX_INTERRUPT_CLR 0x3ffff +#define PPC4XX_PRNG_CTRL_AUTO_EN 0x3 +#define PPC4XX_DC_3DES_EN 1 +#define PPC4XX_INT_DESCR_CNT 4 +#define PPC4XX_INT_TIMEOUT_CNT 0 +#define PPC4XX_INT_CFG 1 +/** + * all follow define are ad hoc + */ +#define PPC4XX_RING_RETRY 100 +#define PPC4XX_RING_POLL 100 +#define PPC4XX_SDR_SIZE PPC4XX_NUM_SD +#define PPC4XX_GDR_SIZE PPC4XX_NUM_GD + +/** + * Generic Security Association (SA) with all possible fields. These will + * never likely used except for reference purpose. These structure format + * can be not changed as the hardware expects them to be layout as defined. + * Field can be removed or reduced but ordering can not be changed. + */ +#define CRYPTO4XX_DMA_CFG_OFFSET 0x40 +union ce_pe_dma_cfg { + struct { + u32 rsv:7; + u32 dir_host:1; + u32 rsv1:2; + u32 bo_td_en:1; + u32 dis_pdr_upd:1; + u32 bo_sgpd_en:1; + u32 bo_data_en:1; + u32 bo_sa_en:1; + u32 bo_pd_en:1; + u32 rsv2:4; + u32 dynamic_sa_en:1; + u32 pdr_mode:2; + u32 pe_mode:1; + u32 rsv3:5; + u32 reset_sg:1; + u32 reset_pdr:1; + u32 reset_pe:1; + } bf; + u32 w; +} __attribute__((packed)); + +#define CRYPTO4XX_PDR_BASE_OFFSET 0x48 +#define CRYPTO4XX_RDR_BASE_OFFSET 0x4c +#define CRYPTO4XX_RING_SIZE_OFFSET 0x50 +union ce_ring_size { + struct { + u32 ring_offset:16; + u32 rsv:6; + u32 ring_size:10; + } bf; + u32 w; +} __attribute__((packed)); + +#define CRYPTO4XX_RING_CONTROL_OFFSET 0x54 +union ce_ring_contol { + struct { + u32 continuous:1; + u32 rsv:5; + u32 ring_retry_divisor:10; + u32 rsv1:4; + u32 ring_poll_divisor:10; + } bf; + u32 w; +} __attribute__((packed)); + +#define CRYPTO4XX_IO_THRESHOLD_OFFSET 0x60 +union ce_io_threshold { + struct { + u32 rsv:6; + u32 output_threshold:10; + u32 rsv1:6; + u32 input_threshold:10; + } bf; + u32 w; +} __attribute__((packed)); + +#define CRYPTO4XX_GATHER_RING_BASE_OFFSET 0x64 +#define CRYPTO4XX_SCATTER_RING_BASE_OFFSET 0x68 + +union ce_part_ring_size { + struct { + u32 sdr_size:16; + u32 gdr_size:16; + } bf; + u32 w; +} __attribute__((packed)); + +#define MAX_BURST_SIZE_32 0 +#define MAX_BURST_SIZE_64 1 +#define MAX_BURST_SIZE_128 2 +#define MAX_BURST_SIZE_256 3 + +/* gather descriptor control length */ +struct gd_ctl_len { + u32 len:16; + u32 rsv:14; + u32 done:1; + u32 ready:1; +} __attribute__((packed)); + +struct ce_gd { + u32 ptr; + struct gd_ctl_len ctl_len; +} __attribute__((packed)); + +struct sd_ctl { + u32 ctl:30; + u32 done:1; + u32 rdy:1; +} __attribute__((packed)); + +struct ce_sd { + u32 ptr; + struct sd_ctl ctl; +} __attribute__((packed)); + +#define PD_PAD_CTL_32 0x10 +#define PD_PAD_CTL_64 0x20 +#define PD_PAD_CTL_128 0x40 +#define PD_PAD_CTL_256 0x80 +union ce_pd_ctl { + struct { + u32 pd_pad_ctl:8; + u32 status:8; + u32 next_hdr:8; + u32 rsv:2; + u32 cached_sa:1; + u32 hash_final:1; + u32 init_arc4:1; + u32 rsv1:1; + u32 pe_done:1; + u32 host_ready:1; + } bf; + u32 w; +} __attribute__((packed)); + +union ce_pd_ctl_len { + struct { + u32 bypass:8; + u32 pe_done:1; + u32 host_ready:1; + u32 rsv:2; + u32 pkt_len:20; + } bf; + u32 w; +} __attribute__((packed)); + +struct ce_pd { + union ce_pd_ctl pd_ctl; + u32 src; + u32 dest; + u32 sa; /* get from ctx->sa_dma_addr */ + u32 sa_len; /* only if dynamic sa is used */ + union ce_pd_ctl_len pd_ctl_len; + +} __attribute__((packed)); +#endif diff --git a/drivers/crypto/amcc/crypto4xx_sa.c b/drivers/crypto/amcc/crypto4xx_sa.c new file mode 100644 index 000000000000..466fd94cd4a3 --- /dev/null +++ b/drivers/crypto/amcc/crypto4xx_sa.c @@ -0,0 +1,108 @@ +/** + * AMCC SoC PPC4xx Crypto Driver + * + * Copyright (c) 2008 Applied Micro Circuits Corporation. + * All rights reserved. James Hsiao + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @file crypto4xx_sa.c + * + * This file implements the security context + * assoicate format. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "crypto4xx_reg_def.h" +#include "crypto4xx_sa.h" +#include "crypto4xx_core.h" + +u32 get_dynamic_sa_offset_iv_field(struct crypto4xx_ctx *ctx) +{ + u32 offset; + union dynamic_sa_contents cts; + + if (ctx->direction == DIR_INBOUND) + cts.w = ((struct dynamic_sa_ctl *)(ctx->sa_in))->sa_contents; + else + cts.w = ((struct dynamic_sa_ctl *)(ctx->sa_out))->sa_contents; + offset = cts.bf.key_size + + cts.bf.inner_size + + cts.bf.outer_size + + cts.bf.spi + + cts.bf.seq_num0 + + cts.bf.seq_num1 + + cts.bf.seq_num_mask0 + + cts.bf.seq_num_mask1 + + cts.bf.seq_num_mask2 + + cts.bf.seq_num_mask3; + + return sizeof(struct dynamic_sa_ctl) + offset * 4; +} + +u32 get_dynamic_sa_offset_state_ptr_field(struct crypto4xx_ctx *ctx) +{ + u32 offset; + union dynamic_sa_contents cts; + + if (ctx->direction == DIR_INBOUND) + cts.w = ((struct dynamic_sa_ctl *) ctx->sa_in)->sa_contents; + else + cts.w = ((struct dynamic_sa_ctl *) ctx->sa_out)->sa_contents; + offset = cts.bf.key_size + + cts.bf.inner_size + + cts.bf.outer_size + + cts.bf.spi + + cts.bf.seq_num0 + + cts.bf.seq_num1 + + cts.bf.seq_num_mask0 + + cts.bf.seq_num_mask1 + + cts.bf.seq_num_mask2 + + cts.bf.seq_num_mask3 + + cts.bf.iv0 + + cts.bf.iv1 + + cts.bf.iv2 + + cts.bf.iv3; + + return sizeof(struct dynamic_sa_ctl) + offset * 4; +} + +u32 get_dynamic_sa_iv_size(struct crypto4xx_ctx *ctx) +{ + union dynamic_sa_contents cts; + + if (ctx->direction == DIR_INBOUND) + cts.w = ((struct dynamic_sa_ctl *) ctx->sa_in)->sa_contents; + else + cts.w = ((struct dynamic_sa_ctl *) ctx->sa_out)->sa_contents; + return (cts.bf.iv0 + cts.bf.iv1 + cts.bf.iv2 + cts.bf.iv3) * 4; +} + +u32 get_dynamic_sa_offset_key_field(struct crypto4xx_ctx *ctx) +{ + union dynamic_sa_contents cts; + + if (ctx->direction == DIR_INBOUND) + cts.w = ((struct dynamic_sa_ctl *) ctx->sa_in)->sa_contents; + else + cts.w = ((struct dynamic_sa_ctl *) ctx->sa_out)->sa_contents; + + return sizeof(struct dynamic_sa_ctl); +} diff --git a/drivers/crypto/amcc/crypto4xx_sa.h b/drivers/crypto/amcc/crypto4xx_sa.h new file mode 100644 index 000000000000..4b83ed7e5570 --- /dev/null +++ b/drivers/crypto/amcc/crypto4xx_sa.h @@ -0,0 +1,243 @@ +/** + * AMCC SoC PPC4xx Crypto Driver + * + * Copyright (c) 2008 Applied Micro Circuits Corporation. + * All rights reserved. James Hsiao + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * This file defines the security context + * assoicate format. + */ + +#ifndef __CRYPTO4XX_SA_H__ +#define __CRYPTO4XX_SA_H__ + +#define AES_IV_SIZE 16 + +/** + * Contents of Dynamic Security Association (SA) with all possible fields + */ +union dynamic_sa_contents { + struct { + u32 arc4_state_ptr:1; + u32 arc4_ij_ptr:1; + u32 state_ptr:1; + u32 iv3:1; + u32 iv2:1; + u32 iv1:1; + u32 iv0:1; + u32 seq_num_mask3:1; + u32 seq_num_mask2:1; + u32 seq_num_mask1:1; + u32 seq_num_mask0:1; + u32 seq_num1:1; + u32 seq_num0:1; + u32 spi:1; + u32 outer_size:5; + u32 inner_size:5; + u32 key_size:4; + u32 cmd_size:4; + } bf; + u32 w; +} __attribute__((packed)); + +#define DIR_OUTBOUND 0 +#define DIR_INBOUND 1 +#define SA_OP_GROUP_BASIC 0 +#define SA_OPCODE_ENCRYPT 0 +#define SA_OPCODE_DECRYPT 0 +#define SA_OPCODE_HASH 3 +#define SA_CIPHER_ALG_DES 0 +#define SA_CIPHER_ALG_3DES 1 +#define SA_CIPHER_ALG_ARC4 2 +#define SA_CIPHER_ALG_AES 3 +#define SA_CIPHER_ALG_KASUMI 4 +#define SA_CIPHER_ALG_NULL 15 + +#define SA_HASH_ALG_MD5 0 +#define SA_HASH_ALG_SHA1 1 +#define SA_HASH_ALG_NULL 15 +#define SA_HASH_ALG_SHA1_DIGEST_SIZE 20 + +#define SA_LOAD_HASH_FROM_SA 0 +#define SA_LOAD_HASH_FROM_STATE 2 +#define SA_NOT_LOAD_HASH 3 +#define SA_LOAD_IV_FROM_SA 0 +#define SA_LOAD_IV_FROM_INPUT 1 +#define SA_LOAD_IV_FROM_STATE 2 +#define SA_LOAD_IV_GEN_IV 3 + +#define SA_PAD_TYPE_CONSTANT 2 +#define SA_PAD_TYPE_ZERO 3 +#define SA_PAD_TYPE_TLS 5 +#define SA_PAD_TYPE_DTLS 5 +#define SA_NOT_SAVE_HASH 0 +#define SA_SAVE_HASH 1 +#define SA_NOT_SAVE_IV 0 +#define SA_SAVE_IV 1 +#define SA_HEADER_PROC 1 +#define SA_NO_HEADER_PROC 0 + +union sa_command_0 { + struct { + u32 scatter:1; + u32 gather:1; + u32 save_hash_state:1; + u32 save_iv:1; + u32 load_hash_state:2; + u32 load_iv:2; + u32 digest_len:4; + u32 hdr_proc:1; + u32 extend_pad:1; + u32 stream_cipher_pad:1; + u32 rsv:1; + u32 hash_alg:4; + u32 cipher_alg:4; + u32 pad_type:2; + u32 op_group:2; + u32 dir:1; + u32 opcode:3; + } bf; + u32 w; +} __attribute__((packed)); + +#define CRYPTO_MODE_ECB 0 +#define CRYPTO_MODE_CBC 1 + +#define CRYPTO_FEEDBACK_MODE_NO_FB 0 +#define CRYPTO_FEEDBACK_MODE_64BIT_OFB 0 +#define CRYPTO_FEEDBACK_MODE_8BIT_CFB 1 +#define CRYPTO_FEEDBACK_MODE_1BIT_CFB 2 +#define CRYPTO_FEEDBACK_MODE_128BIT_CFB 3 + +#define SA_AES_KEY_LEN_128 2 +#define SA_AES_KEY_LEN_192 3 +#define SA_AES_KEY_LEN_256 4 + +#define SA_REV2 1 +/** + * The follow defines bits sa_command_1 + * In Basic hash mode this bit define simple hash or hmac. + * In IPsec mode, this bit define muting control. + */ +#define SA_HASH_MODE_HASH 0 +#define SA_HASH_MODE_HMAC 1 +#define SA_MC_ENABLE 0 +#define SA_MC_DISABLE 1 +#define SA_NOT_COPY_HDR 0 +#define SA_COPY_HDR 1 +#define SA_NOT_COPY_PAD 0 +#define SA_COPY_PAD 1 +#define SA_NOT_COPY_PAYLOAD 0 +#define SA_COPY_PAYLOAD 1 +#define SA_EXTENDED_SN_OFF 0 +#define SA_EXTENDED_SN_ON 1 +#define SA_SEQ_MASK_OFF 0 +#define SA_SEQ_MASK_ON 1 + +union sa_command_1 { + struct { + u32 crypto_mode31:1; + u32 save_arc4_state:1; + u32 arc4_stateful:1; + u32 key_len:5; + u32 hash_crypto_offset:8; + u32 sa_rev:2; + u32 byte_offset:1; + u32 hmac_muting:1; + u32 feedback_mode:2; + u32 crypto_mode9_8:2; + u32 extended_seq_num:1; + u32 seq_num_mask:1; + u32 mutable_bit_proc:1; + u32 ip_version:1; + u32 copy_pad:1; + u32 copy_payload:1; + u32 copy_hdr:1; + u32 rsv1:1; + } bf; + u32 w; +} __attribute__((packed)); + +struct dynamic_sa_ctl { + u32 sa_contents; + union sa_command_0 sa_command_0; + union sa_command_1 sa_command_1; +} __attribute__((packed)); + +/** + * State Record for Security Association (SA) + */ +struct sa_state_record { + u32 save_iv[4]; + u32 save_hash_byte_cnt[2]; + u32 save_digest[16]; +} __attribute__((packed)); + +/** + * Security Association (SA) for AES128 + * + */ +struct dynamic_sa_aes128 { + struct dynamic_sa_ctl ctrl; + u32 key[4]; + u32 iv[4]; /* for CBC, OFC, and CFB mode */ + u32 state_ptr; + u32 reserved; +} __attribute__((packed)); + +#define SA_AES128_LEN (sizeof(struct dynamic_sa_aes128)/4) +#define SA_AES128_CONTENTS 0x3e000042 + +/* + * Security Association (SA) for AES192 + */ +struct dynamic_sa_aes192 { + struct dynamic_sa_ctl ctrl; + u32 key[6]; + u32 iv[4]; /* for CBC, OFC, and CFB mode */ + u32 state_ptr; + u32 reserved; +} __attribute__((packed)); + +#define SA_AES192_LEN (sizeof(struct dynamic_sa_aes192)/4) +#define SA_AES192_CONTENTS 0x3e000062 + +/** + * Security Association (SA) for AES256 + */ +struct dynamic_sa_aes256 { + struct dynamic_sa_ctl ctrl; + u32 key[8]; + u32 iv[4]; /* for CBC, OFC, and CFB mode */ + u32 state_ptr; + u32 reserved; +} __attribute__((packed)); + +#define SA_AES256_LEN (sizeof(struct dynamic_sa_aes256)/4) +#define SA_AES256_CONTENTS 0x3e000082 +#define SA_AES_CONTENTS 0x3e000002 + +/** + * Security Association (SA) for HASH160: HMAC-SHA1 + */ +struct dynamic_sa_hash160 { + struct dynamic_sa_ctl ctrl; + u32 inner_digest[5]; + u32 outer_digest[5]; + u32 state_ptr; + u32 reserved; +} __attribute__((packed)); +#define SA_HASH160_LEN (sizeof(struct dynamic_sa_hash160)/4) +#define SA_HASH160_CONTENTS 0x2000a502 + +#endif -- cgit v1.2.3-58-ga151 From 74019224ac34b044b44a31dd89a54e3477db4896 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 18 Feb 2009 12:23:29 +0100 Subject: timers: add mod_timer_pending() Impact: new timer API Based on an idea from Martin Josefsson with the help of Patrick McHardy and Stephen Hemminger: introduce the mod_timer_pending() API which is a mod_timer() offspring that is an invariant on already removed timers. (regular mod_timer() re-activates non-pending timers.) This is useful for the networking code in that it can allow unserialized mod_timer_pending() timer-forwarding calls, but a single del_timer*() will stop the timer from being reactivated again. Also while at it: - optimize the regular mod_timer() path some more, the timer-stat and a debug check was needlessly duplicated in __mod_timer(). - make the exports come straight after the function, as most other exports in timer.c already did. - eliminate __mod_timer() as an external API, change the users to mod_timer(). The regular mod_timer() code path is not impacted significantly, due to inlining optimizations and due to the simplifications. Based-on-patch-from: Stephen Hemminger Acked-by: Stephen Hemminger Cc: "David S. Miller" Cc: Patrick McHardy Cc: netdev@vger.kernel.org Cc: Oleg Nesterov Cc: Andrew Morton Signed-off-by: Ingo Molnar --- arch/powerpc/platforms/cell/spufs/sched.c | 2 +- drivers/infiniband/hw/ipath/ipath_driver.c | 6 +- include/linux/timer.h | 22 +----- kernel/relay.c | 2 +- kernel/timer.c | 110 +++++++++++++++++++---------- 5 files changed, 80 insertions(+), 62 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 6a0ad196aeb3..f085369301b1 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -508,7 +508,7 @@ static void __spu_add_to_rq(struct spu_context *ctx) list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]); set_bit(ctx->prio, spu_prio->bitmap); if (!spu_prio->nr_waiting++) - __mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK); + mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK); } } diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 69c0ce321b4e..cb9daa6ac029 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -2715,7 +2715,7 @@ static void ipath_hol_signal_up(struct ipath_devdata *dd) * to prevent HoL blocking, then start the HoL timer that * periodically continues, then stop procs, so they can detect * link down if they want, and do something about it. - * Timer may already be running, so use __mod_timer, not add_timer. + * Timer may already be running, so use mod_timer, not add_timer. */ void ipath_hol_down(struct ipath_devdata *dd) { @@ -2724,7 +2724,7 @@ void ipath_hol_down(struct ipath_devdata *dd) dd->ipath_hol_next = IPATH_HOL_DOWNCONT; dd->ipath_hol_timer.expires = jiffies + msecs_to_jiffies(ipath_hol_timeout_ms); - __mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires); + mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires); } /* @@ -2763,7 +2763,7 @@ void ipath_hol_event(unsigned long opaque) else { dd->ipath_hol_timer.expires = jiffies + msecs_to_jiffies(ipath_hol_timeout_ms); - __mod_timer(&dd->ipath_hol_timer, + mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires); } } diff --git a/include/linux/timer.h b/include/linux/timer.h index daf9685b861c..e2d662e3416e 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -86,8 +86,8 @@ static inline int timer_pending(const struct timer_list * timer) extern void add_timer_on(struct timer_list *timer, int cpu); extern int del_timer(struct timer_list * timer); -extern int __mod_timer(struct timer_list *timer, unsigned long expires); extern int mod_timer(struct timer_list *timer, unsigned long expires); +extern int mod_timer_pending(struct timer_list *timer, unsigned long expires); /* * The jiffies value which is added to now, when there is no timer @@ -146,25 +146,7 @@ static inline void timer_stats_timer_clear_start_info(struct timer_list *timer) } #endif -/** - * add_timer - start a timer - * @timer: the timer to be added - * - * The kernel will do a ->function(->data) callback from the - * timer interrupt at the ->expires point in the future. The - * current time is 'jiffies'. - * - * The timer's ->expires, ->function (and if the handler uses it, ->data) - * fields must be set prior calling this function. - * - * Timers with an ->expires field in the past will be executed in the next - * timer tick. - */ -static inline void add_timer(struct timer_list *timer) -{ - BUG_ON(timer_pending(timer)); - __mod_timer(timer, timer->expires); -} +extern void add_timer(struct timer_list *timer); #ifdef CONFIG_SMP extern int try_to_del_timer_sync(struct timer_list *timer); diff --git a/kernel/relay.c b/kernel/relay.c index 9d79b7854fa6..8f2179c8056f 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -750,7 +750,7 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length) * from the scheduler (trying to re-grab * rq->lock), so defer it. */ - __mod_timer(&buf->timer, jiffies + 1); + mod_timer(&buf->timer, jiffies + 1); } old = buf->data; diff --git a/kernel/timer.c b/kernel/timer.c index 13dd64fe143d..9b77fc9a9ac8 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -589,11 +589,14 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer, } } -int __mod_timer(struct timer_list *timer, unsigned long expires) +static inline int +__mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) { struct tvec_base *base, *new_base; unsigned long flags; - int ret = 0; + int ret; + + ret = 0; timer_stats_timer_set_start_info(timer); BUG_ON(!timer->function); @@ -603,6 +606,9 @@ int __mod_timer(struct timer_list *timer, unsigned long expires) if (timer_pending(timer)) { detach_timer(timer, 0); ret = 1; + } else { + if (pending_only) + goto out_unlock; } debug_timer_activate(timer); @@ -629,42 +635,28 @@ int __mod_timer(struct timer_list *timer, unsigned long expires) timer->expires = expires; internal_add_timer(base, timer); + +out_unlock: spin_unlock_irqrestore(&base->lock, flags); return ret; } -EXPORT_SYMBOL(__mod_timer); - /** - * add_timer_on - start a timer on a particular CPU - * @timer: the timer to be added - * @cpu: the CPU to start it on + * mod_timer_pending - modify a pending timer's timeout + * @timer: the pending timer to be modified + * @expires: new timeout in jiffies * - * This is not very scalable on SMP. Double adds are not possible. + * mod_timer_pending() is the same for pending timers as mod_timer(), + * but will not re-activate and modify already deleted timers. + * + * It is useful for unserialized use of timers. */ -void add_timer_on(struct timer_list *timer, int cpu) +int mod_timer_pending(struct timer_list *timer, unsigned long expires) { - struct tvec_base *base = per_cpu(tvec_bases, cpu); - unsigned long flags; - - timer_stats_timer_set_start_info(timer); - BUG_ON(timer_pending(timer) || !timer->function); - spin_lock_irqsave(&base->lock, flags); - timer_set_base(timer, base); - debug_timer_activate(timer); - internal_add_timer(base, timer); - /* - * Check whether the other CPU is idle and needs to be - * triggered to reevaluate the timer wheel when nohz is - * active. We are protected against the other CPU fiddling - * with the timer by holding the timer base lock. This also - * makes sure that a CPU on the way to idle can not evaluate - * the timer wheel. - */ - wake_up_idle_cpu(cpu); - spin_unlock_irqrestore(&base->lock, flags); + return __mod_timer(timer, expires, true); } +EXPORT_SYMBOL(mod_timer_pending); /** * mod_timer - modify a timer's timeout @@ -688,9 +680,6 @@ void add_timer_on(struct timer_list *timer, int cpu) */ int mod_timer(struct timer_list *timer, unsigned long expires) { - BUG_ON(!timer->function); - - timer_stats_timer_set_start_info(timer); /* * This is a common optimization triggered by the * networking code - if the timer is re-modified @@ -699,11 +688,61 @@ int mod_timer(struct timer_list *timer, unsigned long expires) if (timer->expires == expires && timer_pending(timer)) return 1; - return __mod_timer(timer, expires); + return __mod_timer(timer, expires, false); } - EXPORT_SYMBOL(mod_timer); +/** + * add_timer - start a timer + * @timer: the timer to be added + * + * The kernel will do a ->function(->data) callback from the + * timer interrupt at the ->expires point in the future. The + * current time is 'jiffies'. + * + * The timer's ->expires, ->function (and if the handler uses it, ->data) + * fields must be set prior calling this function. + * + * Timers with an ->expires field in the past will be executed in the next + * timer tick. + */ +void add_timer(struct timer_list *timer) +{ + BUG_ON(timer_pending(timer)); + mod_timer(timer, timer->expires); +} +EXPORT_SYMBOL(add_timer); + +/** + * add_timer_on - start a timer on a particular CPU + * @timer: the timer to be added + * @cpu: the CPU to start it on + * + * This is not very scalable on SMP. Double adds are not possible. + */ +void add_timer_on(struct timer_list *timer, int cpu) +{ + struct tvec_base *base = per_cpu(tvec_bases, cpu); + unsigned long flags; + + timer_stats_timer_set_start_info(timer); + BUG_ON(timer_pending(timer) || !timer->function); + spin_lock_irqsave(&base->lock, flags); + timer_set_base(timer, base); + debug_timer_activate(timer); + internal_add_timer(base, timer); + /* + * Check whether the other CPU is idle and needs to be + * triggered to reevaluate the timer wheel when nohz is + * active. We are protected against the other CPU fiddling + * with the timer by holding the timer base lock. This also + * makes sure that a CPU on the way to idle can not evaluate + * the timer wheel. + */ + wake_up_idle_cpu(cpu); + spin_unlock_irqrestore(&base->lock, flags); +} + /** * del_timer - deactive a timer. * @timer: the timer to be deactivated @@ -733,7 +772,6 @@ int del_timer(struct timer_list *timer) return ret; } - EXPORT_SYMBOL(del_timer); #ifdef CONFIG_SMP @@ -767,7 +805,6 @@ out: return ret; } - EXPORT_SYMBOL(try_to_del_timer_sync); /** @@ -796,7 +833,6 @@ int del_timer_sync(struct timer_list *timer) cpu_relax(); } } - EXPORT_SYMBOL(del_timer_sync); #endif @@ -1268,7 +1304,7 @@ signed long __sched schedule_timeout(signed long timeout) expire = timeout + jiffies; setup_timer_on_stack(&timer, process_timeout, (unsigned long)current); - __mod_timer(&timer, expire); + __mod_timer(&timer, expire, false); schedule(); del_singleshot_timer_sync(&timer); -- cgit v1.2.3-58-ga151 From 19390c4d03688b9940a1836f06b76ec622b9cd6f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 10 Mar 2009 16:27:48 +0900 Subject: linker script: define __per_cpu_load on all SMP capable archs Impact: __per_cpu_load available on all SMP capable archs Percpu now requires three symbols to be defined - __per_cpu_load, __per_cpu_start and __per_cpu_end. There were three archs which didn't have it. Update them as follows. * powerpc: can use generic PERCPU() macro. Compile tested for powerpc32, compile/boot tested for powerpc64. * ia64: can use generic PERCPU_VADDR() macro. __phys_per_cpu_start is identical to __per_cpu_load. Compile tested and symbol table looks identical after the change except for the additional __per_cpu_load. * arm: added explicit __per_cpu_load definition. Currently uses unified .init output section so can't use the generic macro. Dunno whether the unified .init ouput section is required by arch peculiarity so I left it alone. Please break it up and use PERCPU() if possible. Signed-off-by: Tejun Heo Cc: Benjamin Herrenschmidt Cc: Pat Gefre Cc: Russell King --- arch/arm/kernel/vmlinux.lds.S | 1 + arch/ia64/kernel/vmlinux.lds.S | 12 ++---------- arch/powerpc/kernel/vmlinux.lds.S | 9 +-------- 3 files changed, 4 insertions(+), 18 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 85598f7da407..1602373e539c 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -64,6 +64,7 @@ SECTIONS __initramfs_end = .; #endif . = ALIGN(4096); + __per_cpu_load = .; __per_cpu_start = .; *(.data.percpu.page_aligned) *(.data.percpu) diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index f45e4e508eca..3765efc5f963 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -213,17 +213,9 @@ SECTIONS { *(.data.cacheline_aligned) } /* Per-cpu data: */ - percpu : { } :percpu . = ALIGN(PERCPU_PAGE_SIZE); - __phys_per_cpu_start = .; - .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET) - { - __per_cpu_start = .; - *(.data.percpu.page_aligned) - *(.data.percpu) - *(.data.percpu.shared_aligned) - __per_cpu_end = .; - } + PERCPU_VADDR(PERCPU_ADDR, :percpu) + __phys_per_cpu_start = __per_cpu_load; . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits * into percpu page size */ diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 295ccc5e86b1..67f07f453385 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -181,14 +181,7 @@ SECTIONS __initramfs_end = .; } #endif - . = ALIGN(PAGE_SIZE); - .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { - __per_cpu_start = .; - *(.data.percpu.page_aligned) - *(.data.percpu) - *(.data.percpu.shared_aligned) - __per_cpu_end = .; - } + PERCPU(PAGE_SIZE) . = ALIGN(8); .machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) { -- cgit v1.2.3-58-ga151 From a6bc3262c561780d2a6587aa3d5715b1e7d8fa13 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 16 Mar 2009 18:52:56 +1100 Subject: sparseirq, powerpc/cell: fix unused variable warning in interrupt.c This new compiler warning: arch/powerpc/platforms/cell/interrupt.c: In function 'handle_iic_irq': arch/powerpc/platforms/cell/interrupt.c:240: warning: unused variable 'cpu' Triggers because the local variable 'cpu' became unused due to commit: dee4102: sparseirq: use kstat_irqs_cpu instead Remove the variable. Signed-off-by: Stephen Rothwell Cc: Yinghai Lu Cc: ppc-dev LKML-Reference: <20090316185256.4a160374.sfr@canb.auug.org.au> Signed-off-by: Ingo Molnar --- arch/powerpc/platforms/cell/interrupt.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 1f0d774ad928..882e47080e74 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -237,8 +237,6 @@ extern int noirqdebug; static void handle_iic_irq(unsigned int irq, struct irq_desc *desc) { - const unsigned int cpu = smp_processor_id(); - spin_lock(&desc->lock); desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); -- cgit v1.2.3-58-ga151 From d0bfb940ecabf0b44fb1fd80d8d60594e569e5ec Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 15 Dec 2008 13:52:10 +0100 Subject: KVM: New guest debug interface This rips out the support for KVM_DEBUG_GUEST and introduces a new IOCTL instead: KVM_SET_GUEST_DEBUG. The IOCTL payload consists of a generic part, controlling the "main switch" and the single-step feature. The arch specific part adds an x86 interface for intercepting both types of debug exceptions separately and re-injecting them when the host was not interested. Moveover, the foundation for guest debugging via debug registers is layed. To signal breakpoint events properly back to userland, an arch-specific data block is now returned along KVM_EXIT_DEBUG. For x86, the arch block contains the PC, the debug exception, and relevant debug registers to tell debug events properly apart. The availability of this new interface is signaled by KVM_CAP_SET_GUEST_DEBUG. Empty stubs for not yet supported archs are provided. Note that both SVM and VTX are supported, but only the latter was tested yet. Based on the experience with all those VTX corner case, I would be fairly surprised if SVM will work out of the box. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/ia64/include/asm/kvm.h | 7 ++++ arch/ia64/kvm/kvm-ia64.c | 4 +- arch/powerpc/include/asm/kvm.h | 7 ++++ arch/powerpc/kvm/powerpc.c | 4 +- arch/s390/include/asm/kvm.h | 7 ++++ arch/s390/kvm/kvm-s390.c | 4 +- arch/x86/include/asm/kvm.h | 18 ++++++++ arch/x86/include/asm/kvm_host.h | 9 +--- arch/x86/kvm/svm.c | 50 +++++++++++++++++++++- arch/x86/kvm/vmx.c | 93 ++++++++++++++++------------------------- arch/x86/kvm/x86.c | 14 ++++--- include/linux/kvm.h | 51 +++++++++++++++------- include/linux/kvm_host.h | 6 +-- virt/kvm/kvm_main.c | 6 +-- 14 files changed, 179 insertions(+), 101 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h index bfa86b6af7cd..be3fdb891214 100644 --- a/arch/ia64/include/asm/kvm.h +++ b/arch/ia64/include/asm/kvm.h @@ -214,4 +214,11 @@ struct kvm_sregs { struct kvm_fpu { }; +struct kvm_debug_exit_arch { +}; + +/* for KVM_SET_GUEST_DEBUG */ +struct kvm_guest_debug_arch { +}; + #endif diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 28f982045f29..de47467a0e61 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1303,8 +1303,8 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return -EINVAL; } -int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, - struct kvm_debug_guest *dbg) +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) { return -EINVAL; } diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index f993e4198d5c..755f1b1948c5 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -52,4 +52,11 @@ struct kvm_fpu { __u64 fpr[32]; }; +struct kvm_debug_exit_arch { +}; + +/* for KVM_SET_GUEST_DEBUG */ +struct kvm_guest_debug_arch { +}; + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 5f81256287f5..7c2ad4017d6a 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -240,8 +240,8 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) kvmppc_core_vcpu_put(vcpu); } -int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, - struct kvm_debug_guest *dbg) +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) { int i; diff --git a/arch/s390/include/asm/kvm.h b/arch/s390/include/asm/kvm.h index e1f54654e3ae..0b2f829f6d50 100644 --- a/arch/s390/include/asm/kvm.h +++ b/arch/s390/include/asm/kvm.h @@ -42,4 +42,11 @@ struct kvm_fpu { __u64 fprs[16]; }; +struct kvm_debug_exit_arch { +}; + +/* for KVM_SET_GUEST_DEBUG */ +struct kvm_guest_debug_arch { +}; + #endif diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 0d33893e1e89..cbfe91e10120 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -422,8 +422,8 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, return -EINVAL; /* not implemented yet */ } -int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, - struct kvm_debug_guest *dbg) +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) { return -EINVAL; /* not implemented yet */ } diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 886c9402ec45..32eb96c7ca27 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -212,6 +212,24 @@ struct kvm_pit_channel_state { __s64 count_load_time; }; +struct kvm_debug_exit_arch { + __u32 exception; + __u32 pad; + __u64 pc; + __u64 dr6; + __u64 dr7; +}; + +#define KVM_GUESTDBG_USE_SW_BP 0x00010000 +#define KVM_GUESTDBG_USE_HW_BP 0x00020000 +#define KVM_GUESTDBG_INJECT_DB 0x00040000 +#define KVM_GUESTDBG_INJECT_BP 0x00080000 + +/* for KVM_SET_GUEST_DEBUG */ +struct kvm_guest_debug_arch { + __u64 debugreg[8]; +}; + struct kvm_pit_state { struct kvm_pit_channel_state channels[3]; }; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 53779309514a..c430cd580ee2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -135,12 +135,6 @@ enum { #define KVM_NR_MEM_OBJS 40 -struct kvm_guest_debug { - int enabled; - unsigned long bp[4]; - int singlestep; -}; - /* * We don't want allocation failures within the mmu code, so we preallocate * enough memory for a single page fault in a cache. @@ -448,8 +442,7 @@ struct kvm_x86_ops { void (*vcpu_put)(struct kvm_vcpu *vcpu); int (*set_guest_debug)(struct kvm_vcpu *vcpu, - struct kvm_debug_guest *dbg); - void (*guest_debug_pre)(struct kvm_vcpu *vcpu); + struct kvm_guest_debug *dbg); int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 0fbbde54ecae..88d9062f4545 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -968,9 +968,32 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, } -static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) +static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - return -EOPNOTSUPP; + int old_debug = vcpu->guest_debug; + struct vcpu_svm *svm = to_svm(vcpu); + + vcpu->guest_debug = dbg->control; + + svm->vmcb->control.intercept_exceptions &= + ~((1 << DB_VECTOR) | (1 << BP_VECTOR)); + if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { + if (vcpu->guest_debug & + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) + svm->vmcb->control.intercept_exceptions |= + 1 << DB_VECTOR; + if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) + svm->vmcb->control.intercept_exceptions |= + 1 << BP_VECTOR; + } else + vcpu->guest_debug = 0; + + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) + svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; + else if (old_debug & KVM_GUESTDBG_SINGLESTEP) + svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); + + return 0; } static int svm_get_irq(struct kvm_vcpu *vcpu) @@ -1094,6 +1117,27 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); } +static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +{ + if (!(svm->vcpu.guest_debug & + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { + kvm_queue_exception(&svm->vcpu, DB_VECTOR); + return 1; + } + kvm_run->exit_reason = KVM_EXIT_DEBUG; + kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; + kvm_run->debug.arch.exception = DB_VECTOR; + return 0; +} + +static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +{ + kvm_run->exit_reason = KVM_EXIT_DEBUG; + kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; + kvm_run->debug.arch.exception = BP_VECTOR; + return 0; +} + static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { int er; @@ -2050,6 +2094,8 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, [SVM_EXIT_WRITE_DR3] = emulate_on_interception, [SVM_EXIT_WRITE_DR5] = emulate_on_interception, [SVM_EXIT_WRITE_DR7] = emulate_on_interception, + [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, + [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1d974c1eaa7d..f55690ddb3ac 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -480,8 +480,13 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) eb = (1u << PF_VECTOR) | (1u << UD_VECTOR); if (!vcpu->fpu_active) eb |= 1u << NM_VECTOR; - if (vcpu->guest_debug.enabled) - eb |= 1u << DB_VECTOR; + if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { + if (vcpu->guest_debug & + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) + eb |= 1u << DB_VECTOR; + if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) + eb |= 1u << BP_VECTOR; + } if (vcpu->arch.rmode.active) eb = ~0; if (vm_need_ept()) @@ -1003,40 +1008,23 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) } } -static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) +static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - unsigned long dr7 = 0x400; - int old_singlestep; - - old_singlestep = vcpu->guest_debug.singlestep; - - vcpu->guest_debug.enabled = dbg->enabled; - if (vcpu->guest_debug.enabled) { - int i; - - dr7 |= 0x200; /* exact */ - for (i = 0; i < 4; ++i) { - if (!dbg->breakpoints[i].enabled) - continue; - vcpu->guest_debug.bp[i] = dbg->breakpoints[i].address; - dr7 |= 2 << (i*2); /* global enable */ - dr7 |= 0 << (i*4+16); /* execution breakpoint */ - } - - vcpu->guest_debug.singlestep = dbg->singlestep; - } else - vcpu->guest_debug.singlestep = 0; + int old_debug = vcpu->guest_debug; + unsigned long flags; - if (old_singlestep && !vcpu->guest_debug.singlestep) { - unsigned long flags; + vcpu->guest_debug = dbg->control; + if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)) + vcpu->guest_debug = 0; - flags = vmcs_readl(GUEST_RFLAGS); + flags = vmcs_readl(GUEST_RFLAGS); + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) + flags |= X86_EFLAGS_TF | X86_EFLAGS_RF; + else if (old_debug & KVM_GUESTDBG_SINGLESTEP) flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); - vmcs_writel(GUEST_RFLAGS, flags); - } + vmcs_writel(GUEST_RFLAGS, flags); update_exception_bitmap(vcpu); - vmcs_writel(GUEST_DR7, dr7); return 0; } @@ -2540,24 +2528,6 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) return 0; } -static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu) -{ - struct kvm_guest_debug *dbg = &vcpu->guest_debug; - - set_debugreg(dbg->bp[0], 0); - set_debugreg(dbg->bp[1], 1); - set_debugreg(dbg->bp[2], 2); - set_debugreg(dbg->bp[3], 3); - - if (dbg->singlestep) { - unsigned long flags; - - flags = vmcs_readl(GUEST_RFLAGS); - flags |= X86_EFLAGS_TF | X86_EFLAGS_RF; - vmcs_writel(GUEST_RFLAGS, flags); - } -} - static int handle_rmode_exception(struct kvm_vcpu *vcpu, int vec, u32 err_code) { @@ -2574,9 +2544,17 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, * the required debugging infrastructure rework. */ switch (vec) { - case DE_VECTOR: case DB_VECTOR: + if (vcpu->guest_debug & + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) + return 0; + kvm_queue_exception(vcpu, vec); + return 1; case BP_VECTOR: + if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) + return 0; + /* fall through */ + case DE_VECTOR: case OF_VECTOR: case BR_VECTOR: case UD_VECTOR: @@ -2593,7 +2571,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { struct vcpu_vmx *vmx = to_vmx(vcpu); - u32 intr_info, error_code; + u32 intr_info, ex_no, error_code; unsigned long cr2, rip; u32 vect_info; enum emulation_result er; @@ -2653,14 +2631,16 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; } - if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) == - (INTR_TYPE_HARD_EXCEPTION | 1)) { + ex_no = intr_info & INTR_INFO_VECTOR_MASK; + if (ex_no == DB_VECTOR || ex_no == BP_VECTOR) { kvm_run->exit_reason = KVM_EXIT_DEBUG; - return 0; + kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; + kvm_run->debug.arch.exception = ex_no; + } else { + kvm_run->exit_reason = KVM_EXIT_EXCEPTION; + kvm_run->ex.exception = ex_no; + kvm_run->ex.error_code = error_code; } - kvm_run->exit_reason = KVM_EXIT_EXCEPTION; - kvm_run->ex.exception = intr_info & INTR_INFO_VECTOR_MASK; - kvm_run->ex.error_code = error_code; return 0; } @@ -3600,7 +3580,6 @@ static struct kvm_x86_ops vmx_x86_ops = { .vcpu_put = vmx_vcpu_put, .set_guest_debug = set_guest_debug, - .guest_debug_pre = kvm_guest_debug_pre, .get_msr = vmx_get_msr, .set_msr = vmx_set_msr, .get_segment_base = vmx_get_segment_base, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b5e9932e0f62..e990d164b56d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3005,9 +3005,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) goto out; } - if (vcpu->guest_debug.enabled) - kvm_x86_ops->guest_debug_pre(vcpu); - vcpu->guest_mode = 1; /* * Make sure that guest_mode assignment won't happen after @@ -3218,7 +3215,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) /* * Don't leak debug flags in case they were set for guest debugging */ - if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep) + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); vcpu_put(vcpu); @@ -3837,8 +3834,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return 0; } -int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, - struct kvm_debug_guest *dbg) +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) { int r; @@ -3846,6 +3843,11 @@ int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, r = kvm_x86_ops->set_guest_debug(vcpu, dbg); + if (dbg->control & KVM_GUESTDBG_INJECT_DB) + kvm_queue_exception(vcpu, DB_VECTOR); + else if (dbg->control & KVM_GUESTDBG_INJECT_BP) + kvm_queue_exception(vcpu, BP_VECTOR); + vcpu_put(vcpu); return r; diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 0424326f1679..429a2ce202f9 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -126,6 +126,7 @@ struct kvm_run { __u64 data_offset; /* relative to kvm_run start */ } io; struct { + struct kvm_debug_exit_arch arch; } debug; /* KVM_EXIT_MMIO */ struct { @@ -217,21 +218,6 @@ struct kvm_interrupt { __u32 irq; }; -struct kvm_breakpoint { - __u32 enabled; - __u32 padding; - __u64 address; -}; - -/* for KVM_DEBUG_GUEST */ -struct kvm_debug_guest { - /* int */ - __u32 enabled; - __u32 pad; - struct kvm_breakpoint breakpoints[4]; - __u32 singlestep; -}; - /* for KVM_GET_DIRTY_LOG */ struct kvm_dirty_log { __u32 slot; @@ -292,6 +278,17 @@ struct kvm_s390_interrupt { __u64 parm64; }; +/* for KVM_SET_GUEST_DEBUG */ + +#define KVM_GUESTDBG_ENABLE 0x00000001 +#define KVM_GUESTDBG_SINGLESTEP 0x00000002 + +struct kvm_guest_debug { + __u32 control; + __u32 pad; + struct kvm_guest_debug_arch arch; +}; + #define KVM_TRC_SHIFT 16 /* * kvm trace categories @@ -396,6 +393,7 @@ struct kvm_trace_rec { #ifdef __KVM_HAVE_USER_NMI #define KVM_CAP_USER_NMI 22 #endif +#define KVM_CAP_SET_GUEST_DEBUG 23 /* * ioctls for VM fds @@ -440,7 +438,8 @@ struct kvm_trace_rec { #define KVM_SET_SREGS _IOW(KVMIO, 0x84, struct kvm_sregs) #define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation) #define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt) -#define KVM_DEBUG_GUEST _IOW(KVMIO, 0x87, struct kvm_debug_guest) +/* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */ +#define KVM_DEBUG_GUEST __KVM_DEPRECATED_DEBUG_GUEST #define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs) #define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs) #define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid) @@ -469,6 +468,26 @@ struct kvm_trace_rec { #define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) /* Available with KVM_CAP_NMI */ #define KVM_NMI _IO(KVMIO, 0x9a) +/* Available with KVM_CAP_SET_GUEST_DEBUG */ +#define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug) + +/* + * Deprecated interfaces + */ +struct kvm_breakpoint { + __u32 enabled; + __u32 padding; + __u64 address; +}; + +struct kvm_debug_guest { + __u32 enabled; + __u32 pad; + struct kvm_breakpoint breakpoints[4]; + __u32 singlestep; +}; + +#define __KVM_DEPRECATED_DEBUG_GUEST _IOW(KVMIO, 0x87, struct kvm_debug_guest) #define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) #define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index bf6f703642fc..e92212f970db 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -73,7 +73,7 @@ struct kvm_vcpu { struct kvm_run *run; int guest_mode; unsigned long requests; - struct kvm_guest_debug guest_debug; + unsigned long guest_debug; int fpu_active; int guest_fpu_loaded; wait_queue_head_t wq; @@ -255,8 +255,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state); int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state); -int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, - struct kvm_debug_guest *dbg); +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg); int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run); int kvm_arch_init(void *opaque); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 29a667ce35b0..f83ef9c7e89b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1755,13 +1755,13 @@ out_free2: r = 0; break; } - case KVM_DEBUG_GUEST: { - struct kvm_debug_guest dbg; + case KVM_SET_GUEST_DEBUG: { + struct kvm_guest_debug dbg; r = -EFAULT; if (copy_from_user(&dbg, argp, sizeof dbg)) goto out; - r = kvm_arch_vcpu_ioctl_debug_guest(vcpu, &dbg); + r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg); if (r) goto out; r = 0; -- cgit v1.2.3-58-ga151 From 989c0f0ed56468a4aa48711cef5acf122a40d1dd Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 18 Dec 2008 12:33:18 +0100 Subject: KVM: Remove old kvm_guest_debug structs Remove the remaining arch fragments of the old guest debug interface that now break non-x86 builds. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/ia64/include/asm/kvm_host.h | 2 -- arch/powerpc/include/asm/kvm_host.h | 6 ------ arch/s390/include/asm/kvm_host.h | 3 --- 3 files changed, 11 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index 7da0c0963226..46f8b0eb5684 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -239,8 +239,6 @@ struct kvm_vm_data { struct kvm; struct kvm_vcpu; -struct kvm_guest_debug{ -}; struct kvm_mmio_req { uint64_t addr; /* physical address */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index c1e436fe7738..a22600537a8c 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -200,10 +200,4 @@ struct kvm_vcpu_arch { unsigned long pending_exceptions; }; -struct kvm_guest_debug { - int enabled; - unsigned long bp[4]; - int singlestep; -}; - #endif /* __POWERPC_KVM_HOST_H__ */ diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 3c55e4107dcc..c6e674f5fca9 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -21,9 +21,6 @@ /* memory slots that does not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 4 -struct kvm_guest_debug { -}; - struct sca_entry { atomic_t scn; __u64 reserved; -- cgit v1.2.3-58-ga151 From c46fb0211f8b93de45400bff814c0f29335af5fc Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Sat, 3 Jan 2009 16:22:58 -0600 Subject: KVM: ppc: move struct kvmppc_44x_tlbe into 44x-specific header Signed-off-by: Hollis Blanchard Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_44x.h | 7 +++++++ arch/powerpc/include/asm/kvm_host.h | 7 ------- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h index f49031b632ca..d22d39942a92 100644 --- a/arch/powerpc/include/asm/kvm_44x.h +++ b/arch/powerpc/include/asm/kvm_44x.h @@ -28,6 +28,13 @@ * need to find some way of advertising it. */ #define KVM44x_GUEST_TLB_SIZE 64 +struct kvmppc_44x_tlbe { + u32 tid; /* Only the low 8 bits are used. */ + u32 word0; + u32 word1; + u32 word2; +}; + struct kvmppc_44x_shadow_ref { struct page *page; u16 gtlb_index; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index a22600537a8c..65c4d492d722 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -64,13 +64,6 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -struct kvmppc_44x_tlbe { - u32 tid; /* Only the low 8 bits are used. */ - u32 word0; - u32 word1; - u32 word2; -}; - enum kvm_exit_types { MMIO_EXITS, DCR_EXITS, -- cgit v1.2.3-58-ga151 From ecc0981ff07cbe7cdf95de20be5b24fee8e49cb5 Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Sat, 3 Jan 2009 16:22:59 -0600 Subject: KVM: ppc: cosmetic changes to mmu hook names Signed-off-by: Hollis Blanchard Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_ppc.h | 5 +++-- arch/powerpc/kvm/44x_tlb.c | 2 +- arch/powerpc/kvm/powerpc.c | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 36d2a50a8487..7ba95d28b837 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -52,13 +52,14 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run, extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); +/* Core-specific hooks */ + extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid, u32 flags, u32 max_bytes, unsigned int gtlb_idx); extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid); - -/* Core-specific hooks */ +extern void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu); extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id); diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c index 9a34b8edb9e2..8f9c09cbb833 100644 --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c @@ -248,7 +248,7 @@ static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x, KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler); } -void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu) +void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); int i; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 7c2ad4017d6a..f30be9ef2314 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -216,7 +216,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { - kvmppc_core_destroy_mmu(vcpu); + kvmppc_mmu_destroy(vcpu); } void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) -- cgit v1.2.3-58-ga151 From 475e7cdd69101939006659a63c2e4a32d5b71389 Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Sat, 3 Jan 2009 16:23:00 -0600 Subject: KVM: ppc: small cosmetic changes to Book E DTLB miss handler Signed-off-by: Hollis Blanchard Signed-off-by: Avi Kivity --- arch/powerpc/kvm/booke.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 35485dd6927e..d196ae619303 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -290,6 +290,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, struct kvmppc_44x_tlbe *gtlbe; unsigned long eaddr = vcpu->arch.fault_dear; int gtlb_index; + gpa_t gpaddr; gfn_t gfn; /* Check the guest TLB. */ @@ -305,8 +306,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, } gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; - vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr); - gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT; + gpaddr = tlb_xlate(gtlbe, eaddr); + gfn = gpaddr >> PAGE_SHIFT; if (kvm_is_visible_gfn(vcpu->kvm, gfn)) { /* The guest TLB had a mapping, but the shadow TLB @@ -315,13 +316,14 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * b) the guest used a large mapping which we're faking * Either way, we need to satisfy the fault without * invoking the guest. */ - kvmppc_mmu_map(vcpu, eaddr, vcpu->arch.paddr_accessed, gtlbe->tid, + kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid, gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index); kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS); r = RESUME_GUEST; } else { /* Guest has mapped and accessed a page which is not * actually RAM. */ + vcpu->arch.paddr_accessed = gpaddr; r = kvmppc_emulate_mmio(run, vcpu); kvmppc_account_exit(vcpu, MMIO_EXITS); } -- cgit v1.2.3-58-ga151 From 58a96214a306fc7fc66105097eea9c4f3bfa35bc Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Sat, 3 Jan 2009 16:23:01 -0600 Subject: KVM: ppc: change kvmppc_mmu_map() parameters Passing just the TLB index will ease an e500 implementation. Signed-off-by: Hollis Blanchard Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_ppc.h | 1 - arch/powerpc/kvm/44x_tlb.c | 15 +++++++-------- arch/powerpc/kvm/booke.c | 6 ++---- 3 files changed, 9 insertions(+), 13 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 7ba95d28b837..f661f8ba3ab8 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -55,7 +55,6 @@ extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); /* Core-specific hooks */ extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, - u64 asid, u32 flags, u32 max_bytes, unsigned int gtlb_idx); extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid); diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c index 8f9c09cbb833..e8ed22f28eae 100644 --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c @@ -269,15 +269,19 @@ void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) * Caller must ensure that the specified guest TLB entry is safe to insert into * the shadow TLB. */ -void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid, - u32 flags, u32 max_bytes, unsigned int gtlb_index) +void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, + unsigned int gtlb_index) { struct kvmppc_44x_tlbe stlbe; struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + struct kvmppc_44x_tlbe *gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; struct kvmppc_44x_shadow_ref *ref; struct page *new_page; hpa_t hpaddr; gfn_t gfn; + u32 asid = gtlbe->tid; + u32 flags = gtlbe->word2; + u32 max_bytes = get_tlb_bytes(gtlbe); unsigned int victim; /* Select TLB entry to clobber. Indirectly guard against races with the TLB @@ -448,10 +452,8 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws) } if (tlbe_is_host_safe(vcpu, tlbe)) { - u64 asid; gva_t eaddr; gpa_t gpaddr; - u32 flags; u32 bytes; eaddr = get_tlb_eaddr(tlbe); @@ -462,10 +464,7 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws) eaddr &= ~(bytes - 1); gpaddr &= ~(bytes - 1); - asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid; - flags = tlbe->word2 & 0xffff; - - kvmppc_mmu_map(vcpu, eaddr, gpaddr, asid, flags, bytes, gtlb_index); + kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index); } KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0, diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index d196ae619303..85b9e2fc6c6b 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -316,8 +316,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * b) the guest used a large mapping which we're faking * Either way, we need to satisfy the fault without * invoking the guest. */ - kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid, - gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index); + kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index); kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS); r = RESUME_GUEST; } else { @@ -364,8 +363,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * b) the guest used a large mapping which we're faking * Either way, we need to satisfy the fault without * invoking the guest. */ - kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid, - gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index); + kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index); } else { /* Guest mapped and leaped at non-RAM! */ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK); -- cgit v1.2.3-58-ga151 From be8d1cae07d5acf4a61046d7def5eda40f0981e1 Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Sat, 3 Jan 2009 16:23:02 -0600 Subject: KVM: ppc: turn tlb_xlate() into a per-core hook (and give it a better name) Signed-off-by: Hollis Blanchard Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_ppc.h | 2 ++ arch/powerpc/kvm/44x.c | 6 +----- arch/powerpc/kvm/44x_tlb.c | 10 ++++++++++ arch/powerpc/kvm/44x_tlb.h | 7 ------- arch/powerpc/kvm/booke.c | 10 ++-------- 5 files changed, 15 insertions(+), 20 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index f661f8ba3ab8..9fd70aa916d9 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -59,6 +59,8 @@ extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid); extern void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu); +extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index, + gva_t eaddr); extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id); diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index a66bec57265a..8383603dd8a4 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -149,8 +149,6 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr) { - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *gtlbe; int index; gva_t eaddr; u8 pid; @@ -166,9 +164,7 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, return 0; } - gtlbe = &vcpu_44x->guest_tlb[index]; - - tr->physical_address = tlb_xlate(gtlbe, eaddr); + tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr); /* XXX what does "writeable" and "usermode" even mean? */ tr->valid = 1; diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c index e8ed22f28eae..2f14671e8a15 100644 --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c @@ -208,6 +208,16 @@ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, return -1; } +gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index, + gva_t eaddr) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + struct kvmppc_44x_tlbe *gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; + unsigned int pgmask = get_tlb_bytes(gtlbe) - 1; + + return get_tlb_raddr(gtlbe) | (eaddr & pgmask); +} + int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) { unsigned int as = !!(vcpu->arch.msr & MSR_IS); diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h index 772191f29e62..05b6f7eef5b6 100644 --- a/arch/powerpc/kvm/44x_tlb.h +++ b/arch/powerpc/kvm/44x_tlb.h @@ -85,11 +85,4 @@ static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu) return (vcpu->arch.mmucr >> 16) & 0x1; } -static inline gpa_t tlb_xlate(struct kvmppc_44x_tlbe *tlbe, gva_t eaddr) -{ - unsigned int pgmask = get_tlb_bytes(tlbe) - 1; - - return get_tlb_raddr(tlbe) | (eaddr & pgmask); -} - #endif /* __KVM_POWERPC_TLB_H__ */ diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 85b9e2fc6c6b..56d6ed69ed60 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -286,8 +286,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, /* XXX move to a 440-specific file. */ case BOOKE_INTERRUPT_DTLB_MISS: { - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *gtlbe; unsigned long eaddr = vcpu->arch.fault_dear; int gtlb_index; gpa_t gpaddr; @@ -305,8 +303,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; } - gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; - gpaddr = tlb_xlate(gtlbe, eaddr); + gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr); gfn = gpaddr >> PAGE_SHIFT; if (kvm_is_visible_gfn(vcpu->kvm, gfn)) { @@ -332,8 +329,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, /* XXX move to a 440-specific file. */ case BOOKE_INTERRUPT_ITLB_MISS: { - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *gtlbe; unsigned long eaddr = vcpu->arch.pc; gpa_t gpaddr; gfn_t gfn; @@ -352,8 +347,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS); - gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; - gpaddr = tlb_xlate(gtlbe, eaddr); + gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr); gfn = gpaddr >> PAGE_SHIFT; if (kvm_is_visible_gfn(vcpu->kvm, gfn)) { -- cgit v1.2.3-58-ga151 From fa86b8dda2e0faccefbeda61edc02a50bd588f4f Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Sat, 3 Jan 2009 16:23:03 -0600 Subject: KVM: ppc: rename 44x MMU functions used in booke.c e500 will provide its own implementation of these. Signed-off-by: Hollis Blanchard Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_ppc.h | 2 ++ arch/powerpc/kvm/44x_tlb.c | 4 ++-- arch/powerpc/kvm/44x_tlb.h | 2 -- arch/powerpc/kvm/booke.c | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 9fd70aa916d9..5e80a20f32b8 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -59,6 +59,8 @@ extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid); extern void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu); +extern int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); +extern int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index, gva_t eaddr); diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c index 2f14671e8a15..e67b7313ffc3 100644 --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c @@ -218,14 +218,14 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index, return get_tlb_raddr(gtlbe) | (eaddr & pgmask); } -int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) +int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) { unsigned int as = !!(vcpu->arch.msr & MSR_IS); return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); } -int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) +int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) { unsigned int as = !!(vcpu->arch.msr & MSR_DS); diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h index 05b6f7eef5b6..a9ff80e51526 100644 --- a/arch/powerpc/kvm/44x_tlb.h +++ b/arch/powerpc/kvm/44x_tlb.h @@ -25,8 +25,6 @@ extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, unsigned int as); -extern int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); -extern int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc); diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 56d6ed69ed60..1e692ac16e99 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -292,7 +292,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, gfn_t gfn; /* Check the guest TLB. */ - gtlb_index = kvmppc_44x_dtlb_index(vcpu, eaddr); + gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr); if (gtlb_index < 0) { /* The guest didn't have a mapping for it. */ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS); @@ -337,7 +337,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, r = RESUME_GUEST; /* Check the guest TLB. */ - gtlb_index = kvmppc_44x_itlb_index(vcpu, eaddr); + gtlb_index = kvmppc_mmu_itlb_index(vcpu, eaddr); if (gtlb_index < 0) { /* The guest didn't have a mapping for it. */ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS); -- cgit v1.2.3-58-ga151 From f44353610b584fcbc31e363f35594796c6446d63 Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Sat, 3 Jan 2009 16:23:04 -0600 Subject: KVM: ppc: remove last 44x-specific bits from booke.c Signed-off-by: Hollis Blanchard Signed-off-by: Avi Kivity --- arch/powerpc/kvm/booke.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 1e692ac16e99..a73b3959e41c 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -30,10 +30,8 @@ #include #include "timing.h" #include -#include #include "booke.h" -#include "44x_tlb.h" unsigned long kvmppc_booke_handlers; @@ -284,7 +282,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, r = RESUME_GUEST; break; - /* XXX move to a 440-specific file. */ case BOOKE_INTERRUPT_DTLB_MISS: { unsigned long eaddr = vcpu->arch.fault_dear; int gtlb_index; @@ -327,7 +324,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; } - /* XXX move to a 440-specific file. */ case BOOKE_INTERRUPT_ITLB_MISS: { unsigned long eaddr = vcpu->arch.pc; gpa_t gpaddr; -- cgit v1.2.3-58-ga151 From cea5d8c9de669e30ed6d60930318376d5cc42e9e Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Sat, 3 Jan 2009 16:23:05 -0600 Subject: KVM: ppc: use macros instead of hardcoded literals for instruction decoding Signed-off-by: Hollis Blanchard Signed-off-by: Avi Kivity --- arch/powerpc/kvm/emulate.c | 93 +++++++++++++++++++++++++++++++--------------- 1 file changed, 63 insertions(+), 30 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index d1d38daa93fb..a561d6e8da1c 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -30,6 +30,39 @@ #include #include "timing.h" +#define OP_TRAP 3 + +#define OP_31_XOP_LWZX 23 +#define OP_31_XOP_LBZX 87 +#define OP_31_XOP_STWX 151 +#define OP_31_XOP_STBX 215 +#define OP_31_XOP_STBUX 247 +#define OP_31_XOP_LHZX 279 +#define OP_31_XOP_LHZUX 311 +#define OP_31_XOP_MFSPR 339 +#define OP_31_XOP_STHX 407 +#define OP_31_XOP_STHUX 439 +#define OP_31_XOP_MTSPR 467 +#define OP_31_XOP_DCBI 470 +#define OP_31_XOP_LWBRX 534 +#define OP_31_XOP_TLBSYNC 566 +#define OP_31_XOP_STWBRX 662 +#define OP_31_XOP_LHBRX 790 +#define OP_31_XOP_STHBRX 918 + +#define OP_LWZ 32 +#define OP_LWZU 33 +#define OP_LBZ 34 +#define OP_LBZU 35 +#define OP_STW 36 +#define OP_STWU 37 +#define OP_STB 38 +#define OP_STBU 39 +#define OP_LHZ 40 +#define OP_LHZU 41 +#define OP_STH 44 +#define OP_STHU 45 + void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) { if (vcpu->arch.tcr & TCR_DIE) { @@ -78,7 +111,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); switch (get_op(inst)) { - case 3: /* trap */ + case OP_TRAP: vcpu->arch.esr |= ESR_PTR; kvmppc_core_queue_program(vcpu); advance = 0; @@ -87,31 +120,31 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) case 31: switch (get_xop(inst)) { - case 23: /* lwzx */ + case OP_31_XOP_LWZX: rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); break; - case 87: /* lbzx */ + case OP_31_XOP_LBZX: rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); break; - case 151: /* stwx */ + case OP_31_XOP_STWX: rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 4, 1); break; - case 215: /* stbx */ + case OP_31_XOP_STBX: rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 1, 1); break; - case 247: /* stbux */ + case OP_31_XOP_STBUX: rs = get_rs(inst); ra = get_ra(inst); rb = get_rb(inst); @@ -126,12 +159,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.gpr[rs] = ea; break; - case 279: /* lhzx */ + case OP_31_XOP_LHZX: rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); break; - case 311: /* lhzux */ + case OP_31_XOP_LHZUX: rt = get_rt(inst); ra = get_ra(inst); rb = get_rb(inst); @@ -144,7 +177,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.gpr[ra] = ea; break; - case 339: /* mfspr */ + case OP_31_XOP_MFSPR: sprn = get_sprn(inst); rt = get_rt(inst); @@ -185,7 +218,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) } break; - case 407: /* sthx */ + case OP_31_XOP_STHX: rs = get_rs(inst); ra = get_ra(inst); rb = get_rb(inst); @@ -195,7 +228,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 2, 1); break; - case 439: /* sthux */ + case OP_31_XOP_STHUX: rs = get_rs(inst); ra = get_ra(inst); rb = get_rb(inst); @@ -210,7 +243,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.gpr[ra] = ea; break; - case 467: /* mtspr */ + case OP_31_XOP_MTSPR: sprn = get_sprn(inst); rs = get_rs(inst); switch (sprn) { @@ -246,7 +279,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) } break; - case 470: /* dcbi */ + case OP_31_XOP_DCBI: /* Do nothing. The guest is performing dcbi because * hardware DMA is not snooped by the dcache, but * emulated DMA either goes through the dcache as @@ -254,15 +287,15 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) * coherence. */ break; - case 534: /* lwbrx */ + case OP_31_XOP_LWBRX: rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0); break; - case 566: /* tlbsync */ + case OP_31_XOP_TLBSYNC: break; - case 662: /* stwbrx */ + case OP_31_XOP_STWBRX: rs = get_rs(inst); ra = get_ra(inst); rb = get_rb(inst); @@ -272,12 +305,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 4, 0); break; - case 790: /* lhbrx */ + case OP_31_XOP_LHBRX: rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); break; - case 918: /* sthbrx */ + case OP_31_XOP_STHBRX: rs = get_rs(inst); ra = get_ra(inst); rb = get_rb(inst); @@ -293,37 +326,37 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) } break; - case 32: /* lwz */ + case OP_LWZ: rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); break; - case 33: /* lwzu */ + case OP_LWZU: ra = get_ra(inst); rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; break; - case 34: /* lbz */ + case OP_LBZ: rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); break; - case 35: /* lbzu */ + case OP_LBZU: ra = get_ra(inst); rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; break; - case 36: /* stw */ + case OP_STW: rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 4, 1); break; - case 37: /* stwu */ + case OP_STWU: ra = get_ra(inst); rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], @@ -331,13 +364,13 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; break; - case 38: /* stb */ + case OP_STB: rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 1, 1); break; - case 39: /* stbu */ + case OP_STBU: ra = get_ra(inst); rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], @@ -345,25 +378,25 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; break; - case 40: /* lhz */ + case OP_LHZ: rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); break; - case 41: /* lhzu */ + case OP_LHZU: ra = get_ra(inst); rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; break; - case 44: /* sth */ + case OP_STH: rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 2, 1); break; - case 45: /* sthu */ + case OP_STHU: ra = get_ra(inst); rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], -- cgit v1.2.3-58-ga151 From d0c7dc03442f7cbd8740d9a4e7a4e7f84bfa676d Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Sat, 3 Jan 2009 16:23:06 -0600 Subject: KVM: ppc: split out common Book E instruction emulation The Book E code will be shared with e500. I've left PID in kvmppc_core_emulate_op() just so that we don't need to move kvmppc_set_pid() right now. Once we have the e500 implementation, we can probably share that too. Signed-off-by: Hollis Blanchard Signed-off-by: Avi Kivity --- arch/powerpc/kvm/44x_emulate.c | 217 +++----------------------------- arch/powerpc/kvm/Makefile | 1 + arch/powerpc/kvm/booke.h | 6 + arch/powerpc/kvm/booke_emulate.c | 262 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 284 insertions(+), 202 deletions(-) create mode 100644 arch/powerpc/kvm/booke_emulate.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c index 82489a743a6f..61af58fcecee 100644 --- a/arch/powerpc/kvm/44x_emulate.c +++ b/arch/powerpc/kvm/44x_emulate.c @@ -27,25 +27,12 @@ #include "booke.h" #include "44x_tlb.h" -#define OP_RFI 19 - -#define XOP_RFI 50 -#define XOP_MFMSR 83 -#define XOP_WRTEE 131 -#define XOP_MTMSR 146 -#define XOP_WRTEEI 163 #define XOP_MFDCR 323 #define XOP_MTDCR 451 #define XOP_TLBSX 914 #define XOP_ICCCI 966 #define XOP_TLBWE 978 -static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) -{ - vcpu->arch.pc = vcpu->arch.srr0; - kvmppc_set_msr(vcpu, vcpu->arch.srr1); -} - int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { @@ -59,48 +46,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, int ws; switch (get_op(inst)) { - case OP_RFI: - switch (get_xop(inst)) { - case XOP_RFI: - kvmppc_emul_rfi(vcpu); - kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS); - *advance = 0; - break; - - default: - emulated = EMULATE_FAIL; - break; - } - break; - case 31: switch (get_xop(inst)) { - case XOP_MFMSR: - rt = get_rt(inst); - vcpu->arch.gpr[rt] = vcpu->arch.msr; - kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS); - break; - - case XOP_MTMSR: - rs = get_rs(inst); - kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS); - kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]); - break; - - case XOP_WRTEE: - rs = get_rs(inst); - vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) - | (vcpu->arch.gpr[rs] & MSR_EE); - kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); - break; - - case XOP_WRTEEI: - vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) - | (inst & MSR_EE); - kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); - break; - case XOP_MFDCR: dcrn = get_dcrn(inst); rt = get_rt(inst); @@ -186,186 +134,51 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, emulated = EMULATE_FAIL; } + if (emulated == EMULATE_FAIL) + emulated = kvmppc_booke_emulate_op(run, vcpu, inst, advance); + return emulated; } int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) { + int emulated = EMULATE_DONE; + switch (sprn) { - case SPRN_MMUCR: - vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break; case SPRN_PID: kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break; + case SPRN_MMUCR: + vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break; case SPRN_CCR0: vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break; case SPRN_CCR1: vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break; - case SPRN_DEAR: - vcpu->arch.dear = vcpu->arch.gpr[rs]; break; - case SPRN_ESR: - vcpu->arch.esr = vcpu->arch.gpr[rs]; break; - case SPRN_DBCR0: - vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break; - case SPRN_DBCR1: - vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break; - case SPRN_TSR: - vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break; - case SPRN_TCR: - vcpu->arch.tcr = vcpu->arch.gpr[rs]; - kvmppc_emulate_dec(vcpu); - break; - - /* Note: SPRG4-7 are user-readable. These values are - * loaded into the real SPRGs when resuming the - * guest. */ - case SPRN_SPRG4: - vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break; - case SPRN_SPRG5: - vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break; - case SPRN_SPRG6: - vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break; - case SPRN_SPRG7: - vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break; - - case SPRN_IVPR: - vcpu->arch.ivpr = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR0: - vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR1: - vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR2: - vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR3: - vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR4: - vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR5: - vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR6: - vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR7: - vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR8: - vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR9: - vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR10: - vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR11: - vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR12: - vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR13: - vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR14: - vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR15: - vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs]; - break; - default: - return EMULATE_FAIL; + emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs); } kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS); - return EMULATE_DONE; + return emulated; } int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) { + int emulated = EMULATE_DONE; + switch (sprn) { - /* 440 */ + case SPRN_PID: + vcpu->arch.gpr[rt] = vcpu->arch.pid; break; case SPRN_MMUCR: vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break; case SPRN_CCR0: vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break; case SPRN_CCR1: vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break; - - /* Book E */ - case SPRN_PID: - vcpu->arch.gpr[rt] = vcpu->arch.pid; break; - case SPRN_IVPR: - vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break; - case SPRN_DEAR: - vcpu->arch.gpr[rt] = vcpu->arch.dear; break; - case SPRN_ESR: - vcpu->arch.gpr[rt] = vcpu->arch.esr; break; - case SPRN_DBCR0: - vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break; - case SPRN_DBCR1: - vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break; - - case SPRN_IVOR0: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]; - break; - case SPRN_IVOR1: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]; - break; - case SPRN_IVOR2: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]; - break; - case SPRN_IVOR3: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]; - break; - case SPRN_IVOR4: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]; - break; - case SPRN_IVOR5: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]; - break; - case SPRN_IVOR6: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]; - break; - case SPRN_IVOR7: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]; - break; - case SPRN_IVOR8: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]; - break; - case SPRN_IVOR9: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]; - break; - case SPRN_IVOR10: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]; - break; - case SPRN_IVOR11: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]; - break; - case SPRN_IVOR12: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]; - break; - case SPRN_IVOR13: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; - break; - case SPRN_IVOR14: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; - break; - case SPRN_IVOR15: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; - break; - default: - return EMULATE_FAIL; + emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); } kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS); - return EMULATE_DONE; + return emulated; } diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index df7ba59e6d53..3ef5261828e2 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -16,6 +16,7 @@ AFLAGS_booke_interrupts.o := -I$(obj) kvm-440-objs := \ booke.o \ + booke_emulate.o \ booke_interrupts.o \ 44x.o \ 44x_tlb.o \ diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index cf7c94ca24bf..311fdbc23fbe 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -22,6 +22,7 @@ #include #include +#include #include "timing.h" /* interrupt priortity ordering */ @@ -57,4 +58,9 @@ static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) }; } +int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int inst, int *advance); +int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt); +int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs); + #endif /* __KVM_BOOKE_H__ */ diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c new file mode 100644 index 000000000000..8aa78f1a1f87 --- /dev/null +++ b/arch/powerpc/kvm/booke_emulate.c @@ -0,0 +1,262 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard + */ + +#include +#include + +#include "booke.h" + +#define OP_19_XOP_RFI 50 + +#define OP_31_XOP_MFMSR 83 +#define OP_31_XOP_WRTEE 131 +#define OP_31_XOP_MTMSR 146 +#define OP_31_XOP_WRTEEI 163 + +static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) +{ + vcpu->arch.pc = vcpu->arch.srr0; + kvmppc_set_msr(vcpu, vcpu->arch.srr1); +} + +int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int inst, int *advance) +{ + int emulated = EMULATE_DONE; + int rs; + int rt; + + switch (get_op(inst)) { + case 19: + switch (get_xop(inst)) { + case OP_19_XOP_RFI: + kvmppc_emul_rfi(vcpu); + kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS); + *advance = 0; + break; + + default: + emulated = EMULATE_FAIL; + break; + } + break; + + case 31: + switch (get_xop(inst)) { + + case OP_31_XOP_MFMSR: + rt = get_rt(inst); + vcpu->arch.gpr[rt] = vcpu->arch.msr; + kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS); + break; + + case OP_31_XOP_MTMSR: + rs = get_rs(inst); + kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS); + kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]); + break; + + case OP_31_XOP_WRTEE: + rs = get_rs(inst); + vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) + | (vcpu->arch.gpr[rs] & MSR_EE); + kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); + break; + + case OP_31_XOP_WRTEEI: + vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) + | (inst & MSR_EE); + kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); + break; + + default: + emulated = EMULATE_FAIL; + } + + break; + + default: + emulated = EMULATE_FAIL; + } + + return emulated; +} + +int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) +{ + int emulated = EMULATE_DONE; + + switch (sprn) { + case SPRN_DEAR: + vcpu->arch.dear = vcpu->arch.gpr[rs]; break; + case SPRN_ESR: + vcpu->arch.esr = vcpu->arch.gpr[rs]; break; + case SPRN_DBCR0: + vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break; + case SPRN_DBCR1: + vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break; + case SPRN_TSR: + vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break; + case SPRN_TCR: + vcpu->arch.tcr = vcpu->arch.gpr[rs]; + kvmppc_emulate_dec(vcpu); + break; + + /* Note: SPRG4-7 are user-readable. These values are + * loaded into the real SPRGs when resuming the + * guest. */ + case SPRN_SPRG4: + vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG5: + vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG6: + vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG7: + vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break; + + case SPRN_IVPR: + vcpu->arch.ivpr = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR0: + vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR1: + vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR2: + vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR3: + vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR4: + vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR5: + vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR6: + vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR7: + vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR8: + vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR9: + vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR10: + vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR11: + vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR12: + vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR13: + vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR14: + vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR15: + vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs]; + break; + + default: + emulated = EMULATE_FAIL; + } + + return emulated; +} + +int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) +{ + int emulated = EMULATE_DONE; + + switch (sprn) { + case SPRN_IVPR: + vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break; + case SPRN_DEAR: + vcpu->arch.gpr[rt] = vcpu->arch.dear; break; + case SPRN_ESR: + vcpu->arch.gpr[rt] = vcpu->arch.esr; break; + case SPRN_DBCR0: + vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break; + case SPRN_DBCR1: + vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break; + + case SPRN_IVOR0: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]; + break; + case SPRN_IVOR1: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]; + break; + case SPRN_IVOR2: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]; + break; + case SPRN_IVOR3: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]; + break; + case SPRN_IVOR4: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]; + break; + case SPRN_IVOR5: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]; + break; + case SPRN_IVOR6: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]; + break; + case SPRN_IVOR7: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]; + break; + case SPRN_IVOR8: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]; + break; + case SPRN_IVOR9: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]; + break; + case SPRN_IVOR10: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]; + break; + case SPRN_IVOR11: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]; + break; + case SPRN_IVOR12: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]; + break; + case SPRN_IVOR13: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; + break; + case SPRN_IVOR14: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; + break; + case SPRN_IVOR15: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; + break; + + default: + emulated = EMULATE_FAIL; + } + + return emulated; +} -- cgit v1.2.3-58-ga151 From f7b200af8f1da748cc67993caeff3923d6014070 Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Sat, 3 Jan 2009 16:23:07 -0600 Subject: KVM: ppc: Add dbsr in kvm_vcpu_arch Kernel for E500 need clear dbsr when startup. So add dbsr register in kvm_vcpu_arch for BOOKE. Signed-off-by: Liu Yu Signed-off-by: Hollis Blanchard Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/kvm/booke_emulate.c | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 65c4d492d722..50e5ce1b400a 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -163,6 +163,7 @@ struct kvm_vcpu_arch { u32 ccr1; u32 dbcr0; u32 dbcr1; + u32 dbsr; #ifdef CONFIG_KVM_EXIT_TIMING struct kvmppc_exit_timing timing_exit; diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 8aa78f1a1f87..aebc65e93f4b 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -111,6 +111,8 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break; case SPRN_DBCR1: vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break; + case SPRN_DBSR: + vcpu->arch.dbsr &= ~vcpu->arch.gpr[rs]; break; case SPRN_TSR: vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break; case SPRN_TCR: @@ -204,6 +206,8 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break; case SPRN_DBCR1: vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break; + case SPRN_DBSR: + vcpu->arch.gpr[rt] = vcpu->arch.dbsr; break; case SPRN_IVOR0: vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]; -- cgit v1.2.3-58-ga151 From 366d4b9b9fdda282006b97316f8038cd36f8ecb7 Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Sat, 3 Jan 2009 16:23:08 -0600 Subject: KVM: ppc: No need to include core-header for KVM in asm-offsets.c currently Signed-off-by: Liu Yu Signed-off-by: Hollis Blanchard Signed-off-by: Avi Kivity --- arch/powerpc/kernel/asm-offsets.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 19ee491e9e23..42fe4da4e8ae 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -49,7 +49,7 @@ #include #endif #ifdef CONFIG_KVM -#include +#include #endif #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) @@ -361,8 +361,6 @@ int main(void) DEFINE(PTE_SIZE, sizeof(pte_t)); #ifdef CONFIG_KVM - DEFINE(TLBE_BYTES, sizeof(struct kvmppc_44x_tlbe)); - DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); -- cgit v1.2.3-58-ga151 From 17c885eb5c38f39a2bb3143a67687bd905dcd0fa Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Sat, 3 Jan 2009 16:23:09 -0600 Subject: KVM: ppc: ifdef iccci with CONFIG_44x E500 deosn't support this instruction. Signed-off-by: Liu Yu Signed-off-by: Hollis Blanchard Signed-off-by: Avi Kivity --- arch/powerpc/kvm/booke_interrupts.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 084ebcd7dd83..4679ec287e62 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -347,7 +347,9 @@ lightweight_exit: lwz r3, VCPU_SHADOW_PID(r4) mtspr SPRN_PID, r3 +#ifdef CONFIG_44x iccci 0, 0 /* XXX hack */ +#endif /* Load some guest volatiles. */ lwz r0, VCPU_GPR(r0)(r4) -- cgit v1.2.3-58-ga151 From bc8080cbcc8870178f0910edd537d0cb5706d703 Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Sat, 3 Jan 2009 16:23:10 -0600 Subject: KVM: ppc: E500 core-specific code Signed-off-by: Liu Yu Signed-off-by: Hollis Blanchard Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_e500.h | 67 ++++ arch/powerpc/kvm/Kconfig | 13 + arch/powerpc/kvm/Makefile | 9 + arch/powerpc/kvm/e500.c | 151 ++++++++ arch/powerpc/kvm/e500_emulate.c | 169 +++++++++ arch/powerpc/kvm/e500_tlb.c | 737 ++++++++++++++++++++++++++++++++++++ arch/powerpc/kvm/e500_tlb.h | 184 +++++++++ 7 files changed, 1330 insertions(+) create mode 100644 arch/powerpc/include/asm/kvm_e500.h create mode 100644 arch/powerpc/kvm/e500.c create mode 100644 arch/powerpc/kvm/e500_emulate.c create mode 100644 arch/powerpc/kvm/e500_tlb.c create mode 100644 arch/powerpc/kvm/e500_tlb.h (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h new file mode 100644 index 000000000000..9d497ce49726 --- /dev/null +++ b/arch/powerpc/include/asm/kvm_e500.h @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Yu Liu, + * + * Description: + * This file is derived from arch/powerpc/include/asm/kvm_44x.h, + * by Hollis Blanchard . + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_KVM_E500_H__ +#define __ASM_KVM_E500_H__ + +#include + +#define BOOKE_INTERRUPT_SIZE 36 + +#define E500_PID_NUM 3 +#define E500_TLB_NUM 2 + +struct tlbe{ + u32 mas1; + u32 mas2; + u32 mas3; + u32 mas7; +}; + +struct kvmppc_vcpu_e500 { + /* Unmodified copy of the guest's TLB. */ + struct tlbe *guest_tlb[E500_TLB_NUM]; + /* TLB that's actually used when the guest is running. */ + struct tlbe *shadow_tlb[E500_TLB_NUM]; + /* Pages which are referenced in the shadow TLB. */ + struct page **shadow_pages[E500_TLB_NUM]; + + unsigned int guest_tlb_size[E500_TLB_NUM]; + unsigned int shadow_tlb_size[E500_TLB_NUM]; + unsigned int guest_tlb_nv[E500_TLB_NUM]; + + u32 host_pid[E500_PID_NUM]; + u32 pid[E500_PID_NUM]; + + u32 mas0; + u32 mas1; + u32 mas2; + u32 mas3; + u32 mas4; + u32 mas5; + u32 mas6; + u32 mas7; + u32 l1csr1; + u32 hid0; + u32 hid1; + + struct kvm_vcpu vcpu; +}; + +static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu) +{ + return container_of(vcpu, struct kvmppc_vcpu_e500, vcpu); +} + +#endif /* __ASM_KVM_E500_H__ */ diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 6dbdc4817d80..146570550142 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -43,6 +43,19 @@ config KVM_EXIT_TIMING If unsure, say N. +config KVM_E500 + bool "KVM support for PowerPC E500 processors" + depends on EXPERIMENTAL && E500 + select KVM + ---help--- + Support running unmodified E500 guest kernels in virtual machines on + E500 host processors. + + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + If unsure, say N. + config KVM_TRACE bool "KVM trace support" depends on KVM && MARKERS && SYSFS diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 3ef5261828e2..4b2df66c79d8 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -22,3 +22,12 @@ kvm-440-objs := \ 44x_tlb.o \ 44x_emulate.o obj-$(CONFIG_KVM_440) += kvm-440.o + +kvm-e500-objs := \ + booke.o \ + booke_emulate.o \ + booke_interrupts.o \ + e500.o \ + e500_tlb.o \ + e500_emulate.o +obj-$(CONFIG_KVM_E500) += kvm-e500.o diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c new file mode 100644 index 000000000000..7992da497cd4 --- /dev/null +++ b/arch/powerpc/kvm/e500.c @@ -0,0 +1,151 @@ +/* + * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Yu Liu, + * + * Description: + * This file is derived from arch/powerpc/kvm/44x.c, + * by Hollis Blanchard . + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include +#include + +#include +#include +#include +#include +#include + +#include "e500_tlb.h" + +void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu) +{ +} + +void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) +{ +} + +void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + kvmppc_e500_tlb_load(vcpu, cpu); +} + +void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) +{ + kvmppc_e500_tlb_put(vcpu); +} + +int kvmppc_core_check_processor_compat(void) +{ + int r; + + if (strcmp(cur_cpu_spec->cpu_name, "e500v2") == 0) + r = 0; + else + r = -ENOTSUPP; + + return r; +} + +int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + kvmppc_e500_tlb_setup(vcpu_e500); + + /* Use the same core vertion as host's */ + vcpu->arch.pvr = mfspr(SPRN_PVR); + + return 0; +} + +/* 'linear_address' is actually an encoding of AS|PID|EADDR . */ +int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + int index; + gva_t eaddr; + u8 pid; + u8 as; + + eaddr = tr->linear_address; + pid = (tr->linear_address >> 32) & 0xff; + as = (tr->linear_address >> 40) & 0x1; + + index = kvmppc_e500_tlb_search(vcpu, eaddr, pid, as); + if (index < 0) { + tr->valid = 0; + return 0; + } + + tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr); + /* XXX what does "writeable" and "usermode" even mean? */ + tr->valid = 1; + + return 0; +} + +struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +{ + struct kvmppc_vcpu_e500 *vcpu_e500; + struct kvm_vcpu *vcpu; + int err; + + vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + if (!vcpu_e500) { + err = -ENOMEM; + goto out; + } + + vcpu = &vcpu_e500->vcpu; + err = kvm_vcpu_init(vcpu, kvm, id); + if (err) + goto free_vcpu; + + err = kvmppc_e500_tlb_init(vcpu_e500); + if (err) + goto uninit_vcpu; + + return vcpu; + +uninit_vcpu: + kvm_vcpu_uninit(vcpu); +free_vcpu: + kmem_cache_free(kvm_vcpu_cache, vcpu_e500); +out: + return ERR_PTR(err); +} + +void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + kvmppc_e500_tlb_uninit(vcpu_e500); + kvm_vcpu_uninit(vcpu); + kmem_cache_free(kvm_vcpu_cache, vcpu_e500); +} + +static int kvmppc_e500_init(void) +{ + int r; + + r = kvmppc_booke_init(); + if (r) + return r; + + return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), THIS_MODULE); +} + +static void kvmppc_e500_exit(void) +{ + kvmppc_booke_exit(); +} + +module_init(kvmppc_e500_init); +module_exit(kvmppc_e500_exit); diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c new file mode 100644 index 000000000000..a47f44cd2cfd --- /dev/null +++ b/arch/powerpc/kvm/e500_emulate.c @@ -0,0 +1,169 @@ +/* + * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Yu Liu, + * + * Description: + * This file is derived from arch/powerpc/kvm/44x_emulate.c, + * by Hollis Blanchard . + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +#include "booke.h" +#include "e500_tlb.h" + +#define XOP_TLBIVAX 786 +#define XOP_TLBSX 914 +#define XOP_TLBRE 946 +#define XOP_TLBWE 978 + +int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int inst, int *advance) +{ + int emulated = EMULATE_DONE; + int ra; + int rb; + int rs; + int rt; + + switch (get_op(inst)) { + case 31: + switch (get_xop(inst)) { + + case XOP_TLBRE: + emulated = kvmppc_e500_emul_tlbre(vcpu); + break; + + case XOP_TLBWE: + emulated = kvmppc_e500_emul_tlbwe(vcpu); + break; + + case XOP_TLBSX: + rb = get_rb(inst); + emulated = kvmppc_e500_emul_tlbsx(vcpu,rb); + break; + + case XOP_TLBIVAX: + ra = get_ra(inst); + rb = get_rb(inst); + emulated = kvmppc_e500_emul_tlbivax(vcpu, ra, rb); + break; + + default: + emulated = EMULATE_FAIL; + } + + break; + + default: + emulated = EMULATE_FAIL; + } + + if (emulated == EMULATE_FAIL) + emulated = kvmppc_booke_emulate_op(run, vcpu, inst, advance); + + return emulated; +} + +int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int emulated = EMULATE_DONE; + + switch (sprn) { + case SPRN_PID: + vcpu_e500->pid[0] = vcpu->arch.shadow_pid = + vcpu->arch.pid = vcpu->arch.gpr[rs]; + break; + case SPRN_PID1: + vcpu_e500->pid[1] = vcpu->arch.gpr[rs]; break; + case SPRN_PID2: + vcpu_e500->pid[2] = vcpu->arch.gpr[rs]; break; + case SPRN_MAS0: + vcpu_e500->mas0 = vcpu->arch.gpr[rs]; break; + case SPRN_MAS1: + vcpu_e500->mas1 = vcpu->arch.gpr[rs]; break; + case SPRN_MAS2: + vcpu_e500->mas2 = vcpu->arch.gpr[rs]; break; + case SPRN_MAS3: + vcpu_e500->mas3 = vcpu->arch.gpr[rs]; break; + case SPRN_MAS4: + vcpu_e500->mas4 = vcpu->arch.gpr[rs]; break; + case SPRN_MAS6: + vcpu_e500->mas6 = vcpu->arch.gpr[rs]; break; + case SPRN_MAS7: + vcpu_e500->mas7 = vcpu->arch.gpr[rs]; break; + case SPRN_L1CSR1: + vcpu_e500->l1csr1 = vcpu->arch.gpr[rs]; break; + case SPRN_HID0: + vcpu_e500->hid0 = vcpu->arch.gpr[rs]; break; + case SPRN_HID1: + vcpu_e500->hid1 = vcpu->arch.gpr[rs]; break; + + default: + emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs); + } + + return emulated; +} + +int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int emulated = EMULATE_DONE; + + switch (sprn) { + case SPRN_PID: + vcpu->arch.gpr[rt] = vcpu_e500->pid[0]; break; + case SPRN_PID1: + vcpu->arch.gpr[rt] = vcpu_e500->pid[1]; break; + case SPRN_PID2: + vcpu->arch.gpr[rt] = vcpu_e500->pid[2]; break; + case SPRN_MAS0: + vcpu->arch.gpr[rt] = vcpu_e500->mas0; break; + case SPRN_MAS1: + vcpu->arch.gpr[rt] = vcpu_e500->mas1; break; + case SPRN_MAS2: + vcpu->arch.gpr[rt] = vcpu_e500->mas2; break; + case SPRN_MAS3: + vcpu->arch.gpr[rt] = vcpu_e500->mas3; break; + case SPRN_MAS4: + vcpu->arch.gpr[rt] = vcpu_e500->mas4; break; + case SPRN_MAS6: + vcpu->arch.gpr[rt] = vcpu_e500->mas6; break; + case SPRN_MAS7: + vcpu->arch.gpr[rt] = vcpu_e500->mas7; break; + + case SPRN_TLB0CFG: + vcpu->arch.gpr[rt] = mfspr(SPRN_TLB0CFG); + vcpu->arch.gpr[rt] &= ~0xfffUL; + vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[0]; + break; + + case SPRN_TLB1CFG: + vcpu->arch.gpr[rt] = mfspr(SPRN_TLB1CFG); + vcpu->arch.gpr[rt] &= ~0xfffUL; + vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[1]; + break; + + case SPRN_L1CSR1: + vcpu->arch.gpr[rt] = vcpu_e500->l1csr1; break; + case SPRN_HID0: + vcpu->arch.gpr[rt] = vcpu_e500->hid0; break; + case SPRN_HID1: + vcpu->arch.gpr[rt] = vcpu_e500->hid1; break; + + default: + emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); + } + + return emulated; +} + diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c new file mode 100644 index 000000000000..6a50340f575e --- /dev/null +++ b/arch/powerpc/kvm/e500_tlb.c @@ -0,0 +1,737 @@ +/* + * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Yu Liu, yu.liu@freescale.com + * + * Description: + * This file is based on arch/powerpc/kvm/44x_tlb.c, + * by Hollis Blanchard . + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "e500_tlb.h" + +#define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1) + +static unsigned int tlb1_entry_num; + +void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + struct tlbe *tlbe; + int i, tlbsel; + + printk("| %8s | %8s | %8s | %8s | %8s |\n", + "nr", "mas1", "mas2", "mas3", "mas7"); + + for (tlbsel = 0; tlbsel < 2; tlbsel++) { + printk("Guest TLB%d:\n", tlbsel); + for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++) { + tlbe = &vcpu_e500->guest_tlb[tlbsel][i]; + if (tlbe->mas1 & MAS1_VALID) + printk(" G[%d][%3d] | %08X | %08X | %08X | %08X |\n", + tlbsel, i, tlbe->mas1, tlbe->mas2, + tlbe->mas3, tlbe->mas7); + } + } + + for (tlbsel = 0; tlbsel < 2; tlbsel++) { + printk("Shadow TLB%d:\n", tlbsel); + for (i = 0; i < vcpu_e500->shadow_tlb_size[tlbsel]; i++) { + tlbe = &vcpu_e500->shadow_tlb[tlbsel][i]; + if (tlbe->mas1 & MAS1_VALID) + printk(" S[%d][%3d] | %08X | %08X | %08X | %08X |\n", + tlbsel, i, tlbe->mas1, tlbe->mas2, + tlbe->mas3, tlbe->mas7); + } + } +} + +static inline unsigned int tlb0_get_next_victim( + struct kvmppc_vcpu_e500 *vcpu_e500) +{ + unsigned int victim; + + victim = vcpu_e500->guest_tlb_nv[0]++; + if (unlikely(vcpu_e500->guest_tlb_nv[0] >= KVM_E500_TLB0_WAY_NUM)) + vcpu_e500->guest_tlb_nv[0] = 0; + + return victim; +} + +static inline unsigned int tlb1_max_shadow_size(void) +{ + return tlb1_entry_num - tlbcam_index; +} + +static inline int tlbe_is_writable(struct tlbe *tlbe) +{ + return tlbe->mas3 & (MAS3_SW|MAS3_UW); +} + +static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) +{ + /* Mask off reserved bits. */ + mas3 &= MAS3_ATTRIB_MASK; + + if (!usermode) { + /* Guest is in supervisor mode, + * so we need to translate guest + * supervisor permissions into user permissions. */ + mas3 &= ~E500_TLB_USER_PERM_MASK; + mas3 |= (mas3 & E500_TLB_SUPER_PERM_MASK) << 1; + } + + return mas3 | E500_TLB_SUPER_PERM_MASK; +} + +static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode) +{ + return mas2 & MAS2_ATTRIB_MASK; +} + +/* + * writing shadow tlb entry to host TLB + */ +static inline void __write_host_tlbe(struct tlbe *stlbe) +{ + mtspr(SPRN_MAS1, stlbe->mas1); + mtspr(SPRN_MAS2, stlbe->mas2); + mtspr(SPRN_MAS3, stlbe->mas3); + mtspr(SPRN_MAS7, stlbe->mas7); + __asm__ __volatile__ ("tlbwe\n" : : ); +} + +static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, + int tlbsel, int esel) +{ + struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel]; + + local_irq_disable(); + if (tlbsel == 0) { + __write_host_tlbe(stlbe); + } else { + unsigned register mas0; + + mas0 = mfspr(SPRN_MAS0); + + mtspr(SPRN_MAS0, MAS0_TLBSEL(1) | MAS0_ESEL(to_htlb1_esel(esel))); + __write_host_tlbe(stlbe); + + mtspr(SPRN_MAS0, mas0); + } + local_irq_enable(); +} + +void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int i; + unsigned register mas0; + + /* Load all valid TLB1 entries to reduce guest tlb miss fault */ + local_irq_disable(); + mas0 = mfspr(SPRN_MAS0); + for (i = 0; i < tlb1_max_shadow_size(); i++) { + struct tlbe *stlbe = &vcpu_e500->shadow_tlb[1][i]; + + if (get_tlb_v(stlbe)) { + mtspr(SPRN_MAS0, MAS0_TLBSEL(1) + | MAS0_ESEL(to_htlb1_esel(i))); + __write_host_tlbe(stlbe); + } + } + mtspr(SPRN_MAS0, mas0); + local_irq_enable(); +} + +void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu) +{ + _tlbia(); +} + +/* Search the guest TLB for a matching entry. */ +static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500, + gva_t eaddr, int tlbsel, unsigned int pid, int as) +{ + int i; + + /* XXX Replace loop with fancy data structures. */ + for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++) { + struct tlbe *tlbe = &vcpu_e500->guest_tlb[tlbsel][i]; + unsigned int tid; + + if (eaddr < get_tlb_eaddr(tlbe)) + continue; + + if (eaddr > get_tlb_end(tlbe)) + continue; + + tid = get_tlb_tid(tlbe); + if (tid && (tid != pid)) + continue; + + if (!get_tlb_v(tlbe)) + continue; + + if (get_tlb_ts(tlbe) != as && as != -1) + continue; + + return i; + } + + return -1; +} + +static void kvmppc_e500_shadow_release(struct kvmppc_vcpu_e500 *vcpu_e500, + int tlbsel, int esel) +{ + struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel]; + struct page *page = vcpu_e500->shadow_pages[tlbsel][esel]; + + if (page) { + vcpu_e500->shadow_pages[tlbsel][esel] = NULL; + + if (get_tlb_v(stlbe)) { + if (tlbe_is_writable(stlbe)) + kvm_release_page_dirty(page); + else + kvm_release_page_clean(page); + } + } +} + +static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500, + int tlbsel, int esel) +{ + struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel]; + + kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel); + stlbe->mas1 = 0; + KVMTRACE_5D(STLB_INVAL, &vcpu_e500->vcpu, index_of(tlbsel, esel), + stlbe->mas1, stlbe->mas2, stlbe->mas3, stlbe->mas7, + handler); +} + +static void kvmppc_e500_tlb1_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500, + gva_t eaddr, gva_t eend, u32 tid) +{ + unsigned int pid = tid & 0xff; + unsigned int i; + + /* XXX Replace loop with fancy data structures. */ + for (i = 0; i < vcpu_e500->guest_tlb_size[1]; i++) { + struct tlbe *stlbe = &vcpu_e500->shadow_tlb[1][i]; + unsigned int tid; + + if (!get_tlb_v(stlbe)) + continue; + + if (eend < get_tlb_eaddr(stlbe)) + continue; + + if (eaddr > get_tlb_end(stlbe)) + continue; + + tid = get_tlb_tid(stlbe); + if (tid && (tid != pid)) + continue; + + kvmppc_e500_stlbe_invalidate(vcpu_e500, 1, i); + write_host_tlbe(vcpu_e500, 1, i); + } +} + +static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, + unsigned int eaddr, int as) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + unsigned int victim, pidsel, tsized; + int tlbsel; + + /* since we only have tow TLBs, only lower bit is used. */ + tlbsel = (vcpu_e500->mas4 >> 28) & 0x1; + victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0; + pidsel = (vcpu_e500->mas4 >> 16) & 0xf; + tsized = (vcpu_e500->mas4 >> 8) & 0xf; + + vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim) + | MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]); + vcpu_e500->mas1 = MAS1_VALID | (as ? MAS1_TS : 0) + | MAS1_TID(vcpu_e500->pid[pidsel]) + | MAS1_TSIZE(tsized); + vcpu_e500->mas2 = (eaddr & MAS2_EPN) + | (vcpu_e500->mas4 & MAS2_ATTRIB_MASK); + vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3; + vcpu_e500->mas6 = (vcpu_e500->mas6 & MAS6_SPID1) + | (get_cur_pid(vcpu) << 16) + | (as ? MAS6_SAS : 0); + vcpu_e500->mas7 = 0; +} + +static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, + u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int esel) +{ + struct page *new_page; + struct tlbe *stlbe; + hpa_t hpaddr; + + stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel]; + + /* Get reference to new page. */ + new_page = gfn_to_page(vcpu_e500->vcpu.kvm, gfn); + if (is_error_page(new_page)) { + printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn); + kvm_release_page_clean(new_page); + return; + } + hpaddr = page_to_phys(new_page); + + /* Drop reference to old page. */ + kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel); + + vcpu_e500->shadow_pages[tlbsel][esel] = new_page; + + /* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */ + stlbe->mas1 = MAS1_TSIZE(BOOKE_PAGESZ_4K) + | MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID; + stlbe->mas2 = (gvaddr & MAS2_EPN) + | e500_shadow_mas2_attrib(gtlbe->mas2, + vcpu_e500->vcpu.arch.msr & MSR_PR); + stlbe->mas3 = (hpaddr & MAS3_RPN) + | e500_shadow_mas3_attrib(gtlbe->mas3, + vcpu_e500->vcpu.arch.msr & MSR_PR); + stlbe->mas7 = (hpaddr >> 32) & MAS7_RPN; + + KVMTRACE_5D(STLB_WRITE, &vcpu_e500->vcpu, index_of(tlbsel, esel), + stlbe->mas1, stlbe->mas2, stlbe->mas3, stlbe->mas7, + handler); +} + +/* XXX only map the one-one case, for now use TLB0 */ +static int kvmppc_e500_stlbe_map(struct kvmppc_vcpu_e500 *vcpu_e500, + int tlbsel, int esel) +{ + struct tlbe *gtlbe; + + gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel]; + + kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe), + get_tlb_raddr(gtlbe) >> PAGE_SHIFT, + gtlbe, tlbsel, esel); + + return esel; +} + +/* Caller must ensure that the specified guest TLB entry is safe to insert into + * the shadow TLB. */ +/* XXX for both one-one and one-to-many , for now use TLB1 */ +static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, + u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe) +{ + unsigned int victim; + + victim = vcpu_e500->guest_tlb_nv[1]++; + + if (unlikely(vcpu_e500->guest_tlb_nv[1] >= tlb1_max_shadow_size())) + vcpu_e500->guest_tlb_nv[1] = 0; + + kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, victim); + + return victim; +} + +/* Invalidate all guest kernel mappings when enter usermode, + * so that when they fault back in they will get the + * proper permission bits. */ +void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode) +{ + if (usermode) { + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int i; + + /* XXX Replace loop with fancy data structures. */ + /* needn't set modified since tlbia will make TLB1 coherent */ + for (i = 0; i < tlb1_max_shadow_size(); i++) + kvmppc_e500_stlbe_invalidate(vcpu_e500, 1, i); + + _tlbia(); + } +} + +static int kvmppc_e500_gtlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500, + int tlbsel, int esel) +{ + struct tlbe *gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel]; + + if (unlikely(get_tlb_iprot(gtlbe))) + return -1; + + if (tlbsel == 1) { + kvmppc_e500_tlb1_invalidate(vcpu_e500, get_tlb_eaddr(gtlbe), + get_tlb_end(gtlbe), + get_tlb_tid(gtlbe)); + } else { + kvmppc_e500_stlbe_invalidate(vcpu_e500, tlbsel, esel); + } + + gtlbe->mas1 = 0; + + return 0; +} + +int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + unsigned int ia; + int esel, tlbsel; + gva_t ea; + + ea = ((ra) ? vcpu->arch.gpr[ra] : 0) + vcpu->arch.gpr[rb]; + + ia = (ea >> 2) & 0x1; + + /* since we only have tow TLBs, only lower bit is used. */ + tlbsel = (ea >> 3) & 0x1; + + if (ia) { + /* invalidate all entries */ + for (esel = 0; esel < vcpu_e500->guest_tlb_size[tlbsel]; esel++) + kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); + } else { + ea &= 0xfffff000; + esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, + get_cur_pid(vcpu), -1); + if (esel >= 0) + kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); + } + + _tlbia(); + + return EMULATE_DONE; +} + +int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int tlbsel, esel; + struct tlbe *gtlbe; + + tlbsel = get_tlb_tlbsel(vcpu_e500); + esel = get_tlb_esel(vcpu_e500, tlbsel); + + gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel]; + vcpu_e500->mas0 &= MAS0_NV(0); + vcpu_e500->mas0 |= MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]); + vcpu_e500->mas1 = gtlbe->mas1; + vcpu_e500->mas2 = gtlbe->mas2; + vcpu_e500->mas3 = gtlbe->mas3; + vcpu_e500->mas7 = gtlbe->mas7; + + return EMULATE_DONE; +} + +int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int as = !!get_cur_sas(vcpu_e500); + unsigned int pid = get_cur_spid(vcpu_e500); + int esel, tlbsel; + struct tlbe *gtlbe = NULL; + gva_t ea; + + ea = vcpu->arch.gpr[rb]; + + for (tlbsel = 0; tlbsel < 2; tlbsel++) { + esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as); + if (esel >= 0) { + gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel]; + break; + } + } + + if (gtlbe) { + vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(esel) + | MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]); + vcpu_e500->mas1 = gtlbe->mas1; + vcpu_e500->mas2 = gtlbe->mas2; + vcpu_e500->mas3 = gtlbe->mas3; + vcpu_e500->mas7 = gtlbe->mas7; + } else { + int victim; + + /* since we only have tow TLBs, only lower bit is used. */ + tlbsel = vcpu_e500->mas4 >> 28 & 0x1; + victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0; + + vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim) + | MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]); + vcpu_e500->mas1 = (vcpu_e500->mas6 & MAS6_SPID0) + | (vcpu_e500->mas6 & (MAS6_SAS ? MAS1_TS : 0)) + | (vcpu_e500->mas4 & MAS4_TSIZED(~0)); + vcpu_e500->mas2 &= MAS2_EPN; + vcpu_e500->mas2 |= vcpu_e500->mas4 & MAS2_ATTRIB_MASK; + vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3; + vcpu_e500->mas7 = 0; + } + + return EMULATE_DONE; +} + +int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + u64 eaddr; + u64 raddr; + u32 tid; + struct tlbe *gtlbe; + int tlbsel, esel, stlbsel, sesel; + + tlbsel = get_tlb_tlbsel(vcpu_e500); + esel = get_tlb_esel(vcpu_e500, tlbsel); + + gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel]; + + if (get_tlb_v(gtlbe) && tlbsel == 1) { + eaddr = get_tlb_eaddr(gtlbe); + tid = get_tlb_tid(gtlbe); + kvmppc_e500_tlb1_invalidate(vcpu_e500, eaddr, + get_tlb_end(gtlbe), tid); + } + + gtlbe->mas1 = vcpu_e500->mas1; + gtlbe->mas2 = vcpu_e500->mas2; + gtlbe->mas3 = vcpu_e500->mas3; + gtlbe->mas7 = vcpu_e500->mas7; + + KVMTRACE_5D(GTLB_WRITE, vcpu, vcpu_e500->mas0, + gtlbe->mas1, gtlbe->mas2, gtlbe->mas3, gtlbe->mas7, + handler); + + /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ + if (tlbe_is_host_safe(vcpu, gtlbe)) { + switch (tlbsel) { + case 0: + /* TLB0 */ + gtlbe->mas1 &= ~MAS1_TSIZE(~0); + gtlbe->mas1 |= MAS1_TSIZE(BOOKE_PAGESZ_4K); + + stlbsel = 0; + sesel = kvmppc_e500_stlbe_map(vcpu_e500, 0, esel); + + break; + + case 1: + /* TLB1 */ + eaddr = get_tlb_eaddr(gtlbe); + raddr = get_tlb_raddr(gtlbe); + + /* Create a 4KB mapping on the host. + * If the guest wanted a large page, + * only the first 4KB is mapped here and the rest + * are mapped on the fly. */ + stlbsel = 1; + sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, + raddr >> PAGE_SHIFT, gtlbe); + break; + + default: + BUG(); + } + write_host_tlbe(vcpu_e500, stlbsel, sesel); + } + + return EMULATE_DONE; +} + +int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) +{ + unsigned int as = !!(vcpu->arch.msr & MSR_IS); + + return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as); +} + +int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) +{ + unsigned int as = !!(vcpu->arch.msr & MSR_DS); + + return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as); +} + +void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu) +{ + unsigned int as = !!(vcpu->arch.msr & MSR_IS); + + kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.pc, as); +} + +void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu) +{ + unsigned int as = !!(vcpu->arch.msr & MSR_DS); + + kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.fault_dear, as); +} + +gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index, + gva_t eaddr) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + struct tlbe *gtlbe = + &vcpu_e500->guest_tlb[tlbsel_of(index)][esel_of(index)]; + u64 pgmask = get_tlb_bytes(gtlbe) - 1; + + return get_tlb_raddr(gtlbe) | (eaddr & pgmask); +} + +void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int tlbsel, i; + + for (tlbsel = 0; tlbsel < 2; tlbsel++) + for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++) + kvmppc_e500_shadow_release(vcpu_e500, tlbsel, i); + + /* discard all guest mapping */ + _tlbia(); +} + +void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, + unsigned int index) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int tlbsel = tlbsel_of(index); + int esel = esel_of(index); + int stlbsel, sesel; + + switch (tlbsel) { + case 0: + stlbsel = 0; + sesel = esel; + break; + + case 1: { + gfn_t gfn = gpaddr >> PAGE_SHIFT; + struct tlbe *gtlbe + = &vcpu_e500->guest_tlb[tlbsel][esel]; + + stlbsel = 1; + sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, gtlbe); + break; + } + + default: + BUG(); + break; + } + write_host_tlbe(vcpu_e500, stlbsel, sesel); +} + +int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu, + gva_t eaddr, unsigned int pid, int as) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int esel, tlbsel; + + for (tlbsel = 0; tlbsel < 2; tlbsel++) { + esel = kvmppc_e500_tlb_index(vcpu_e500, eaddr, tlbsel, pid, as); + if (esel >= 0) + return index_of(tlbsel, esel); + } + + return -1; +} + +void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + struct tlbe *tlbe; + + /* Insert large initial mapping for guest. */ + tlbe = &vcpu_e500->guest_tlb[1][0]; + tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_256M); + tlbe->mas2 = 0; + tlbe->mas3 = E500_TLB_SUPER_PERM_MASK; + tlbe->mas7 = 0; + + /* 4K map for serial output. Used by kernel wrapper. */ + tlbe = &vcpu_e500->guest_tlb[1][1]; + tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_4K); + tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G; + tlbe->mas3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK; + tlbe->mas7 = 0; +} + +int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + tlb1_entry_num = mfspr(SPRN_TLB1CFG) & 0xFFF; + + vcpu_e500->guest_tlb_size[0] = KVM_E500_TLB0_SIZE; + vcpu_e500->guest_tlb[0] = + kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL); + if (vcpu_e500->guest_tlb[0] == NULL) + goto err_out; + + vcpu_e500->shadow_tlb_size[0] = KVM_E500_TLB0_SIZE; + vcpu_e500->shadow_tlb[0] = + kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL); + if (vcpu_e500->shadow_tlb[0] == NULL) + goto err_out_guest0; + + vcpu_e500->guest_tlb_size[1] = KVM_E500_TLB1_SIZE; + vcpu_e500->guest_tlb[1] = + kzalloc(sizeof(struct tlbe) * KVM_E500_TLB1_SIZE, GFP_KERNEL); + if (vcpu_e500->guest_tlb[1] == NULL) + goto err_out_shadow0; + + vcpu_e500->shadow_tlb_size[1] = tlb1_entry_num; + vcpu_e500->shadow_tlb[1] = + kzalloc(sizeof(struct tlbe) * tlb1_entry_num, GFP_KERNEL); + if (vcpu_e500->shadow_tlb[1] == NULL) + goto err_out_guest1; + + vcpu_e500->shadow_pages[0] = (struct page **) + kzalloc(sizeof(struct page *) * KVM_E500_TLB0_SIZE, GFP_KERNEL); + if (vcpu_e500->shadow_pages[0] == NULL) + goto err_out_shadow1; + + vcpu_e500->shadow_pages[1] = (struct page **) + kzalloc(sizeof(struct page *) * tlb1_entry_num, GFP_KERNEL); + if (vcpu_e500->shadow_pages[1] == NULL) + goto err_out_page0; + + return 0; + +err_out_page0: + kfree(vcpu_e500->shadow_pages[0]); +err_out_shadow1: + kfree(vcpu_e500->shadow_tlb[1]); +err_out_guest1: + kfree(vcpu_e500->guest_tlb[1]); +err_out_shadow0: + kfree(vcpu_e500->shadow_tlb[0]); +err_out_guest0: + kfree(vcpu_e500->guest_tlb[0]); +err_out: + return -1; +} + +void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + kfree(vcpu_e500->shadow_pages[1]); + kfree(vcpu_e500->shadow_pages[0]); + kfree(vcpu_e500->shadow_tlb[1]); + kfree(vcpu_e500->guest_tlb[1]); + kfree(vcpu_e500->shadow_tlb[0]); + kfree(vcpu_e500->guest_tlb[0]); +} diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h new file mode 100644 index 000000000000..d8833f955163 --- /dev/null +++ b/arch/powerpc/kvm/e500_tlb.h @@ -0,0 +1,184 @@ +/* + * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Yu Liu, yu.liu@freescale.com + * + * Description: + * This file is based on arch/powerpc/kvm/44x_tlb.h, + * by Hollis Blanchard . + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#ifndef __KVM_E500_TLB_H__ +#define __KVM_E500_TLB_H__ + +#include +#include +#include +#include + +#define KVM_E500_TLB0_WAY_SIZE_BIT 7 /* Fixed */ +#define KVM_E500_TLB0_WAY_SIZE (1UL << KVM_E500_TLB0_WAY_SIZE_BIT) +#define KVM_E500_TLB0_WAY_SIZE_MASK (KVM_E500_TLB0_WAY_SIZE - 1) + +#define KVM_E500_TLB0_WAY_NUM_BIT 1 /* No greater than 7 */ +#define KVM_E500_TLB0_WAY_NUM (1UL << KVM_E500_TLB0_WAY_NUM_BIT) +#define KVM_E500_TLB0_WAY_NUM_MASK (KVM_E500_TLB0_WAY_NUM - 1) + +#define KVM_E500_TLB0_SIZE (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM) +#define KVM_E500_TLB1_SIZE 16 + +#define index_of(tlbsel, esel) (((tlbsel) << 16) | ((esel) & 0xFFFF)) +#define tlbsel_of(index) ((index) >> 16) +#define esel_of(index) ((index) & 0xFFFF) + +#define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW) +#define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW) +#define MAS2_ATTRIB_MASK \ + (MAS2_X0 | MAS2_X1 | MAS2_W | MAS2_I | MAS2_M | MAS2_G | MAS2_E) +#define MAS3_ATTRIB_MASK \ + (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \ + | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK) + +extern void kvmppc_dump_tlbs(struct kvm_vcpu *); +extern int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *); +extern int kvmppc_e500_emul_tlbre(struct kvm_vcpu *); +extern int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *, int, int); +extern int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *, int); +extern int kvmppc_e500_tlb_search(struct kvm_vcpu *, gva_t, unsigned int, int); +extern void kvmppc_e500_tlb_put(struct kvm_vcpu *); +extern void kvmppc_e500_tlb_load(struct kvm_vcpu *, int); +extern int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *); +extern void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *); +extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *); + +/* TLB helper functions */ +static inline unsigned int get_tlb_size(const struct tlbe *tlbe) +{ + return (tlbe->mas1 >> 8) & 0xf; +} + +static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe) +{ + return tlbe->mas2 & 0xfffff000; +} + +static inline u64 get_tlb_bytes(const struct tlbe *tlbe) +{ + unsigned int pgsize = get_tlb_size(tlbe); + return 1ULL << 10 << (pgsize << 1); +} + +static inline gva_t get_tlb_end(const struct tlbe *tlbe) +{ + u64 bytes = get_tlb_bytes(tlbe); + return get_tlb_eaddr(tlbe) + bytes - 1; +} + +static inline u64 get_tlb_raddr(const struct tlbe *tlbe) +{ + u64 rpn = tlbe->mas7; + return (rpn << 32) | (tlbe->mas3 & 0xfffff000); +} + +static inline unsigned int get_tlb_tid(const struct tlbe *tlbe) +{ + return (tlbe->mas1 >> 16) & 0xff; +} + +static inline unsigned int get_tlb_ts(const struct tlbe *tlbe) +{ + return (tlbe->mas1 >> 12) & 0x1; +} + +static inline unsigned int get_tlb_v(const struct tlbe *tlbe) +{ + return (tlbe->mas1 >> 31) & 0x1; +} + +static inline unsigned int get_tlb_iprot(const struct tlbe *tlbe) +{ + return (tlbe->mas1 >> 30) & 0x1; +} + +static inline unsigned int get_cur_pid(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.pid & 0xff; +} + +static inline unsigned int get_cur_spid( + const struct kvmppc_vcpu_e500 *vcpu_e500) +{ + return (vcpu_e500->mas6 >> 16) & 0xff; +} + +static inline unsigned int get_cur_sas( + const struct kvmppc_vcpu_e500 *vcpu_e500) +{ + return vcpu_e500->mas6 & 0x1; +} + +static inline unsigned int get_tlb_tlbsel( + const struct kvmppc_vcpu_e500 *vcpu_e500) +{ + /* + * Manual says that tlbsel has 2 bits wide. + * Since we only have tow TLBs, only lower bit is used. + */ + return (vcpu_e500->mas0 >> 28) & 0x1; +} + +static inline unsigned int get_tlb_nv_bit( + const struct kvmppc_vcpu_e500 *vcpu_e500) +{ + return vcpu_e500->mas0 & 0xfff; +} + +static inline unsigned int get_tlb_esel_bit( + const struct kvmppc_vcpu_e500 *vcpu_e500) +{ + return (vcpu_e500->mas0 >> 16) & 0xfff; +} + +static inline unsigned int get_tlb_esel( + const struct kvmppc_vcpu_e500 *vcpu_e500, + int tlbsel) +{ + unsigned int esel = get_tlb_esel_bit(vcpu_e500); + + if (tlbsel == 0) { + esel &= KVM_E500_TLB0_WAY_NUM_MASK; + esel |= ((vcpu_e500->mas2 >> 12) & KVM_E500_TLB0_WAY_SIZE_MASK) + << KVM_E500_TLB0_WAY_NUM_BIT; + } else { + esel &= KVM_E500_TLB1_SIZE - 1; + } + + return esel; +} + +static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, + const struct tlbe *tlbe) +{ + gpa_t gpa; + + if (!get_tlb_v(tlbe)) + return 0; + + /* Does it match current guest AS? */ + /* XXX what about IS != DS? */ + if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS)) + return 0; + + gpa = get_tlb_raddr(tlbe); + if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) + /* Mapping is not for RAM. */ + return 0; + + return 1; +} + +#endif /* __KVM_E500_TLB_H__ */ -- cgit v1.2.3-58-ga151 From b52a638c391c5c7b013180f5374274698b8535c8 Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Sat, 3 Jan 2009 16:23:11 -0600 Subject: KVM: ppc: Add kvmppc_mmu_dtlb/itlb_miss for booke When itlb or dtlb miss happens, E500 needs to update some mmu registers. So that the auto-load mechanism can work on E500 when write a tlb entry. Signed-off-by: Liu Yu Signed-off-by: Hollis Blanchard Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_ppc.h | 2 ++ arch/powerpc/kvm/44x_tlb.c | 8 ++++++++ arch/powerpc/kvm/booke.c | 2 ++ 3 files changed, 12 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 5e80a20f32b8..6052779dbb56 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -63,6 +63,8 @@ extern int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); extern int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index, gva_t eaddr); +extern void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu); +extern void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu); extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id); diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c index e67b7313ffc3..4a16f472cc18 100644 --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c @@ -232,6 +232,14 @@ int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); } +void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu) +{ +} + +void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu) +{ +} + static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x, unsigned int stlb_index) { diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index a73b3959e41c..933c406c7915 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -295,6 +295,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS); vcpu->arch.dear = vcpu->arch.fault_dear; vcpu->arch.esr = vcpu->arch.fault_esr; + kvmppc_mmu_dtlb_miss(vcpu); kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS); r = RESUME_GUEST; break; @@ -337,6 +338,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, if (gtlb_index < 0) { /* The guest didn't have a mapping for it. */ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS); + kvmppc_mmu_itlb_miss(vcpu); kvmppc_account_exit(vcpu, ITLB_REAL_MISS_EXITS); break; } -- cgit v1.2.3-58-ga151 From bdc89f13ec955c14777d5caf02dfca3f51d639bd Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Sat, 3 Jan 2009 16:23:12 -0600 Subject: KVM: ppc: distinguish between interrupts and priorities Although BOOKE_MAX_INTERRUPT has the right value, the meaning is not match. Signed-off-by: Liu Yu Signed-off-by: Hollis Blanchard Signed-off-by: Avi Kivity --- arch/powerpc/kvm/booke.c | 2 +- arch/powerpc/kvm/booke.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 933c406c7915..f192fbee2f29 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -163,7 +163,7 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) unsigned int priority; priority = __ffs(*pending); - while (priority <= BOOKE_MAX_INTERRUPT) { + while (priority <= BOOKE_IRQPRIO_MAX) { if (kvmppc_booke_irqprio_deliver(vcpu, priority)) break; diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index 311fdbc23fbe..7ceeb3e739b4 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -42,6 +42,7 @@ #define BOOKE_IRQPRIO_EXTERNAL 13 #define BOOKE_IRQPRIO_FIT 14 #define BOOKE_IRQPRIO_DECREMENTER 15 +#define BOOKE_IRQPRIO_MAX 15 /* Helper function for "full" MSR writes. No need to call this if only EE is * changing. */ -- cgit v1.2.3-58-ga151 From bb3a8a178dec1e46df3138a30f76acf67fe12397 Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Sat, 3 Jan 2009 16:23:13 -0600 Subject: KVM: ppc: Add extra E500 exceptions e500 has additional interrupt vectors (and corresponding IVORs) for SPE and performance monitoring interrupts. Signed-off-by: Liu Yu Signed-off-by: Hollis Blanchard Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_asm.h | 7 ++++++- arch/powerpc/include/asm/kvm_host.h | 2 +- arch/powerpc/kvm/booke.c | 18 ++++++++++++++++++ arch/powerpc/kvm/booke.h | 30 ++++++++++++++++++------------ arch/powerpc/kvm/booke_interrupts.S | 3 +++ arch/powerpc/kvm/e500.c | 20 +++++++++++++++++++- arch/powerpc/kvm/e500_emulate.c | 27 +++++++++++++++++++++++++++ 7 files changed, 92 insertions(+), 15 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 2197764796d9..56bfae59837f 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -42,7 +42,12 @@ #define BOOKE_INTERRUPT_DTLB_MISS 13 #define BOOKE_INTERRUPT_ITLB_MISS 14 #define BOOKE_INTERRUPT_DEBUG 15 -#define BOOKE_MAX_INTERRUPT 15 + +/* E500 */ +#define BOOKE_INTERRUPT_SPE_UNAVAIL 32 +#define BOOKE_INTERRUPT_SPE_FP_DATA 33 +#define BOOKE_INTERRUPT_SPE_FP_ROUND 34 +#define BOOKE_INTERRUPT_PERFORMANCE_MONITOR 35 #define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */ #define RESUME_FLAG_HOST (1<<1) /* Resume host? */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 50e5ce1b400a..1c6187697520 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -150,7 +150,7 @@ struct kvm_vcpu_arch { u32 tbu; u32 tcr; u32 tsr; - u32 ivor[16]; + u32 ivor[64]; ulong ivpr; u32 pir; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index f192fbee2f29..642e4204cf25 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -118,6 +118,9 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, case BOOKE_IRQPRIO_DATA_STORAGE: case BOOKE_IRQPRIO_INST_STORAGE: case BOOKE_IRQPRIO_FP_UNAVAIL: + case BOOKE_IRQPRIO_SPE_UNAVAIL: + case BOOKE_IRQPRIO_SPE_FP_DATA: + case BOOKE_IRQPRIO_SPE_FP_ROUND: case BOOKE_IRQPRIO_AP_UNAVAIL: case BOOKE_IRQPRIO_ALIGNMENT: allowed = 1; @@ -261,6 +264,21 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, r = RESUME_GUEST; break; + case BOOKE_INTERRUPT_SPE_UNAVAIL: + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_UNAVAIL); + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_SPE_FP_DATA: + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_DATA); + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_SPE_FP_ROUND: + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND); + r = RESUME_GUEST; + break; + case BOOKE_INTERRUPT_DATA_STORAGE: vcpu->arch.dear = vcpu->arch.fault_dear; vcpu->arch.esr = vcpu->arch.fault_esr; diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index 7ceeb3e739b4..d59bcca1f9d8 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -31,18 +31,24 @@ #define BOOKE_IRQPRIO_ALIGNMENT 2 #define BOOKE_IRQPRIO_PROGRAM 3 #define BOOKE_IRQPRIO_FP_UNAVAIL 4 -#define BOOKE_IRQPRIO_SYSCALL 5 -#define BOOKE_IRQPRIO_AP_UNAVAIL 6 -#define BOOKE_IRQPRIO_DTLB_MISS 7 -#define BOOKE_IRQPRIO_ITLB_MISS 8 -#define BOOKE_IRQPRIO_MACHINE_CHECK 9 -#define BOOKE_IRQPRIO_DEBUG 10 -#define BOOKE_IRQPRIO_CRITICAL 11 -#define BOOKE_IRQPRIO_WATCHDOG 12 -#define BOOKE_IRQPRIO_EXTERNAL 13 -#define BOOKE_IRQPRIO_FIT 14 -#define BOOKE_IRQPRIO_DECREMENTER 15 -#define BOOKE_IRQPRIO_MAX 15 +#define BOOKE_IRQPRIO_SPE_UNAVAIL 5 +#define BOOKE_IRQPRIO_SPE_FP_DATA 6 +#define BOOKE_IRQPRIO_SPE_FP_ROUND 7 +#define BOOKE_IRQPRIO_SYSCALL 8 +#define BOOKE_IRQPRIO_AP_UNAVAIL 9 +#define BOOKE_IRQPRIO_DTLB_MISS 10 +#define BOOKE_IRQPRIO_ITLB_MISS 11 +#define BOOKE_IRQPRIO_MACHINE_CHECK 12 +#define BOOKE_IRQPRIO_DEBUG 13 +#define BOOKE_IRQPRIO_CRITICAL 14 +#define BOOKE_IRQPRIO_WATCHDOG 15 +#define BOOKE_IRQPRIO_EXTERNAL 16 +#define BOOKE_IRQPRIO_FIT 17 +#define BOOKE_IRQPRIO_DECREMENTER 18 +#define BOOKE_IRQPRIO_PERFORMANCE_MONITOR 19 +#define BOOKE_IRQPRIO_MAX 19 + +extern unsigned long kvmppc_booke_handlers; /* Helper function for "full" MSR writes. No need to call this if only EE is * changing. */ diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 4679ec287e62..d0c6f841bbd1 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -86,6 +86,9 @@ KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS KVM_HANDLER BOOKE_INTERRUPT_DEBUG +KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL +KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA +KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND _GLOBAL(kvmppc_handler_len) .long kvmppc_handler_1 - kvmppc_handler_0 diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 7992da497cd4..d8067fd81cdd 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -21,6 +21,7 @@ #include #include +#include "booke.h" #include "e500_tlb.h" void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu) @@ -133,12 +134,29 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) static int kvmppc_e500_init(void) { - int r; + int r, i; + unsigned long ivor[3]; + unsigned long max_ivor = 0; r = kvmppc_booke_init(); if (r) return r; + /* copy extra E500 exception handlers */ + ivor[0] = mfspr(SPRN_IVOR32); + ivor[1] = mfspr(SPRN_IVOR33); + ivor[2] = mfspr(SPRN_IVOR34); + for (i = 0; i < 3; i++) { + if (ivor[i] > max_ivor) + max_ivor = ivor[i]; + + memcpy((void *)kvmppc_booke_handlers + ivor[i], + kvmppc_handlers_start + (i + 16) * kvmppc_handler_len, + kvmppc_handler_len); + } + flush_icache_range(kvmppc_booke_handlers, + kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); + return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), THIS_MODULE); } diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index a47f44cd2cfd..d3c0c7c7f209 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -107,6 +107,20 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) case SPRN_HID1: vcpu_e500->hid1 = vcpu->arch.gpr[rs]; break; + /* extra exceptions */ + case SPRN_IVOR32: + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR33: + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR34: + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR35: + vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = vcpu->arch.gpr[rs]; + break; + default: emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs); } @@ -160,6 +174,19 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) case SPRN_HID1: vcpu->arch.gpr[rt] = vcpu_e500->hid1; break; + /* extra exceptions */ + case SPRN_IVOR32: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; + break; + case SPRN_IVOR33: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]; + break; + case SPRN_IVOR34: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; + break; + case SPRN_IVOR35: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; + break; default: emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); } -- cgit v1.2.3-58-ga151 From f5d0906b5bafd7faea553ed1cc92bd06755b66b9 Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Sun, 4 Jan 2009 13:51:09 -0600 Subject: KVM: ppc: remove debug support broken by KVM debug rewrite After the rewrite of KVM's debug support, this code doesn't even build any more. Signed-off-by: Hollis Blanchard Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_host.h | 5 --- arch/powerpc/include/asm/kvm_ppc.h | 3 -- arch/powerpc/kvm/44x.c | 66 ------------------------------------- arch/powerpc/kvm/powerpc.c | 27 ++------------- 4 files changed, 2 insertions(+), 99 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 1c6187697520..dfdf13c9fefd 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -111,11 +111,6 @@ struct kvm_arch { struct kvm_vcpu_arch { u32 host_stack; u32 host_pid; - u32 host_dbcr0; - u32 host_dbcr1; - u32 host_dbcr2; - u32 host_iac[4]; - u32 host_msr; u64 fpr[32]; ulong gpr[32]; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 6052779dbb56..2c6ee349df5e 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -77,9 +77,6 @@ extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu); extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu); -extern void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu); -extern void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu); - extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu); extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index 8383603dd8a4..0cef809cec21 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -28,72 +28,6 @@ #include "44x_tlb.h" -/* Note: clearing MSR[DE] just means that the debug interrupt will not be - * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits. - * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt - * will be delivered as an "imprecise debug event" (which is indicated by - * DBSR[IDE]. - */ -static void kvm44x_disable_debug_interrupts(void) -{ - mtmsr(mfmsr() & ~MSR_DE); -} - -void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu) -{ - kvm44x_disable_debug_interrupts(); - - mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]); - mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]); - mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]); - mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]); - mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1); - mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2); - mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0); - mtmsr(vcpu->arch.host_msr); -} - -void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) -{ - struct kvm_guest_debug *dbg = &vcpu->guest_debug; - u32 dbcr0 = 0; - - vcpu->arch.host_msr = mfmsr(); - kvm44x_disable_debug_interrupts(); - - /* Save host debug register state. */ - vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1); - vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2); - vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3); - vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4); - vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0); - vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1); - vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2); - - /* set registers up for guest */ - - if (dbg->bp[0]) { - mtspr(SPRN_IAC1, dbg->bp[0]); - dbcr0 |= DBCR0_IAC1 | DBCR0_IDM; - } - if (dbg->bp[1]) { - mtspr(SPRN_IAC2, dbg->bp[1]); - dbcr0 |= DBCR0_IAC2 | DBCR0_IDM; - } - if (dbg->bp[2]) { - mtspr(SPRN_IAC3, dbg->bp[2]); - dbcr0 |= DBCR0_IAC3 | DBCR0_IDM; - } - if (dbg->bp[3]) { - mtspr(SPRN_IAC4, dbg->bp[3]); - dbcr0 |= DBCR0_IAC4 | DBCR0_IDM; - } - - mtspr(SPRN_DBCR0, dbcr0); - mtspr(SPRN_DBCR1, 0); - mtspr(SPRN_DBCR2, 0); -} - void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { kvmppc_44x_tlb_load(vcpu); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index f30be9ef2314..9057335fdc61 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -221,41 +221,18 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { - if (vcpu->guest_debug.enabled) - kvmppc_core_load_guest_debugstate(vcpu); - kvmppc_core_vcpu_load(vcpu, cpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { - if (vcpu->guest_debug.enabled) - kvmppc_core_load_host_debugstate(vcpu); - - /* Don't leave guest TLB entries resident when being de-scheduled. */ - /* XXX It would be nice to differentiate between heavyweight exit and - * sched_out here, since we could avoid the TLB flush for heavyweight - * exits. */ - _tlbil_all(); kvmppc_core_vcpu_put(vcpu); } int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, - struct kvm_guest_debug *dbg) + struct kvm_guest_debug *dbg) { - int i; - - vcpu->guest_debug.enabled = dbg->enabled; - if (vcpu->guest_debug.enabled) { - for (i=0; i < ARRAY_SIZE(vcpu->guest_debug.bp); i++) { - if (dbg->breakpoints[i].enabled) - vcpu->guest_debug.bp[i] = dbg->breakpoints[i].address; - else - vcpu->guest_debug.bp[i] = 0; - } - } - - return 0; + return -EINVAL; } static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, -- cgit v1.2.3-58-ga151 From 5d9b8e30f543a9f21a968a4cda71e8f6d1c66a61 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 4 Jan 2009 18:04:18 +0200 Subject: KVM: Add CONFIG_HAVE_KVM_IRQCHIP Two KVM archs support irqchips and two don't. Add a Kconfig item to make selecting between the two models easier. Signed-off-by: Avi Kivity --- arch/ia64/kvm/Kconfig | 4 ++++ arch/powerpc/kvm/Kconfig | 3 +++ arch/s390/kvm/Kconfig | 3 +++ arch/x86/kvm/Kconfig | 4 ++++ 4 files changed, 14 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig index f833a0b4188d..0a2d6b86075a 100644 --- a/arch/ia64/kvm/Kconfig +++ b/arch/ia64/kvm/Kconfig @@ -4,6 +4,10 @@ config HAVE_KVM bool +config HAVE_KVM_IRQCHIP + bool + default y + menuconfig VIRTUALIZATION bool "Virtualization" depends on HAVE_KVM || IA64 diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 146570550142..5a152a52796f 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -2,6 +2,9 @@ # KVM configuration # +config HAVE_KVM_IRQCHIP + bool + menuconfig VIRTUALIZATION bool "Virtualization" ---help--- diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index e051cad1f1e0..3e260b7e37b2 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -4,6 +4,9 @@ config HAVE_KVM bool +config HAVE_KVM_IRQCHIP + bool + menuconfig VIRTUALIZATION bool "Virtualization" default y diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index b81125f0bdee..0a303c3ed11f 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -4,6 +4,10 @@ config HAVE_KVM bool +config HAVE_KVM_IRQCHIP + bool + default y + menuconfig VIRTUALIZATION bool "Virtualization" depends on HAVE_KVM || X86 -- cgit v1.2.3-58-ga151 From 87c656b4147cd08c6efba95d8d70c12cba181255 Mon Sep 17 00:00:00 2001 From: Liu Yu Date: Wed, 14 Jan 2009 10:47:36 -0600 Subject: powerpc/fsl-booke: declare tlbcam_index for use in c So, KVM needs to read tlbcam_index to know exactly which TLB1 entry is unused by host. Signed-off-by: Liu Yu Acked-by: Kumar Gala Signed-off-by: Hollis Blanchard Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/mmu-fsl-booke.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/mmu-fsl-booke.h b/arch/powerpc/include/asm/mmu-fsl-booke.h index 3f941c0f7e8e..4285b64a65e0 100644 --- a/arch/powerpc/include/asm/mmu-fsl-booke.h +++ b/arch/powerpc/include/asm/mmu-fsl-booke.h @@ -75,6 +75,8 @@ #ifndef __ASSEMBLY__ +extern unsigned int tlbcam_index; + typedef struct { unsigned int id; unsigned int active; -- cgit v1.2.3-58-ga151 From fb2838d446f6ee7e13e4149e08ec138753c5c450 Mon Sep 17 00:00:00 2001 From: Liu Yu Date: Wed, 14 Jan 2009 10:47:37 -0600 Subject: KVM: ppc: Fix e500 warnings and some spelling problems Signed-off-by: Avi Kivity --- arch/powerpc/kvm/e500_emulate.c | 2 -- arch/powerpc/kvm/e500_tlb.c | 6 +++--- arch/powerpc/kvm/e500_tlb.h | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index d3c0c7c7f209..7a98d4a4c1ed 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -30,8 +30,6 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, int emulated = EMULATE_DONE; int ra; int rb; - int rs; - int rt; switch (get_op(inst)) { case 31: diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 6a50340f575e..e3daf57b5f5e 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -260,7 +260,7 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, unsigned int victim, pidsel, tsized; int tlbsel; - /* since we only have tow TLBs, only lower bit is used. */ + /* since we only have two TLBs, only lower bit is used. */ tlbsel = (vcpu_e500->mas4 >> 28) & 0x1; victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0; pidsel = (vcpu_e500->mas4 >> 16) & 0xf; @@ -402,7 +402,7 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb) ia = (ea >> 2) & 0x1; - /* since we only have tow TLBs, only lower bit is used. */ + /* since we only have two TLBs, only lower bit is used. */ tlbsel = (ea >> 3) & 0x1; if (ia) { @@ -471,7 +471,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) } else { int victim; - /* since we only have tow TLBs, only lower bit is used. */ + /* since we only have two TLBs, only lower bit is used. */ tlbsel = vcpu_e500->mas4 >> 28 & 0x1; victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0; diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h index d8833f955163..ab49e9355185 100644 --- a/arch/powerpc/kvm/e500_tlb.h +++ b/arch/powerpc/kvm/e500_tlb.h @@ -126,7 +126,7 @@ static inline unsigned int get_tlb_tlbsel( { /* * Manual says that tlbsel has 2 bits wide. - * Since we only have tow TLBs, only lower bit is used. + * Since we only have two TLBs, only lower bit is used. */ return (vcpu_e500->mas0 >> 28) & 0x1; } -- cgit v1.2.3-58-ga151 From 9aa4dd5e5f47a5feb96fc394fccf0265ab7d85a4 Mon Sep 17 00:00:00 2001 From: Liu Yu Date: Wed, 14 Jan 2009 10:47:38 -0600 Subject: KVM: ppc: Move to new TLB invalidate interface Commit 2a4aca1144394653269720ffbb5a325a77abd5fa removed old method _tlbia(). Signed-off-by: Liu Yu Signed-off-by: Hollis Blanchard Signed-off-by: Avi Kivity --- arch/powerpc/kvm/e500_tlb.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index e3daf57b5f5e..d437160d388c 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -20,6 +20,7 @@ #include #include +#include "../mm/mmu_decl.h" #include "e500_tlb.h" #define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1) @@ -158,7 +159,7 @@ void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu) void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu) { - _tlbia(); + _tlbil_all(); } /* Search the guest TLB for a matching entry. */ @@ -362,11 +363,10 @@ void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode) int i; /* XXX Replace loop with fancy data structures. */ - /* needn't set modified since tlbia will make TLB1 coherent */ for (i = 0; i < tlb1_max_shadow_size(); i++) kvmppc_e500_stlbe_invalidate(vcpu_e500, 1, i); - _tlbia(); + _tlbil_all(); } } @@ -417,7 +417,7 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb) kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); } - _tlbia(); + _tlbil_all(); return EMULATE_DONE; } @@ -604,7 +604,7 @@ void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) kvmppc_e500_shadow_release(vcpu_e500, tlbsel, i); /* discard all guest mapping */ - _tlbia(); + _tlbil_all(); } void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, -- cgit v1.2.3-58-ga151 From b0a1835d53c57bc38b36867c04436b60454cb610 Mon Sep 17 00:00:00 2001 From: Liu Yu Date: Tue, 17 Feb 2009 16:52:08 +0800 Subject: KVM: ppc: Add emulation of E500 register mmucsr0 Latest kernel flushes TLB via mmucsr0. Signed-off-by: Liu Yu Acked-by: Hollis Blanchard Signed-off-by: Avi Kivity --- arch/powerpc/kvm/e500_emulate.c | 8 ++++++++ arch/powerpc/kvm/e500_tlb.c | 16 ++++++++++++++++ arch/powerpc/kvm/e500_tlb.h | 1 + 3 files changed, 25 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index 7a98d4a4c1ed..3f760414b9f8 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -105,6 +105,11 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) case SPRN_HID1: vcpu_e500->hid1 = vcpu->arch.gpr[rs]; break; + case SPRN_MMUCSR0: + emulated = kvmppc_e500_emul_mt_mmucsr0(vcpu_e500, + vcpu->arch.gpr[rs]); + break; + /* extra exceptions */ case SPRN_IVOR32: vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = vcpu->arch.gpr[rs]; @@ -172,6 +177,9 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) case SPRN_HID1: vcpu->arch.gpr[rt] = vcpu_e500->hid1; break; + case SPRN_MMUCSR0: + vcpu->arch.gpr[rt] = 0; break; + /* extra exceptions */ case SPRN_IVOR32: vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index d437160d388c..72386ddbd9d5 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -391,6 +391,22 @@ static int kvmppc_e500_gtlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500, return 0; } +int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value) +{ + int esel; + + if (value & MMUCSR0_TLB0FI) + for (esel = 0; esel < vcpu_e500->guest_tlb_size[0]; esel++) + kvmppc_e500_gtlbe_invalidate(vcpu_e500, 0, esel); + if (value & MMUCSR0_TLB1FI) + for (esel = 0; esel < vcpu_e500->guest_tlb_size[1]; esel++) + kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel); + + _tlbil_all(); + + return EMULATE_DONE; +} + int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h index ab49e9355185..4d5cc0f7d796 100644 --- a/arch/powerpc/kvm/e500_tlb.h +++ b/arch/powerpc/kvm/e500_tlb.h @@ -44,6 +44,7 @@ | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK) extern void kvmppc_dump_tlbs(struct kvm_vcpu *); +extern int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *, ulong); extern int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *); extern int kvmppc_e500_emul_tlbre(struct kvm_vcpu *); extern int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *, int, int); -- cgit v1.2.3-58-ga151 From bc35cbc85cd78213590761618a13da6a9707652c Mon Sep 17 00:00:00 2001 From: Liu Yu Date: Tue, 17 Mar 2009 16:57:45 +0800 Subject: KVM: ppc: e500: Fix the bug that mas0 update to wrong value when read TLB entry Should clear and then update the next victim area here. Guest kernel only read TLB1 when startup kernel, this bug result in an extra 4K TLB1 mapping in guest from 0x0 to 0x0. As the problem has no impact to bootup a guest, we didn't notice it before. Signed-off-by: Liu Yu Signed-off-by: Avi Kivity --- arch/powerpc/kvm/e500_tlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 72386ddbd9d5..ec933209e8af 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -448,7 +448,7 @@ int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu) esel = get_tlb_esel(vcpu_e500, tlbsel); gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel]; - vcpu_e500->mas0 &= MAS0_NV(0); + vcpu_e500->mas0 &= ~MAS0_NV(~0); vcpu_e500->mas0 |= MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]); vcpu_e500->mas1 = gtlbe->mas1; vcpu_e500->mas2 = gtlbe->mas2; -- cgit v1.2.3-58-ga151 From 046a48b35baa7c66d0d0331256ba12ca51665411 Mon Sep 17 00:00:00 2001 From: Liu Yu Date: Tue, 17 Mar 2009 16:57:46 +0800 Subject: KVM: ppc: e500: Fix the bug that KVM is unstable in SMP TLB entry should enable memory coherence in SMP. And like commit 631fba9dd3aca519355322cef035730609e91593, remove guard attribute to enable the prefetch of guest memory. Signed-off-by: Liu Yu Signed-off-by: Avi Kivity --- arch/powerpc/kvm/e500_tlb.c | 4 ++++ arch/powerpc/kvm/e500_tlb.h | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index ec933209e8af..0e773fc2d5e4 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -99,7 +99,11 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode) { +#ifdef CONFIG_SMP + return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M; +#else return mas2 & MAS2_ATTRIB_MASK; +#endif } /* diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h index 4d5cc0f7d796..45b064b76906 100644 --- a/arch/powerpc/kvm/e500_tlb.h +++ b/arch/powerpc/kvm/e500_tlb.h @@ -38,7 +38,7 @@ #define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW) #define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW) #define MAS2_ATTRIB_MASK \ - (MAS2_X0 | MAS2_X1 | MAS2_W | MAS2_I | MAS2_M | MAS2_G | MAS2_E) + (MAS2_X0 | MAS2_X1) #define MAS3_ATTRIB_MASK \ (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \ | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK) -- cgit v1.2.3-58-ga151 From 54ca5412b5576fdb0a4ea4fedf6565bd6f34150c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 26 Jan 2009 09:12:12 -0800 Subject: PS3: replace bus_id usage These simple debug statments should be using dev_dbg() instead of accessing bus_id directly (or they should use device_name). As bus_id is going away, this patch is necessary. Acked-by: Geoff Levand Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/ps3/system-bus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index 58311a867851..a705fffbb498 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -376,7 +376,7 @@ static int ps3_system_bus_probe(struct device *_dev) struct ps3_system_bus_driver *drv; BUG_ON(!dev); - pr_debug(" -> %s:%d: %s\n", __func__, __LINE__, _dev->bus_id); + dev_dbg(_dev, "%s:%d\n", __func__, __LINE__); drv = ps3_system_bus_dev_to_system_bus_drv(dev); BUG_ON(!drv); @@ -398,7 +398,7 @@ static int ps3_system_bus_remove(struct device *_dev) struct ps3_system_bus_driver *drv; BUG_ON(!dev); - pr_debug(" -> %s:%d: %s\n", __func__, __LINE__, _dev->bus_id); + dev_dbg(_dev, "%s:%d\n", __func__, __LINE__); drv = ps3_system_bus_dev_to_system_bus_drv(dev); BUG_ON(!drv); -- cgit v1.2.3-58-ga151 From 2b1c6bd77d4e6a727ffac8630cd154b2144b751a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 10:09:09 +0100 Subject: generic compat_sys_ustat Due to a different size of ino_t ustat needs a compat handler, but currently only x86 and mips provide one. Add a generic compat_sys_ustat and switch all architectures over to it. Instead of doing various user copy hacks compat_sys_ustat just reimplements sys_ustat as it's trivial. This was suggested by Arnd Bergmann. Found by Eric Sandeen when running xfstests/017 on ppc64, which causes stack smashing warnings on RHEL/Fedora due to the too large amount of data writen by the syscall. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- arch/ia64/ia32/ia32_entry.S | 2 +- arch/mips/kernel/linux32.c | 34 ---------------------------------- arch/mips/kernel/scall64-n32.S | 2 +- arch/mips/kernel/scall64-o32.S | 2 +- arch/parisc/kernel/syscall_table.S | 2 +- arch/powerpc/include/asm/systbl.h | 2 +- arch/s390/kernel/compat_wrapper.S | 2 +- arch/x86/ia32/ia32entry.S | 2 +- arch/x86/ia32/sys_ia32.c | 22 ---------------------- arch/x86/include/asm/ia32.h | 7 ------- arch/x86/include/asm/sys_ia32.h | 2 -- fs/compat.c | 28 ++++++++++++++++++++++++++++ include/linux/compat.h | 8 ++++++++ 13 files changed, 43 insertions(+), 72 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S index a46f8395e9a5..af9405cd70e5 100644 --- a/arch/ia64/ia32/ia32_entry.S +++ b/arch/ia64/ia32/ia32_entry.S @@ -240,7 +240,7 @@ ia32_syscall_table: data8 sys_ni_syscall data8 sys_umask /* 60 */ data8 sys_chroot - data8 sys_ustat + data8 compat_sys_ustat data8 sys_dup2 data8 sys_getppid data8 sys_getpgrp /* 65 */ diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index 1a86f84fa947..784859cedef7 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -356,40 +356,6 @@ SYSCALL_DEFINE1(32_personality, unsigned long, personality) return ret; } -/* ustat compatibility */ -struct ustat32 { - compat_daddr_t f_tfree; - compat_ino_t f_tinode; - char f_fname[6]; - char f_fpack[6]; -}; - -extern asmlinkage long sys_ustat(dev_t dev, struct ustat __user * ubuf); - -SYSCALL_DEFINE2(32_ustat, dev_t, dev, struct ustat32 __user *, ubuf32) -{ - int err; - struct ustat tmp; - struct ustat32 tmp32; - mm_segment_t old_fs = get_fs(); - - set_fs(KERNEL_DS); - err = sys_ustat(dev, (struct ustat __user *)&tmp); - set_fs(old_fs); - - if (err) - goto out; - - memset(&tmp32, 0, sizeof(struct ustat32)); - tmp32.f_tfree = tmp.f_tfree; - tmp32.f_tinode = tmp.f_tinode; - - err = copy_to_user(ubuf32, &tmp32, sizeof(struct ustat32)) ? -EFAULT : 0; - -out: - return err; -} - SYSCALL_DEFINE4(32_sendfile, long, out_fd, long, in_fd, compat_off_t __user *, offset, s32, count) { diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 7438e92f8a01..f61d6b0e5731 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -253,7 +253,7 @@ EXPORT(sysn32_call_table) PTR compat_sys_utime /* 6130 */ PTR sys_mknod PTR sys_32_personality - PTR sys_32_ustat + PTR compat_sys_ustat PTR compat_sys_statfs PTR compat_sys_fstatfs /* 6135 */ PTR sys_sysfs diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index b0fef4ff9827..60997f1f69d4 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -265,7 +265,7 @@ sys_call_table: PTR sys_olduname PTR sys_umask /* 4060 */ PTR sys_chroot - PTR sys_32_ustat + PTR compat_sys_ustat PTR sys_dup2 PTR sys_getppid PTR sys_getpgrp /* 4065 */ diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 303d2b647e41..03b9a01bc16c 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -130,7 +130,7 @@ ENTRY_OURS(newuname) ENTRY_SAME(umask) /* 60 */ ENTRY_SAME(chroot) - ENTRY_SAME(ustat) + ENTRY_COMP(ustat) ENTRY_SAME(dup2) ENTRY_SAME(getppid) ENTRY_SAME(getpgrp) /* 65 */ diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 72353f6070a4..fe166491e9dc 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -65,7 +65,7 @@ SYSCALL(ni_syscall) SYSX(sys_ni_syscall,sys_olduname, sys_olduname) COMPAT_SYS_SPU(umask) SYSCALL_SPU(chroot) -SYSCALL(ustat) +COMPAT_SYS(ustat) SYSCALL_SPU(dup2) SYSCALL_SPU(getppid) SYSCALL_SPU(getpgrp) diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 62c706eb0de6..87cf5a79a351 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -252,7 +252,7 @@ sys32_chroot_wrapper: sys32_ustat_wrapper: llgfr %r2,%r2 # dev_t llgtr %r3,%r3 # struct ustat * - jg sys_ustat + jg compat_sys_ustat .globl sys32_dup2_wrapper sys32_dup2_wrapper: diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 5a0d76dc56a4..8ef8876666b2 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -557,7 +557,7 @@ ia32_sys_call_table: .quad sys32_olduname .quad sys_umask /* 60 */ .quad sys_chroot - .quad sys32_ustat + .quad compat_sys_ustat .quad sys_dup2 .quad sys_getppid .quad sys_getpgrp /* 65 */ diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 6c0d7f6231af..efac92fd1efb 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -638,28 +638,6 @@ long sys32_uname(struct old_utsname __user *name) return err ? -EFAULT : 0; } -long sys32_ustat(unsigned dev, struct ustat32 __user *u32p) -{ - struct ustat u; - mm_segment_t seg; - int ret; - - seg = get_fs(); - set_fs(KERNEL_DS); - ret = sys_ustat(dev, (struct ustat __user *)&u); - set_fs(seg); - if (ret < 0) - return ret; - - if (!access_ok(VERIFY_WRITE, u32p, sizeof(struct ustat32)) || - __put_user((__u32) u.f_tfree, &u32p->f_tfree) || - __put_user((__u32) u.f_tinode, &u32p->f_tfree) || - __copy_to_user(&u32p->f_fname, u.f_fname, sizeof(u.f_fname)) || - __copy_to_user(&u32p->f_fpack, u.f_fpack, sizeof(u.f_fpack))) - ret = -EFAULT; - return ret; -} - asmlinkage long sys32_execve(char __user *name, compat_uptr_t __user *argv, compat_uptr_t __user *envp, struct pt_regs *regs) { diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index 50ca486fd88c..1f7e62517284 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -129,13 +129,6 @@ typedef struct compat_siginfo { } _sifields; } compat_siginfo_t; -struct ustat32 { - __u32 f_tfree; - compat_ino_t f_tinode; - char f_fname[6]; - char f_fpack[6]; -}; - #define IA32_STACK_TOP IA32_PAGE_OFFSET #ifdef __KERNEL__ diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index ffb08be2a530..72a6dcd1299b 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -70,8 +70,6 @@ struct old_utsname; asmlinkage long sys32_olduname(struct oldold_utsname __user *); long sys32_uname(struct old_utsname __user *); -long sys32_ustat(unsigned, struct ustat32 __user *); - asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *, compat_uptr_t __user *, struct pt_regs *); asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *); diff --git a/fs/compat.c b/fs/compat.c index d0145ca27572..4e0db94b5353 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -378,6 +378,34 @@ out: return error; } +/* + * This is a copy of sys_ustat, just dealing with a structure layout. + * Given how simple this syscall is that apporach is more maintainable + * than the various conversion hacks. + */ +asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u) +{ + struct super_block *sb; + struct compat_ustat tmp; + struct kstatfs sbuf; + int err; + + sb = user_get_super(new_decode_dev(dev)); + if (!sb) + return -EINVAL; + err = vfs_statfs(sb->s_root, &sbuf); + drop_super(sb); + if (err) + return err; + + memset(&tmp, 0, sizeof(struct compat_ustat)); + tmp.f_tfree = sbuf.f_bfree; + tmp.f_tinode = sbuf.f_ffree; + if (copy_to_user(u, &tmp, sizeof(struct compat_ustat))) + return -EFAULT; + return 0; +} + static int get_compat_flock(struct flock *kfl, struct compat_flock __user *ufl) { if (!access_ok(VERIFY_READ, ufl, sizeof(*ufl)) || diff --git a/include/linux/compat.h b/include/linux/compat.h index 3fd2194ff573..b880864672de 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -125,6 +125,13 @@ struct compat_dirent { char d_name[256]; }; +struct compat_ustat { + compat_daddr_t f_tfree; + compat_ino_t f_tinode; + char f_fname[6]; + char f_fpack[6]; +}; + typedef union compat_sigval { compat_int_t sival_int; compat_uptr_t sival_ptr; @@ -178,6 +185,7 @@ long compat_sys_semtimedop(int semid, struct sembuf __user *tsems, unsigned nsems, const struct compat_timespec __user *timeout); asmlinkage long compat_sys_keyctl(u32 option, u32 arg2, u32 arg3, u32 arg4, u32 arg5); +asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u32); asmlinkage ssize_t compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, unsigned long vlen); -- cgit v1.2.3-58-ga151