diff options
-rw-r--r-- | arch/x86/entry/vdso/vma.c | 7 | ||||
-rw-r--r-- | arch/x86/hyperv/hv_init.c | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/hyperv-tlfs.h | 11 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mshyperv.c | 6 | ||||
-rw-r--r-- | drivers/clocksource/hyperv_timer.c | 61 | ||||
-rw-r--r-- | drivers/hv/hv_balloon.c | 94 | ||||
-rw-r--r-- | drivers/hv/ring_buffer.c | 13 | ||||
-rw-r--r-- | drivers/hv/vmbus_drv.c | 105 | ||||
-rw-r--r-- | drivers/iommu/Kconfig | 6 | ||||
-rw-r--r-- | drivers/iommu/hyperv-iommu.c | 11 | ||||
-rw-r--r-- | drivers/pci/controller/pci-hyperv.c | 6 | ||||
-rw-r--r-- | drivers/video/fbdev/hyperv_fb.c | 16 | ||||
-rw-r--r-- | include/clocksource/hyperv_timer.h | 7 | ||||
-rw-r--r-- | include/linux/hyperv.h | 4 | ||||
-rw-r--r-- | mm/page_reporting.c | 50 |
15 files changed, 302 insertions, 99 deletions
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 311eae30e089..6976416b2c9f 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -210,11 +210,10 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, pgprot_decrypted(vma->vm_page_prot)); } } else if (sym_offset == image->sym_hvclock_page) { - struct ms_hyperv_tsc_page *tsc_pg = hv_get_tsc_page(); + pfn = hv_get_tsc_pfn(); - if (tsc_pg && vclock_was_used(VDSO_CLOCKMODE_HVCLOCK)) - return vmf_insert_pfn(vma, vmf->address, - virt_to_phys(tsc_pg) >> PAGE_SHIFT); + if (pfn && vclock_was_used(VDSO_CLOCKMODE_HVCLOCK)) + return vmf_insert_pfn(vma, vmf->address, pfn); } else if (sym_offset == image->sym_timens_page) { struct page *timens_page = find_timens_vvar_page(vma); diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index a269049a43ce..41ef036ebb7b 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -462,6 +462,8 @@ void __init hyperv_init(void) BUG_ON(!src); memcpy_to_page(pg, 0, src, HV_HYP_PAGE_SIZE); memunmap(src); + + hv_remap_tsc_clocksource(); } else { hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); @@ -535,8 +537,6 @@ void hyperv_cleanup(void) union hv_x64_msr_hypercall_contents hypercall_msr; union hv_reference_tsc_msr tsc_msr; - unregister_syscore_ops(&hv_syscore_ops); - /* Reset our OS id */ wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, 0); diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 3089ec352743..6d9368ea3701 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -374,11 +374,20 @@ struct hv_nested_enlightenments_control { struct hv_vp_assist_page { __u32 apic_assist; __u32 reserved1; - __u64 vtl_control[3]; + __u32 vtl_entry_reason; + __u32 vtl_reserved; + __u64 vtl_ret_x64rax; + __u64 vtl_ret_x64rcx; struct 
hv_nested_enlightenments_control nested_control; __u8 enlighten_vmentry; __u8 reserved2[7]; __u64 current_nested_vmcs; + __u8 synthetic_time_unhalted_timer_expired; + __u8 reserved3[7]; + __u8 virtualization_fault_information[40]; + __u8 reserved4[8]; + __u8 intercept_message[256]; + __u8 vtl_ret_actions[256]; } __packed; struct hv_enlightened_vmcs { diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 831613959a92..46668e255421 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -475,6 +475,12 @@ static bool __init ms_hyperv_x2apic_available(void) * (logically) generates MSIs directly to the system APIC irq domain. * There is no HPET, and PCI MSI/MSI-X interrupts are remapped by the * pci-hyperv host bridge. + * + * Note: for a Hyper-V root partition, this will always return false. + * The hypervisor doesn't expose these HYPERV_CPUID_VIRT_STACK_* cpuids by + * default, they are implemented as intercepts by the Windows Hyper-V stack. + * Even a nested root partition (L2 root) will not get them because the + * nested (L1) hypervisor filters them out. 
*/ static bool __init ms_hyperv_msi_ext_dest_id(void) { diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index 18de1f439ffd..c0cef92b12b8 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -367,9 +367,18 @@ static union { u8 reserved[PAGE_SIZE]; } tsc_pg __aligned(PAGE_SIZE); +static struct ms_hyperv_tsc_page *tsc_page = &tsc_pg.page; +static unsigned long tsc_pfn; + +unsigned long hv_get_tsc_pfn(void) +{ + return tsc_pfn; +} +EXPORT_SYMBOL_GPL(hv_get_tsc_pfn); + struct ms_hyperv_tsc_page *hv_get_tsc_page(void) { - return &tsc_pg.page; + return tsc_page; } EXPORT_SYMBOL_GPL(hv_get_tsc_page); @@ -407,13 +416,12 @@ static void suspend_hv_clock_tsc(struct clocksource *arg) static void resume_hv_clock_tsc(struct clocksource *arg) { - phys_addr_t phys_addr = virt_to_phys(&tsc_pg); union hv_reference_tsc_msr tsc_msr; /* Re-enable the TSC page */ tsc_msr.as_uint64 = hv_get_register(HV_REGISTER_REFERENCE_TSC); tsc_msr.enable = 1; - tsc_msr.pfn = HVPFN_DOWN(phys_addr); + tsc_msr.pfn = tsc_pfn; hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr.as_uint64); } @@ -497,14 +505,10 @@ static __always_inline void hv_setup_sched_clock(void *sched_clock) {} static bool __init hv_init_tsc_clocksource(void) { union hv_reference_tsc_msr tsc_msr; - phys_addr_t phys_addr; if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE)) return false; - if (hv_root_partition) - return false; - /* * If Hyper-V offers TSC_INVARIANT, then the virtualized TSC correctly * handles frequency and offset changes due to live migration, @@ -522,18 +526,30 @@ static bool __init hv_init_tsc_clocksource(void) } hv_read_reference_counter = read_hv_clock_tsc; - phys_addr = virt_to_phys(hv_get_tsc_page()); /* - * The Hyper-V TLFS specifies to preserve the value of reserved - * bits in registers. 
So read the existing value, preserve the - * low order 12 bits, and add in the guest physical address - * (which already has at least the low 12 bits set to zero since - * it is page aligned). Also set the "enable" bit, which is bit 0. + * TSC page mapping works differently in root compared to guest. + * - In guest partition the guest PFN has to be passed to the + * hypervisor. + * - In root partition it's other way around: it has to map the PFN + * provided by the hypervisor. + * But it can't be mapped right here as it's too early and MMU isn't + * ready yet. So, we only set the enable bit here and will remap the + * page later in hv_remap_tsc_clocksource(). + * + * It worth mentioning, that TSC clocksource read function + * (read_hv_clock_tsc) has a MSR-based fallback mechanism, used when + * TSC page is zeroed (which is the case until the PFN is remapped) and + * thus TSC clocksource will work even without the real TSC page + * mapped. */ tsc_msr.as_uint64 = hv_get_register(HV_REGISTER_REFERENCE_TSC); + if (hv_root_partition) + tsc_pfn = tsc_msr.pfn; + else + tsc_pfn = HVPFN_DOWN(virt_to_phys(tsc_page)); tsc_msr.enable = 1; - tsc_msr.pfn = HVPFN_DOWN(phys_addr); + tsc_msr.pfn = tsc_pfn; hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr.as_uint64); clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); @@ -566,3 +582,20 @@ void __init hv_init_clocksource(void) hv_sched_clock_offset = hv_read_reference_counter(); hv_setup_sched_clock(read_hv_sched_clock_msr); } + +void __init hv_remap_tsc_clocksource(void) +{ + if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE)) + return; + + if (!hv_root_partition) { + WARN(1, "%s: attempt to remap TSC page in guest partition\n", + __func__); + return; + } + + tsc_page = memremap(tsc_pfn << HV_HYP_PAGE_SHIFT, sizeof(tsc_pg), + MEMREMAP_WB); + if (!tsc_page) + pr_err("Failed to remap Hyper-V TSC page.\n"); +} diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 6c127f061f06..cbe43e2567a7 100644 --- 
a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -469,12 +469,16 @@ static bool do_hot_add; * the specified number of seconds. */ static uint pressure_report_delay = 45; +extern unsigned int page_reporting_order; +#define HV_MAX_FAILURES 2 /* * The last time we posted a pressure report to host. */ static unsigned long last_post_time; +static int hv_hypercall_multi_failure; + module_param(hot_add, bool, (S_IRUGO | S_IWUSR)); MODULE_PARM_DESC(hot_add, "If set attempt memory hot_add"); @@ -579,6 +583,10 @@ static struct hv_dynmem_device dm_device; static void post_status(struct hv_dynmem_device *dm); +static void enable_page_reporting(void); + +static void disable_page_reporting(void); + #ifdef CONFIG_MEMORY_HOTPLUG static inline bool has_pfn_is_backed(struct hv_hotadd_state *has, unsigned long pfn) @@ -1418,6 +1426,18 @@ static int dm_thread_func(void *dm_dev) */ reinit_completion(&dm_device.config_event); post_status(dm); + /* + * disable free page reporting if multiple hypercall + * failure flag set. 
It is not done in the page_reporting + * callback context as that causes a deadlock between + * page_reporting_process() and page_reporting_unregister() + */ + if (hv_hypercall_multi_failure >= HV_MAX_FAILURES) { + pr_err("Multiple failures in cold memory discard hypercall, disabling page reporting\n"); + disable_page_reporting(); + /* Reset the flag after disabling reporting */ + hv_hypercall_multi_failure = 0; + } } return 0; @@ -1593,20 +1613,20 @@ static void balloon_onchannelcallback(void *context) } -/* Hyper-V only supports reporting 2MB pages or higher */ -#define HV_MIN_PAGE_REPORTING_ORDER 9 -#define HV_MIN_PAGE_REPORTING_LEN (HV_HYP_PAGE_SIZE << HV_MIN_PAGE_REPORTING_ORDER) +#define HV_LARGE_REPORTING_ORDER 9 +#define HV_LARGE_REPORTING_LEN (HV_HYP_PAGE_SIZE << \ + HV_LARGE_REPORTING_ORDER) static int hv_free_page_report(struct page_reporting_dev_info *pr_dev_info, struct scatterlist *sgl, unsigned int nents) { unsigned long flags; struct hv_memory_hint *hint; - int i; + int i, order; u64 status; struct scatterlist *sg; WARN_ON_ONCE(nents > HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES); - WARN_ON_ONCE(sgl->length < HV_MIN_PAGE_REPORTING_LEN); + WARN_ON_ONCE(sgl->length < (HV_HYP_PAGE_SIZE << page_reporting_order)); local_irq_save(flags); hint = *(struct hv_memory_hint **)this_cpu_ptr(hyperv_pcpu_input_arg); if (!hint) { @@ -1621,21 +1641,53 @@ static int hv_free_page_report(struct page_reporting_dev_info *pr_dev_info, range = &hint->ranges[i]; range->address_space = 0; - /* page reporting only reports 2MB pages or higher */ - range->page.largepage = 1; - range->page.additional_pages = - (sg->length / HV_MIN_PAGE_REPORTING_LEN) - 1; - range->page_size = HV_GPA_PAGE_RANGE_PAGE_SIZE_2MB; - range->base_large_pfn = - page_to_hvpfn(sg_page(sg)) >> HV_MIN_PAGE_REPORTING_ORDER; + order = get_order(sg->length); + /* + * Hyper-V expects the additional_pages field in the units + * of one of these 3 sizes, 4Kbytes, 2Mbytes or 1Gbytes. 
+ * This is dictated by the values of the fields page.largesize + * and page_size. + * This code however, only uses 4Kbytes and 2Mbytes units + * and not 1Gbytes unit. + */ + + /* page reporting for pages 2MB or higher */ + if (order >= HV_LARGE_REPORTING_ORDER ) { + range->page.largepage = 1; + range->page_size = HV_GPA_PAGE_RANGE_PAGE_SIZE_2MB; + range->base_large_pfn = page_to_hvpfn( + sg_page(sg)) >> HV_LARGE_REPORTING_ORDER; + range->page.additional_pages = + (sg->length / HV_LARGE_REPORTING_LEN) - 1; + } else { + /* Page reporting for pages below 2MB */ + range->page.basepfn = page_to_hvpfn(sg_page(sg)); + range->page.largepage = false; + range->page.additional_pages = + (sg->length / HV_HYP_PAGE_SIZE) - 1; + } + } status = hv_do_rep_hypercall(HV_EXT_CALL_MEMORY_HEAT_HINT, nents, 0, hint, NULL); local_irq_restore(flags); - if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) { + if (!hv_result_success(status)) { + pr_err("Cold memory discard hypercall failed with status %llx\n", - status); + status); + if (hv_hypercall_multi_failure > 0) + hv_hypercall_multi_failure++; + + if (hv_result(status) == HV_STATUS_INVALID_PARAMETER) { + pr_err("Underlying Hyper-V does not support order less than 9. Hypercall failed\n"); + pr_err("Defaulting to page_reporting_order %d\n", + pageblock_order); + page_reporting_order = pageblock_order; + hv_hypercall_multi_failure++; + return -EINVAL; + } + return -EINVAL; } @@ -1646,12 +1698,6 @@ static void enable_page_reporting(void) { int ret; - /* Essentially, validating 'PAGE_REPORTING_MIN_ORDER' is big enough. 
*/ - if (pageblock_order < HV_MIN_PAGE_REPORTING_ORDER) { - pr_debug("Cold memory discard is only supported on 2MB pages and above\n"); - return; - } - if (!hv_query_ext_cap(HV_EXT_CAPABILITY_MEMORY_COLD_DISCARD_HINT)) { pr_debug("Cold memory discard hint not supported by Hyper-V\n"); return; @@ -1659,12 +1705,18 @@ static void enable_page_reporting(void) BUILD_BUG_ON(PAGE_REPORTING_CAPACITY > HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES); dm_device.pr_dev_info.report = hv_free_page_report; + /* + * We let the page_reporting_order parameter decide the order + * in the page_reporting code + */ + dm_device.pr_dev_info.order = 0; ret = page_reporting_register(&dm_device.pr_dev_info); if (ret < 0) { dm_device.pr_dev_info.report = NULL; pr_err("Failed to enable cold memory discard: %d\n", ret); } else { - pr_info("Cold memory discard hint enabled\n"); + pr_info("Cold memory discard hint enabled with order %d\n", + page_reporting_order); } } diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 59a4aa86d1f3..c6692fd5ab15 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -280,6 +280,19 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info) ring_info->pkt_buffer_size = 0; } +/* + * Check if the ring buffer spinlock is available to take or not; used on + * atomic contexts, like panic path (see the Hyper-V framebuffer driver). + */ + +bool hv_ringbuffer_spinlock_busy(struct vmbus_channel *channel) +{ + struct hv_ring_buffer_info *rinfo = &channel->outbound; + + return spin_is_locked(&rinfo->ring_lock); +} +EXPORT_SYMBOL_GPL(hv_ringbuffer_spinlock_busy); + /* Write to the ring buffer. 
*/ int hv_ringbuffer_write(struct vmbus_channel *channel, const struct kvec *kv_list, u32 kv_count, diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index e592c481f7ae..0f00d57b7c25 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -25,7 +25,6 @@ #include <linux/sched/task_stack.h> #include <linux/delay.h> -#include <linux/notifier.h> #include <linux/panic_notifier.h> #include <linux/ptrace.h> #include <linux/screen_info.h> @@ -68,53 +67,74 @@ static int hyperv_report_reg(void) return !sysctl_record_panic_msg || !hv_panic_page; } -static int hyperv_panic_event(struct notifier_block *nb, unsigned long val, +/* + * The panic notifier below is responsible solely for unloading the + * vmbus connection, which is necessary in a panic event. + * + * Notice an intrincate relation of this notifier with Hyper-V + * framebuffer panic notifier exists - we need vmbus connection alive + * there in order to succeed, so we need to order both with each other + * [see hvfb_on_panic()] - this is done using notifiers' priorities. + */ +static int hv_panic_vmbus_unload(struct notifier_block *nb, unsigned long val, void *args) { - struct pt_regs *regs; - vmbus_initiate_unload(true); - - /* - * Hyper-V should be notified only once about a panic. If we will be - * doing hv_kmsg_dump() with kmsg data later, don't do the notification - * here. 
- */ - if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE - && hyperv_report_reg()) { - regs = current_pt_regs(); - hyperv_report_panic(regs, val, false); - } return NOTIFY_DONE; } +static struct notifier_block hyperv_panic_vmbus_unload_block = { + .notifier_call = hv_panic_vmbus_unload, + .priority = INT_MIN + 1, /* almost the latest one to execute */ +}; + +static int hv_die_panic_notify_crash(struct notifier_block *self, + unsigned long val, void *args); + +static struct notifier_block hyperv_die_report_block = { + .notifier_call = hv_die_panic_notify_crash, +}; +static struct notifier_block hyperv_panic_report_block = { + .notifier_call = hv_die_panic_notify_crash, +}; -static int hyperv_die_event(struct notifier_block *nb, unsigned long val, - void *args) +/* + * The following callback works both as die and panic notifier; its + * goal is to provide panic information to the hypervisor unless the + * kmsg dumper is used [see hv_kmsg_dump()], which provides more + * information but isn't always available. + * + * Notice that both the panic/die report notifiers are registered only + * if we have the capability HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE set. + */ +static int hv_die_panic_notify_crash(struct notifier_block *self, + unsigned long val, void *args) { - struct die_args *die = args; - struct pt_regs *regs = die->regs; + struct pt_regs *regs; + bool is_die; - /* Don't notify Hyper-V if the die event is other than oops */ - if (val != DIE_OOPS) - return NOTIFY_DONE; + /* Don't notify Hyper-V unless we have a die oops event or panic. */ + if (self == &hyperv_panic_report_block) { + is_die = false; + regs = current_pt_regs(); + } else { /* die event */ + if (val != DIE_OOPS) + return NOTIFY_DONE; + + is_die = true; + regs = ((struct die_args *)args)->regs; + } /* - * Hyper-V should be notified only once about a panic. If we will be - * doing hv_kmsg_dump() with kmsg data later, don't do the notification - * here. 
+ * Hyper-V should be notified only once about a panic/die. If we will + * be calling hv_kmsg_dump() later with kmsg data, don't do the + * notification here. */ if (hyperv_report_reg()) - hyperv_report_panic(regs, val, true); + hyperv_report_panic(regs, val, is_die); + return NOTIFY_DONE; } -static struct notifier_block hyperv_die_block = { - .notifier_call = hyperv_die_event, -}; -static struct notifier_block hyperv_panic_block = { - .notifier_call = hyperv_panic_event, -}; - static const char *fb_mmio_name = "fb_range"; static struct resource *fb_mmio; static struct resource *hyperv_mmio; @@ -1538,16 +1558,17 @@ static int vmbus_bus_init(void) if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG) hv_kmsg_dump_register(); - register_die_notifier(&hyperv_die_block); + register_die_notifier(&hyperv_die_report_block); + atomic_notifier_chain_register(&panic_notifier_list, + &hyperv_panic_report_block); } /* - * Always register the panic notifier because we need to unload - * the VMbus channel connection to prevent any VMbus - * activity after the VM panics. + * Always register the vmbus unload panic notifier because we + * need to shut the VMbus channel connection on panic. */ atomic_notifier_chain_register(&panic_notifier_list, - &hyperv_panic_block); + &hyperv_panic_vmbus_unload_block); vmbus_request_offers(); @@ -2800,15 +2821,17 @@ static void __exit vmbus_exit(void) if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { kmsg_dump_unregister(&hv_kmsg_dumper); - unregister_die_notifier(&hyperv_die_block); + unregister_die_notifier(&hyperv_die_report_block); + atomic_notifier_chain_unregister(&panic_notifier_list, + &hyperv_panic_report_block); } /* - * The panic notifier is always registered, hence we should + * The vmbus panic notifier is always registered, hence we should * also unconditionally unregister it here as well. 
*/ atomic_notifier_chain_unregister(&panic_notifier_list, - &hyperv_panic_block); + &hyperv_panic_vmbus_unload_block); free_page((unsigned long)hv_panic_page); unregister_sysctl_table(hv_ctl_table_hdr); diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index dc5f7a156ff5..cf7433652db0 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -474,13 +474,13 @@ config QCOM_IOMMU Support for IOMMU on certain Qualcomm SoCs. config HYPERV_IOMMU - bool "Hyper-V x2APIC IRQ Handling" + bool "Hyper-V IRQ Handling" depends on HYPERV && X86 select IOMMU_API default HYPERV help - Stub IOMMU driver to handle IRQs as to allow Hyper-V Linux - guests to run with x2APIC mode enabled. + Stub IOMMU driver to handle IRQs to support Hyper-V Linux + guest and root partitions. config VIRTIO_IOMMU tristate "Virtio IOMMU driver" diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c index e190bb8c225c..8302db7f783e 100644 --- a/drivers/iommu/hyperv-iommu.c +++ b/drivers/iommu/hyperv-iommu.c @@ -122,9 +122,12 @@ static int __init hyperv_prepare_irq_remapping(void) const char *name; const struct irq_domain_ops *ops; + /* + * For a Hyper-V root partition, ms_hyperv_msi_ext_dest_id() + * will always return false. 
+ */ if (!hypervisor_is_type(X86_HYPER_MS_HYPERV) || - x86_init.hyper.msi_ext_dest_id() || - !x2apic_supported()) + x86_init.hyper.msi_ext_dest_id()) return -ENODEV; if (hv_root_partition) { @@ -170,7 +173,9 @@ static int __init hyperv_prepare_irq_remapping(void) static int __init hyperv_enable_irq_remapping(void) { - return IRQ_REMAP_X2APIC_MODE; + if (x2apic_supported()) + return IRQ_REMAP_X2APIC_MODE; + return IRQ_REMAP_XAPIC_MODE; } struct irq_remap_ops hyperv_irq_remap_ops = { diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index f1ec8931dfbc..583d3aad6908 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -735,9 +735,9 @@ exit_unlock: * during hibernation does not matter (at this time all the devices * have been frozen). Note: the correct affinity info is still updated * into the irqdata data structure in migrate_one_irq() -> - * irq_do_set_affinity() -> hv_set_affinity(), so later when the VM - * resumes, hv_pci_restore_msi_state() is able to correctly restore - * the interrupt with the correct affinity. + * irq_do_set_affinity(), so later when the VM resumes, + * hv_pci_restore_msi_state() is able to correctly restore the + * interrupt with the correct affinity. 
*/ if (!hv_result_success(res) && hbus->state != hv_pcibus_removing) dev_err(&hbus->hdev->device, diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index 072ce07ba9e0..3ce746a46179 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -780,12 +780,18 @@ static void hvfb_ondemand_refresh_throttle(struct hvfb_par *par, static int hvfb_on_panic(struct notifier_block *nb, unsigned long e, void *p) { + struct hv_device *hdev; struct hvfb_par *par; struct fb_info *info; par = container_of(nb, struct hvfb_par, hvfb_panic_nb); - par->synchronous_fb = true; info = par->info; + hdev = device_to_hv_device(info->device); + + if (hv_ringbuffer_spinlock_busy(hdev->channel)) + return NOTIFY_DONE; + + par->synchronous_fb = true; if (par->need_docopy) hvfb_docopy(par, 0, dio_fb_size); synthvid_update(info, 0, 0, INT_MAX, INT_MAX); @@ -1208,7 +1214,15 @@ static int hvfb_probe(struct hv_device *hdev, par->fb_ready = true; par->synchronous_fb = false; + + /* + * We need to be sure this panic notifier runs _before_ the + * vmbus disconnect, so order it by priority. It must execute + * before the function hv_panic_vmbus_unload() [drivers/hv/vmbus_drv.c], + * which is almost at the end of list, with priority = INT_MIN + 1. 
+ */ par->hvfb_panic_nb.notifier_call = hvfb_on_panic; + par->hvfb_panic_nb.priority = INT_MIN + 10, atomic_notifier_chain_register(&panic_notifier_list, &par->hvfb_panic_nb); diff --git a/include/clocksource/hyperv_timer.h b/include/clocksource/hyperv_timer.h index b3f5d73ae1d6..783701a2102d 100644 --- a/include/clocksource/hyperv_timer.h +++ b/include/clocksource/hyperv_timer.h @@ -31,7 +31,9 @@ extern void hv_stimer_global_cleanup(void); extern void hv_stimer0_isr(void); extern void hv_init_clocksource(void); +extern void hv_remap_tsc_clocksource(void); +extern unsigned long hv_get_tsc_pfn(void); extern struct ms_hyperv_tsc_page *hv_get_tsc_page(void); static inline notrace u64 @@ -90,6 +92,11 @@ hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg) } #else /* CONFIG_HYPERV_TIMER */ +static inline unsigned long hv_get_tsc_pfn(void) +{ + return 0; +} + static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void) { return NULL; diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 3b42264333ef..85f7c5a63aa6 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -969,7 +969,7 @@ struct vmbus_channel { * mechanism improves throughput by: * * A) Making the host more efficient - each time it wakes up, - * potentially it will process morev number of packets. The + * potentially it will process more number of packets. The * monitor latency allows a batch to build up. * B) By deferring the hypercall to signal, we will also minimize * the interrupts. 
@@ -1341,6 +1341,8 @@ struct hv_ring_buffer_debug_info { int hv_ringbuffer_get_debuginfo(struct hv_ring_buffer_info *ring_info, struct hv_ring_buffer_debug_info *debug_info); +bool hv_ringbuffer_spinlock_busy(struct vmbus_channel *channel); + /* Vmbus interface */ #define vmbus_driver_register(driver) \ __vmbus_driver_register(driver, THIS_MODULE, KBUILD_MODNAME) diff --git a/mm/page_reporting.c b/mm/page_reporting.c index 382958eef8a9..79a8554f024c 100644 --- a/mm/page_reporting.c +++ b/mm/page_reporting.c @@ -11,10 +11,42 @@ #include "page_reporting.h" #include "internal.h" -unsigned int page_reporting_order = MAX_ORDER; -module_param(page_reporting_order, uint, 0644); +/* Initialize to an unsupported value */ +unsigned int page_reporting_order = -1; + +static int page_order_update_notify(const char *val, const struct kernel_param *kp) +{ + /* + * If param is set beyond this limit, order is set to default + * pageblock_order value + */ + return param_set_uint_minmax(val, kp, 0, MAX_ORDER-1); +} + +static const struct kernel_param_ops page_reporting_param_ops = { + .set = &page_order_update_notify, + /* + * For the get op, use param_get_int instead of param_get_uint. + * This is to make sure that when unset the initialized value of + * -1 is shown correctly + */ + .get = &param_get_int, +}; + +module_param_cb(page_reporting_order, &page_reporting_param_ops, + &page_reporting_order, 0644); MODULE_PARM_DESC(page_reporting_order, "Set page reporting order"); +/* + * This symbol is also a kernel parameter. Export the page_reporting_order + * symbol so that other drivers can access it to control order values without + * having to introduce another configurable parameter. 
Only one driver can + * register with the page_reporting driver for the service, so we have just + * one control parameter for the use case(which can be accessed in both + * drivers) + */ +EXPORT_SYMBOL_GPL(page_reporting_order); + #define PAGE_REPORTING_DELAY (2 * HZ) static struct page_reporting_dev_info __rcu *pr_dev_info __read_mostly; @@ -330,10 +362,18 @@ int page_reporting_register(struct page_reporting_dev_info *prdev) } /* - * Update the page reporting order if it's specified by driver. - * Otherwise, it falls back to @pageblock_order. + * If the page_reporting_order value is not set, we check if + * an order is provided from the driver that is performing the + * registration. If that is not provided either, we default to + * pageblock_order. */ - page_reporting_order = prdev->order ? : pageblock_order; + + if (page_reporting_order == -1) { + if (prdev->order > 0 && prdev->order <= MAX_ORDER) + page_reporting_order = prdev->order; + else + page_reporting_order = pageblock_order; + } /* initialize state and work structures */ atomic_set(&prdev->state, PAGE_REPORTING_IDLE); |