From a563f7598198b8389e00451ef6f3f1c12efbfb99 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 4 Apr 2016 11:43:15 +0100 Subject: arm64: Reuse TCR field definitions for EL1 and EL2 TCR_EL1, TCR_EL2 and VTCR_EL2, all share some field positions (TG0, ORGN0, IRGN0 and SH0) and their corresponding value definitions. This patch makes the TCR_EL1 definitions reusable and uses them for TCR_EL2 and VTCR_EL2 fields. This also fixes a bug where we assume TG0 in {V}TCR_EL2 is 1bit field. Cc: Catalin Marinas Cc: Mark Rutland Acked-by: Marc Zyngier Acked-by: Will Deacon Reviewed-by: Christoffer Dall Signed-off-by: Suzuki K Poulose --- arch/arm64/include/asm/kvm_arm.h | 48 ++++++++++---------- arch/arm64/include/asm/pgtable-hwdef.h | 80 ++++++++++++++++++++++++++-------- 2 files changed, 88 insertions(+), 40 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 3f29887995bc..a46b39f62426 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -96,32 +96,34 @@ SCTLR_EL2_SA | SCTLR_EL2_I) /* TCR_EL2 Registers bits */ -#define TCR_EL2_RES1 ((1 << 31) | (1 << 23)) -#define TCR_EL2_TBI (1 << 20) -#define TCR_EL2_PS (7 << 16) -#define TCR_EL2_PS_40B (2 << 16) -#define TCR_EL2_TG0 (1 << 14) -#define TCR_EL2_SH0 (3 << 12) -#define TCR_EL2_ORGN0 (3 << 10) -#define TCR_EL2_IRGN0 (3 << 8) -#define TCR_EL2_T0SZ 0x3f -#define TCR_EL2_MASK (TCR_EL2_TG0 | TCR_EL2_SH0 | \ - TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ) +#define TCR_EL2_RES1 ((1 << 31) | (1 << 23)) +#define TCR_EL2_TBI (1 << 20) +#define TCR_EL2_PS_SHIFT 16 +#define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT) +#define TCR_EL2_PS_40B (2 << TCR_EL2_PS_SHIFT) +#define TCR_EL2_TG0_MASK TCR_TG0_MASK +#define TCR_EL2_SH0_MASK TCR_SH0_MASK +#define TCR_EL2_ORGN0_MASK TCR_ORGN0_MASK +#define TCR_EL2_IRGN0_MASK TCR_IRGN0_MASK +#define TCR_EL2_T0SZ_MASK 0x3f +#define TCR_EL2_MASK (TCR_EL2_TG0_MASK | TCR_EL2_SH0_MASK | \ + TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK) /* VTCR_EL2 Registers bits */ #define VTCR_EL2_RES1 (1 << 31) -#define VTCR_EL2_PS_MASK (7 << 16) -#define VTCR_EL2_TG0_MASK (1 << 14) -#define VTCR_EL2_TG0_4K (0 << 14) -#define VTCR_EL2_TG0_64K (1 << 14) -#define VTCR_EL2_SH0_MASK (3 << 12) -#define VTCR_EL2_SH0_INNER (3 << 12) -#define VTCR_EL2_ORGN0_MASK (3 << 10) -#define VTCR_EL2_ORGN0_WBWA (1 << 10) -#define VTCR_EL2_IRGN0_MASK (3 << 8) -#define VTCR_EL2_IRGN0_WBWA (1 << 8) -#define VTCR_EL2_SL0_MASK (3 << 6) -#define VTCR_EL2_SL0_LVL1 (1 << 6) +#define VTCR_EL2_PS_MASK TCR_EL2_PS_MASK +#define VTCR_EL2_TG0_MASK TCR_TG0_MASK +#define VTCR_EL2_TG0_4K TCR_TG0_4K +#define VTCR_EL2_TG0_64K TCR_TG0_64K +#define VTCR_EL2_SH0_MASK TCR_SH0_MASK +#define VTCR_EL2_SH0_INNER TCR_SH0_INNER +#define VTCR_EL2_ORGN0_MASK TCR_ORGN0_MASK +#define VTCR_EL2_ORGN0_WBWA TCR_ORGN0_WBWA +#define VTCR_EL2_IRGN0_MASK TCR_IRGN0_MASK +#define VTCR_EL2_IRGN0_WBWA TCR_IRGN0_WBWA +#define VTCR_EL2_SL0_SHIFT 6 +#define VTCR_EL2_SL0_MASK (3 << VTCR_EL2_SL0_SHIFT) +#define VTCR_EL2_SL0_LVL1 (1 << VTCR_EL2_SL0_SHIFT) #define VTCR_EL2_T0SZ_MASK 0x3f #define VTCR_EL2_T0SZ_40B 24 #define VTCR_EL2_VS_SHIFT 19 diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 5c25b831273d..936f1732727c 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -208,23 +208,69 @@ #define TCR_T1SZ(x) ((UL(64) - (x)) << TCR_T1SZ_OFFSET) #define TCR_TxSZ(x) (TCR_T0SZ(x) | TCR_T1SZ(x)) #define 
TCR_TxSZ_WIDTH 6 -#define TCR_IRGN_NC ((UL(0) << 8) | (UL(0) << 24)) -#define TCR_IRGN_WBWA ((UL(1) << 8) | (UL(1) << 24)) -#define TCR_IRGN_WT ((UL(2) << 8) | (UL(2) << 24)) -#define TCR_IRGN_WBnWA ((UL(3) << 8) | (UL(3) << 24)) -#define TCR_IRGN_MASK ((UL(3) << 8) | (UL(3) << 24)) -#define TCR_ORGN_NC ((UL(0) << 10) | (UL(0) << 26)) -#define TCR_ORGN_WBWA ((UL(1) << 10) | (UL(1) << 26)) -#define TCR_ORGN_WT ((UL(2) << 10) | (UL(2) << 26)) -#define TCR_ORGN_WBnWA ((UL(3) << 10) | (UL(3) << 26)) -#define TCR_ORGN_MASK ((UL(3) << 10) | (UL(3) << 26)) -#define TCR_SHARED ((UL(3) << 12) | (UL(3) << 28)) -#define TCR_TG0_4K (UL(0) << 14) -#define TCR_TG0_64K (UL(1) << 14) -#define TCR_TG0_16K (UL(2) << 14) -#define TCR_TG1_16K (UL(1) << 30) -#define TCR_TG1_4K (UL(2) << 30) -#define TCR_TG1_64K (UL(3) << 30) + +#define TCR_IRGN0_SHIFT 8 +#define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_NC (UL(0) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WBWA (UL(1) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WT (UL(2) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WBnWA (UL(3) << TCR_IRGN0_SHIFT) + +#define TCR_IRGN1_SHIFT 24 +#define TCR_IRGN1_MASK (UL(3) << TCR_IRGN1_SHIFT) +#define TCR_IRGN1_NC (UL(0) << TCR_IRGN1_SHIFT) +#define TCR_IRGN1_WBWA (UL(1) << TCR_IRGN1_SHIFT) +#define TCR_IRGN1_WT (UL(2) << TCR_IRGN1_SHIFT) +#define TCR_IRGN1_WBnWA (UL(3) << TCR_IRGN1_SHIFT) + +#define TCR_IRGN_NC (TCR_IRGN0_NC | TCR_IRGN1_NC) +#define TCR_IRGN_WBWA (TCR_IRGN0_WBWA | TCR_IRGN1_WBWA) +#define TCR_IRGN_WT (TCR_IRGN0_WT | TCR_IRGN1_WT) +#define TCR_IRGN_WBnWA (TCR_IRGN0_WBnWA | TCR_IRGN1_WBnWA) +#define TCR_IRGN_MASK (TCR_IRGN0_MASK | TCR_IRGN1_MASK) + + +#define TCR_ORGN0_SHIFT 10 +#define TCR_ORGN0_MASK (UL(3) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_NC (UL(0) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WBWA (UL(1) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WT (UL(2) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WBnWA (UL(3) << TCR_ORGN0_SHIFT) + +#define TCR_ORGN1_SHIFT 26 +#define TCR_ORGN1_MASK (UL(3) << TCR_ORGN1_SHIFT) +#define TCR_ORGN1_NC (UL(0) << TCR_ORGN1_SHIFT) +#define TCR_ORGN1_WBWA (UL(1) << TCR_ORGN1_SHIFT) +#define TCR_ORGN1_WT (UL(2) << TCR_ORGN1_SHIFT) +#define TCR_ORGN1_WBnWA (UL(3) << TCR_ORGN1_SHIFT) + +#define TCR_ORGN_NC (TCR_ORGN0_NC | TCR_ORGN1_NC) +#define TCR_ORGN_WBWA (TCR_ORGN0_WBWA | TCR_ORGN1_WBWA) +#define TCR_ORGN_WT (TCR_ORGN0_WT | TCR_ORGN1_WT) +#define TCR_ORGN_WBnWA (TCR_ORGN0_WBnWA | TCR_ORGN1_WBnWA) +#define TCR_ORGN_MASK (TCR_ORGN0_MASK | TCR_ORGN1_MASK) + +#define TCR_SH0_SHIFT 12 +#define TCR_SH0_MASK (UL(3) << TCR_SH0_SHIFT) +#define TCR_SH0_INNER (UL(3) << TCR_SH0_SHIFT) + +#define TCR_SH1_SHIFT 28 +#define TCR_SH1_MASK (UL(3) << TCR_SH1_SHIFT) +#define TCR_SH1_INNER (UL(3) << TCR_SH1_SHIFT) +#define TCR_SHARED (TCR_SH0_INNER | TCR_SH1_INNER) + +#define TCR_TG0_SHIFT 14 +#define TCR_TG0_MASK (UL(3) << TCR_TG0_SHIFT) +#define TCR_TG0_4K (UL(0) << TCR_TG0_SHIFT) +#define TCR_TG0_64K (UL(1) << TCR_TG0_SHIFT) +#define TCR_TG0_16K (UL(2) << TCR_TG0_SHIFT) + +#define TCR_TG1_SHIFT 30 +#define TCR_TG1_MASK (UL(3) << TCR_TG1_SHIFT) +#define TCR_TG1_16K (UL(1) << TCR_TG1_SHIFT) +#define TCR_TG1_4K (UL(2) << TCR_TG1_SHIFT) +#define TCR_TG1_64K (UL(3) << TCR_TG1_SHIFT) + #define TCR_ASID16 (UL(1) << 36) #define TCR_TBI0 (UL(1) << 37) #define TCR_HA (UL(1) << 39) -- cgit v1.2.3-58-ga151 From acd05010400215b281a9197a889bec3e67998654 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 4 Apr 2016 11:53:52 +0100 Subject: arm64: Cleanup VTCR_EL2 and VTTBR field values We share most of the bits 
for VTCR_EL2 for different page sizes, except for the TG0 value and the entry level value. This patch makes the definitions a bit more cleaner to reflect this fact. Also cleans up the VTTBR_X calculation. No functional changes. Cc: Marc Zyngier Acked-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Suzuki K Poulose --- arch/arm64/include/asm/kvm_arm.h | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index a46b39f62426..1281d98392a0 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -144,30 +144,32 @@ * The magic numbers used for VTTBR_X in this patch can be found in Tables * D4-23 and D4-25 in ARM DDI 0487A.b. */ + +#define VTCR_EL2_T0SZ_IPA VTCR_EL2_T0SZ_40B +#define VTCR_EL2_COMMON_BITS (VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \ + VTCR_EL2_IRGN0_WBWA | VTCR_EL2_RES1) + #ifdef CONFIG_ARM64_64K_PAGES /* * Stage2 translation configuration: - * 40bits input (T0SZ = 24) * 64kB pages (TG0 = 1) * 2 level page tables (SL = 1) */ -#define VTCR_EL2_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \ - VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \ - VTCR_EL2_SL0_LVL1 | VTCR_EL2_RES1) -#define VTTBR_X (38 - VTCR_EL2_T0SZ_40B) +#define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SL0_LVL1) +#define VTTBR_X_TGRAN_MAGIC 38 #else /* * Stage2 translation configuration: - * 40bits input (T0SZ = 24) * 4kB pages (TG0 = 0) * 3 level page tables (SL = 1) */ -#define VTCR_EL2_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \ - VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \ - VTCR_EL2_SL0_LVL1 | VTCR_EL2_RES1) -#define VTTBR_X (37 - VTCR_EL2_T0SZ_40B) +#define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SL0_LVL1) +#define VTTBR_X_TGRAN_MAGIC 37 #endif +#define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS) +#define VTTBR_X (VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA) + #define VTTBR_BADDR_SHIFT (VTTBR_X - 1) #define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) #define VTTBR_VMID_SHIFT (UL(48)) -- cgit v1.2.3-58-ga151 From 120f0779c3ed89c25ef1db943feac8ed73a0d7f9 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 1 Mar 2016 10:03:06 +0000 Subject: kvm arm: Move fake PGD handling to arch specific files Rearrange the code for fake pgd handling, which is applicable only for arm64. This will later be removed once we introduce the stage2 page table walker macros. 
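[ Illustration, not part of the diff: the resulting contract, simplified from kvm_alloc_stage2_pgd() below with error paths trimmed; kvm_alloc_hwpgd() is the pre-existing allocator of the hardware PGD. ]

  hwpgd = kvm_alloc_hwpgd();
  pgd = kvm_setup_fake_pgd(hwpgd);  /* identity on arm; may allocate a fake PGD on arm64 */
  if (IS_ERR(pgd)) {
          kvm_free_hwpgd(hwpgd);
          return PTR_ERR(pgd);
  }
  ...
  /* and on teardown: */
  kvm_free_fake_pgd(kvm->arch.pgd);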
Reviewed-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Suzuki K Poulose --- arch/arm/include/asm/kvm_mmu.h | 11 ++++++++-- arch/arm/kvm/mmu.c | 47 ++++++---------------------------------- arch/arm64/include/asm/kvm_mmu.h | 43 ++++++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 42 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index da44be9db4fa..c2b2b27b7da1 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -161,8 +161,6 @@ static inline bool kvm_page_empty(void *ptr) #define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp) #define kvm_pud_table_empty(kvm, pudp) (0) -#define KVM_PREALLOC_LEVEL 0 - static inline void *kvm_get_hwpgd(struct kvm *kvm) { return kvm->arch.pgd; @@ -173,6 +171,15 @@ static inline unsigned int kvm_get_hwpgd_size(void) return PTRS_PER_S2_PGD * sizeof(pgd_t); } +static inline pgd_t *kvm_setup_fake_pgd(pgd_t *hwpgd) +{ + return hwpgd; +} + +static inline void kvm_free_fake_pgd(pgd_t *pgd) +{ +} + struct kvm; #define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l)) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 58dbd5c439df..774d00b8066b 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -684,47 +684,16 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) if (!hwpgd) return -ENOMEM; - /* When the kernel uses more levels of page tables than the + /* + * When the kernel uses more levels of page tables than the * guest, we allocate a fake PGD and pre-populate it to point * to the next-level page table, which will be the real * initial page table pointed to by the VTTBR. - * - * When KVM_PREALLOC_LEVEL==2, we allocate a single page for - * the PMD and the kernel will use folded pud. - * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD - * pages. */ - if (KVM_PREALLOC_LEVEL > 0) { - int i; - - /* - * Allocate fake pgd for the page table manipulation macros to - * work. This is not used by the hardware and we have no - * alignment requirement for this allocation. - */ - pgd = kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t), - GFP_KERNEL | __GFP_ZERO); - - if (!pgd) { - kvm_free_hwpgd(hwpgd); - return -ENOMEM; - } - - /* Plug the HW PGD into the fake one. */ - for (i = 0; i < PTRS_PER_S2_PGD; i++) { - if (KVM_PREALLOC_LEVEL == 1) - pgd_populate(NULL, pgd + i, - (pud_t *)hwpgd + i * PTRS_PER_PUD); - else if (KVM_PREALLOC_LEVEL == 2) - pud_populate(NULL, pud_offset(pgd, 0) + i, - (pmd_t *)hwpgd + i * PTRS_PER_PMD); - } - } else { - /* - * Allocate actual first-level Stage-2 page table used by the - * hardware for Stage-2 page table walks. - */ - pgd = (pgd_t *)hwpgd; + pgd = kvm_setup_fake_pgd(hwpgd); + if (IS_ERR(pgd)) { + kvm_free_hwpgd(hwpgd); + return PTR_ERR(pgd); } kvm_clean_pgd(pgd); @@ -831,9 +800,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm) unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE); kvm_free_hwpgd(kvm_get_hwpgd(kvm)); - if (KVM_PREALLOC_LEVEL > 0) - kfree(kvm->arch.pgd); - + kvm_free_fake_pgd(kvm->arch.pgd); kvm->arch.pgd = NULL; } diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 22732a5e3119..9a3409f7b37a 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -208,6 +208,49 @@ static inline unsigned int kvm_get_hwpgd_size(void) return PTRS_PER_S2_PGD * sizeof(pgd_t); } +/* + * Allocate fake pgd for the host kernel page table macros to work. 
+ * This is not used by the hardware and we have no alignment + * requirement for this allocation. + */ +static inline pgd_t *kvm_setup_fake_pgd(pgd_t *hwpgd) +{ + int i; + pgd_t *pgd; + + if (!KVM_PREALLOC_LEVEL) + return hwpgd; + + /* + * When KVM_PREALLOC_LEVEL==2, we allocate a single page for + * the PMD and the kernel will use folded pud. + * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD + * pages. + */ + + pgd = kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t), + GFP_KERNEL | __GFP_ZERO); + if (!pgd) + return ERR_PTR(-ENOMEM); + + /* Plug the HW PGD into the fake one. */ + for (i = 0; i < PTRS_PER_S2_PGD; i++) { + if (KVM_PREALLOC_LEVEL == 1) + pgd_populate(NULL, pgd + i, + (pud_t *)hwpgd + i * PTRS_PER_PUD); + else if (KVM_PREALLOC_LEVEL == 2) + pud_populate(NULL, pud_offset(pgd, 0) + i, + (pmd_t *)hwpgd + i * PTRS_PER_PMD); + } + + return pgd; +} + +static inline void kvm_free_fake_pgd(pgd_t *pgd) +{ + if (KVM_PREALLOC_LEVEL > 0) + kfree(pgd); +} static inline bool kvm_page_empty(void *ptr) { struct page *ptr_page = virt_to_page(ptr); -- cgit v1.2.3-58-ga151 From 0dbd3b18c63c81dec1a8c47667d89c54ade9b52a Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 15 Mar 2016 10:46:34 +0000 Subject: arm64: Introduce pmd_thp_or_huge Add a helper to determine if a given pmd represents a huge page either by hugetlb or thp, as we have for arm. This will be used by KVM MMU code. Suggested-by: Mark Rutland Cc: Catalin Marinas Cc: Steve Capper Cc: Will Deacon Acked-by: Marc Zyngier Acked-by: Will Deacon Acked-by: Christoffer Dall Signed-off-by: Suzuki K Poulose --- arch/arm64/include/asm/pgtable.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 989fef16d461..dda4aa9ba3f8 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -290,6 +290,8 @@ static inline pgprot_t mk_sect_prot(pgprot_t prot) #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) #define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) & ~PMD_TYPE_MASK)) +#define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd)) + #define __HAVE_ARCH_PMD_WRITE #define pmd_write(pmd) pte_write(pmd_pte(pmd)) -- cgit v1.2.3-58-ga151 From bbb3b6b35087539e75792b46e07b7ce5282d0979 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 1 Mar 2016 12:00:39 +0000 Subject: kvm-arm: Replace kvm_pmd_huge with pmd_thp_or_huge Both arm and arm64 now provides a helper, pmd_thp_or_huge() to check if the given pmd represents a huge page. Use that instead of our own custom check. 
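[ For illustration: the two checks are identical, this is purely a rename onto the per-arch helper (the arm64 variant is added in the previous patch; arm already provides an equivalent definition). ]

  #define kvm_pmd_huge(_x)     (pmd_huge(_x) || pmd_trans_huge(_x))    /* old, local to arch/arm/kvm/mmu.c */
  #define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd))  /* new, per-arch header */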
Suggested-by: Mark Rutland Cc: Marc Zyngier Acked-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Suzuki K Poulose --- arch/arm/kvm/mmu.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 774d00b8066b..7837f0afa5a4 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -45,7 +45,6 @@ static phys_addr_t hyp_idmap_vector; #define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) -#define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x)) #define kvm_pud_huge(_x) pud_huge(_x) #define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0) @@ -115,7 +114,7 @@ static bool kvm_is_device_pfn(unsigned long pfn) */ static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd) { - if (!kvm_pmd_huge(*pmd)) + if (!pmd_thp_or_huge(*pmd)) return; pmd_clear(pmd); @@ -177,7 +176,7 @@ static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) { pte_t *pte_table = pte_offset_kernel(pmd, 0); - VM_BUG_ON(kvm_pmd_huge(*pmd)); + VM_BUG_ON(pmd_thp_or_huge(*pmd)); pmd_clear(pmd); kvm_tlb_flush_vmid_ipa(kvm, addr); pte_free_kernel(NULL, pte_table); @@ -240,7 +239,7 @@ static void unmap_pmds(struct kvm *kvm, pud_t *pud, do { next = kvm_pmd_addr_end(addr, end); if (!pmd_none(*pmd)) { - if (kvm_pmd_huge(*pmd)) { + if (pmd_thp_or_huge(*pmd)) { pmd_t old_pmd = *pmd; pmd_clear(pmd); @@ -326,7 +325,7 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud, do { next = kvm_pmd_addr_end(addr, end); if (!pmd_none(*pmd)) { - if (kvm_pmd_huge(*pmd)) + if (pmd_thp_or_huge(*pmd)) kvm_flush_dcache_pmd(*pmd); else stage2_flush_ptes(kvm, pmd, addr, next); @@ -1050,7 +1049,7 @@ static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end) do { next = kvm_pmd_addr_end(addr, end); if (!pmd_none(*pmd)) { - if (kvm_pmd_huge(*pmd)) { + if (pmd_thp_or_huge(*pmd)) { if (!kvm_s2pmd_readonly(pmd)) kvm_set_s2pmd_readonly(pmd); } else { @@ -1331,7 +1330,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) if (!pmd || pmd_none(*pmd)) /* Nothing there */ goto out; - if (kvm_pmd_huge(*pmd)) { /* THP, HugeTLB */ + if (pmd_thp_or_huge(*pmd)) { /* THP, HugeTLB */ *pmd = pmd_mkyoung(*pmd); pfn = pmd_pfn(*pmd); pfn_valid = true; @@ -1555,7 +1554,7 @@ static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) if (!pmd || pmd_none(*pmd)) /* Nothing there */ return 0; - if (kvm_pmd_huge(*pmd)) { /* THP, HugeTLB */ + if (pmd_thp_or_huge(*pmd)) { /* THP, HugeTLB */ if (pmd_young(*pmd)) { *pmd = pmd_mkold(*pmd); return 1; @@ -1585,7 +1584,7 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) if (!pmd || pmd_none(*pmd)) /* Nothing there */ return 0; - if (kvm_pmd_huge(*pmd)) /* THP, HugeTLB */ + if (pmd_thp_or_huge(*pmd)) /* THP, HugeTLB */ return pmd_young(*pmd); pte = pte_offset_kernel(pmd, gpa); -- cgit v1.2.3-58-ga151 From 77b5665141a9a7e69d2f685ee2f2a3698fd27397 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 22 Mar 2016 14:06:47 +0000 Subject: kvm-arm: Remove kvm_pud_huge() Get rid of kvm_pud_huge() which falls back to pud_huge. Use pud_huge instead. 
Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Suzuki K Poulose --- arch/arm/kvm/mmu.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 7837f0afa5a4..d0c0ee92c378 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -45,8 +45,6 @@ static phys_addr_t hyp_idmap_vector; #define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) -#define kvm_pud_huge(_x) pud_huge(_x) - #define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0) #define KVM_S2_FLAG_LOGGING_ACTIVE (1UL << 1) @@ -1077,7 +1075,7 @@ static void stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end) next = kvm_pud_addr_end(addr, end); if (!pud_none(*pud)) { /* TODO:PUD not supported, revisit later if supported */ - BUG_ON(kvm_pud_huge(*pud)); + BUG_ON(pud_huge(*pud)); stage2_wp_pmds(pud, addr, next); } } while (pud++, addr = next, addr != end); -- cgit v1.2.3-58-ga151 From b1ae9a30f18d22e1ae544e4c37a032d018ebdac5 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 22 Mar 2016 14:08:17 +0000 Subject: kvm-arm: arm32: Introduce stage2 page table helpers Define the page table helpers for walking the stage2 pagetable for arm. Since both hyp and stage2 have the same number of levels, as that of the host we reuse the host helpers. The exceptions are the p.d_addr_end routines which have to deal with IPA > 32bit, hence we use the open coded version of their host helpers which supports 64bit. Acked-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Suzuki K Poulose Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_mmu.h | 1 + arch/arm/include/asm/stage2_pgtable.h | 61 +++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 arch/arm/include/asm/stage2_pgtable.h (limited to 'arch') diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index c2b2b27b7da1..7d207b44a656 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -47,6 +47,7 @@ #include #include #include +#include int create_hyp_mappings(void *from, void *to); int create_hyp_io_mappings(void *from, void *to, phys_addr_t); diff --git a/arch/arm/include/asm/stage2_pgtable.h b/arch/arm/include/asm/stage2_pgtable.h new file mode 100644 index 000000000000..460d616bb2d6 --- /dev/null +++ b/arch/arm/include/asm/stage2_pgtable.h @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2016 - ARM Ltd + * + * stage2 page table helpers + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef __ARM_S2_PGTABLE_H_ +#define __ARM_S2_PGTABLE_H_ + +#define stage2_pgd_none(pgd) pgd_none(pgd) +#define stage2_pgd_clear(pgd) pgd_clear(pgd) +#define stage2_pgd_present(pgd) pgd_present(pgd) +#define stage2_pgd_populate(pgd, pud) pgd_populate(NULL, pgd, pud) +#define stage2_pud_offset(pgd, address) pud_offset(pgd, address) +#define stage2_pud_free(pud) pud_free(NULL, pud) + +#define stage2_pud_none(pud) pud_none(pud) +#define stage2_pud_clear(pud) pud_clear(pud) +#define stage2_pud_present(pud) pud_present(pud) +#define stage2_pud_populate(pud, pmd) pud_populate(NULL, pud, pmd) +#define stage2_pmd_offset(pud, address) pmd_offset(pud, address) +#define stage2_pmd_free(pmd) pmd_free(NULL, pmd) + +#define stage2_pud_huge(pud) pud_huge(pud) + +/* Open coded p*d_addr_end that can deal with 64bit addresses */ +static inline phys_addr_t stage2_pgd_addr_end(phys_addr_t addr, phys_addr_t end) +{ + phys_addr_t boundary = (addr + PGDIR_SIZE) & PGDIR_MASK; + + return (boundary - 1 < end - 1) ? boundary : end; +} + +#define stage2_pud_addr_end(addr, end) (end) + +static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end) +{ + phys_addr_t boundary = (addr + PMD_SIZE) & PMD_MASK; + + return (boundary - 1 < end - 1) ? boundary : end; +} + +#define stage2_pgd_index(addr) pgd_index(addr) + +#define stage2_pte_table_empty(ptep) kvm_page_empty(ptep) +#define stage2_pmd_table_empty(pmdp) kvm_page_empty(pmdp) +#define stage2_pud_table_empty(pudp) false + +#endif /* __ARM_S2_PGTABLE_H_ */ -- cgit v1.2.3-58-ga151 From b1d030a73e9039dcdec951327e9ce9e8d682067f Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 22 Mar 2016 17:15:55 +0000 Subject: kvm-arm: arm: Introduce hyp page table empty checks Introduce hyp_pxx_table_empty helpers for checking whether a given table entry is empty. This will be used explicitly once we switch to explicit routines for hyp page table walk. Acked-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Suzuki K Poulose Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_mmu.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 7d207b44a656..5522cdd9dedf 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -160,7 +160,11 @@ static inline bool kvm_page_empty(void *ptr) #define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep) #define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp) -#define kvm_pud_table_empty(kvm, pudp) (0) +#define kvm_pud_table_empty(kvm, pudp) false + +#define hyp_pte_table_empty(ptep) kvm_page_empty(ptep) +#define hyp_pmd_table_empty(pmdp) kvm_page_empty(pmdp) +#define hyp_pud_table_empty(pudp) false static inline void *kvm_get_hwpgd(struct kvm *kvm) { -- cgit v1.2.3-58-ga151 From c0ef6326dd393d84c9906240164e803536b5f3fc Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 22 Mar 2016 14:16:52 +0000 Subject: kvm-arm: arm64: Introduce stage2 page table helpers Introduce stage2 page table helpers for arm64. With the fake page table level still in place, the stage2 table has the same number of levels as that of the host (and hyp), so they all fallback to the host version. 
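[ Worked example of the fake-level arithmetic kept by this patch, assuming 4K pages with a 48-bit VA (so CONFIG_PGTABLE_LEVELS = 4, PGDIR_SHIFT = 39) and the 40-bit KVM_PHYS_SHIFT:

  PTRS_PER_S2_PGD_SHIFT = KVM_PHYS_SHIFT - PGDIR_SHIFT = 40 - 39 = 1
  PTRS_PER_S2_PGD       = 2    (<= 16, so the first-level tables are concatenated)
  KVM_PREALLOC_LEVEL    = 4 - CONFIG_PGTABLE_LEVELS + 1 = 1

i.e. the hardware walks two concatenated PUD pages, and the fake PGD exists only so the host pgtable macros have a top level to index. ]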
Acked-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Suzuki K Poulose --- arch/arm64/include/asm/kvm_mmu.h | 29 +---------- arch/arm64/include/asm/stage2_pgtable.h | 86 +++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 27 deletions(-) create mode 100644 arch/arm64/include/asm/stage2_pgtable.h (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 9a3409f7b37a..e3f53e3eb4d9 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -91,6 +91,8 @@ alternative_endif #define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT) #define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL) +#include + int create_hyp_mappings(void *from, void *to); int create_hyp_io_mappings(void *from, void *to, phys_addr_t); void free_boot_hyp_pgd(void); @@ -156,35 +158,8 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd) #define kvm_pud_addr_end(addr, end) pud_addr_end(addr, end) #define kvm_pmd_addr_end(addr, end) pmd_addr_end(addr, end) -/* - * In the case where PGDIR_SHIFT is larger than KVM_PHYS_SHIFT, we can address - * the entire IPA input range with a single pgd entry, and we would only need - * one pgd entry. Note that in this case, the pgd is actually not used by - * the MMU for Stage-2 translations, but is merely a fake pgd used as a data - * structure for the kernel pgtable macros to work. - */ -#if PGDIR_SHIFT > KVM_PHYS_SHIFT -#define PTRS_PER_S2_PGD_SHIFT 0 -#else -#define PTRS_PER_S2_PGD_SHIFT (KVM_PHYS_SHIFT - PGDIR_SHIFT) -#endif -#define PTRS_PER_S2_PGD (1 << PTRS_PER_S2_PGD_SHIFT) - #define kvm_pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1)) -/* - * If we are concatenating first level stage-2 page tables, we would have less - * than or equal to 16 pointers in the fake PGD, because that's what the - * architecture allows. In this case, (4 - CONFIG_PGTABLE_LEVELS) - * represents the first level for the host, and we add 1 to go to the next - * level (which uses contatenation) for the stage-2 tables. - */ -#if PTRS_PER_S2_PGD <= 16 -#define KVM_PREALLOC_LEVEL (4 - CONFIG_PGTABLE_LEVELS + 1) -#else -#define KVM_PREALLOC_LEVEL (0) -#endif - static inline void *kvm_get_hwpgd(struct kvm *kvm) { pgd_t *pgd = kvm->arch.pgd; diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h new file mode 100644 index 000000000000..0ec218fe83c5 --- /dev/null +++ b/arch/arm64/include/asm/stage2_pgtable.h @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2016 - ARM Ltd + * + * stage2 page table helpers + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ARM64_S2_PGTABLE_H_ +#define __ARM64_S2_PGTABLE_H_ + +#include + +/* + * In the case where PGDIR_SHIFT is larger than KVM_PHYS_SHIFT, we can address + * the entire IPA input range with a single pgd entry, and we would only need + * one pgd entry. 
Note that in this case, the pgd is actually not used by + * the MMU for Stage-2 translations, but is merely a fake pgd used as a data + * structure for the kernel pgtable macros to work. + */ +#if PGDIR_SHIFT > KVM_PHYS_SHIFT +#define PTRS_PER_S2_PGD_SHIFT 0 +#else +#define PTRS_PER_S2_PGD_SHIFT (KVM_PHYS_SHIFT - PGDIR_SHIFT) +#endif +#define PTRS_PER_S2_PGD (1 << PTRS_PER_S2_PGD_SHIFT) + +/* + * If we are concatenating first level stage-2 page tables, we would have less + * than or equal to 16 pointers in the fake PGD, because that's what the + * architecture allows. In this case, (4 - CONFIG_PGTABLE_LEVELS) + * represents the first level for the host, and we add 1 to go to the next + * level (which uses contatenation) for the stage-2 tables. + */ +#if PTRS_PER_S2_PGD <= 16 +#define KVM_PREALLOC_LEVEL (4 - CONFIG_PGTABLE_LEVELS + 1) +#else +#define KVM_PREALLOC_LEVEL (0) +#endif + +#define stage2_pgd_none(pgd) pgd_none(pgd) +#define stage2_pgd_clear(pgd) pgd_clear(pgd) +#define stage2_pgd_present(pgd) pgd_present(pgd) +#define stage2_pgd_populate(pgd, pud) pgd_populate(NULL, pgd, pud) +#define stage2_pud_offset(pgd, address) pud_offset(pgd, address) +#define stage2_pud_free(pud) pud_free(NULL, pud) + +#define stage2_pud_none(pud) pud_none(pud) +#define stage2_pud_clear(pud) pud_clear(pud) +#define stage2_pud_present(pud) pud_present(pud) +#define stage2_pud_populate(pud, pmd) pud_populate(NULL, pud, pmd) +#define stage2_pmd_offset(pud, address) pmd_offset(pud, address) +#define stage2_pmd_free(pmd) pmd_free(NULL, pmd) + +#define stage2_pud_huge(pud) pud_huge(pud) + +#define stage2_pgd_addr_end(address, end) pgd_addr_end(address, end) +#define stage2_pud_addr_end(address, end) pud_addr_end(address, end) +#define stage2_pmd_addr_end(address, end) pmd_addr_end(address, end) + +#define stage2_pte_table_empty(ptep) kvm_page_empty(ptep) +#ifdef __PGTABLE_PMD_FOLDED +#define stage2_pmd_table_empty(pmdp) (0) +#else +#define stage2_pmd_table_empty(pmdp) ((KVM_PREALLOC_LEVEL < 2) && kvm_page_empty(pmdp)) +#endif + +#ifdef __PGTABLE_PUD_FOLDED +#define stage2_pud_table_empty(pudp) (0) +#else +#define stage2_pud_table_empty(pudp) ((KVM_PREALLOC_LEVEL < 1) && kvm_page_empty(pudp)) +#endif + +#define stage2_pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1)) + +#endif /* __ARM64_S2_PGTABLE_H_ */ -- cgit v1.2.3-58-ga151 From 66f877faf9cc23232f25b423758eaa167de1ad09 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 22 Mar 2016 17:20:28 +0000 Subject: kvm-arm: arm64: Introduce hyp page table empty checks Introduce hyp_pxx_table_empty helpers for checking whether a given table entry is empty. This will be used explicitly once we switch to explicit routines for hyp page table walk. 
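[ Minimal usage sketch, lifted from the hyp unmap walkers added later in this series: the empty check gates freeing of the table that was just walked, and a folded level reports 0 so its (non-existent) table page is never freed. ]

  if (hyp_pte_table_empty(start_pte))
          clear_hyp_pmd_entry(pmd);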
Acked-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Suzuki K Poulose --- arch/arm64/include/asm/kvm_mmu.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index e3f53e3eb4d9..edf3c62c660e 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -249,6 +249,20 @@ static inline bool kvm_page_empty(void *ptr) #endif +#define hyp_pte_table_empty(ptep) kvm_page_empty(ptep) + +#ifdef __PAGETABLE_PMD_FOLDED +#define hyp_pmd_table_empty(pmdp) (0) +#else +#define hyp_pmd_table_empty(pmdp) kvm_page_empty(pmdp) +#endif + +#ifdef __PAGETABLE_PUD_FOLDED +#define hyp_pud_table_empty(pudp) (0) +#else +#define hyp_pud_table_empty(pudp) kvm_page_empty(pudp) +#endif + struct kvm; #define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) -- cgit v1.2.3-58-ga151 From 70fd19068573e449d47eb2daa69cf5db541ef4f5 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 22 Mar 2016 18:33:45 +0000 Subject: kvm-arm: Use explicit stage2 helper routines We have stage2 page table helpers for both arm and arm64. Switch to the stage2 helpers for routines that only deal with stage2 page table. Cc: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Suzuki K Poulose --- arch/arm/kvm/mmu.c | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index d0c0ee92c378..f93f717b5d8b 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -319,9 +319,9 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud, pmd_t *pmd; phys_addr_t next; - pmd = pmd_offset(pud, addr); + pmd = stage2_pmd_offset(pud, addr); do { - next = kvm_pmd_addr_end(addr, end); + next = stage2_pmd_addr_end(addr, end); if (!pmd_none(*pmd)) { if (pmd_thp_or_huge(*pmd)) kvm_flush_dcache_pmd(*pmd); @@ -337,11 +337,11 @@ static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd, pud_t *pud; phys_addr_t next; - pud = pud_offset(pgd, addr); + pud = stage2_pud_offset(pgd, addr); do { - next = kvm_pud_addr_end(addr, end); - if (!pud_none(*pud)) { - if (pud_huge(*pud)) + next = stage2_pud_addr_end(addr, end); + if (!stage2_pud_none(*pud)) { + if (stage2_pud_huge(*pud)) kvm_flush_dcache_pud(*pud); else stage2_flush_pmds(kvm, pud, addr, next); @@ -357,9 +357,9 @@ static void stage2_flush_memslot(struct kvm *kvm, phys_addr_t next; pgd_t *pgd; - pgd = kvm->arch.pgd + kvm_pgd_index(addr); + pgd = kvm->arch.pgd + stage2_pgd_index(addr); do { - next = kvm_pgd_addr_end(addr, end); + next = stage2_pgd_addr_end(addr, end); stage2_flush_puds(kvm, pgd, addr, next); } while (pgd++, addr = next, addr != end); } @@ -807,16 +807,16 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache pgd_t *pgd; pud_t *pud; - pgd = kvm->arch.pgd + kvm_pgd_index(addr); - if (WARN_ON(pgd_none(*pgd))) { + pgd = kvm->arch.pgd + stage2_pgd_index(addr); + if (WARN_ON(stage2_pgd_none(*pgd))) { if (!cache) return NULL; pud = mmu_memory_cache_alloc(cache); - pgd_populate(NULL, pgd, pud); + stage2_pgd_populate(pgd, pud); get_page(virt_to_page(pgd)); } - return pud_offset(pgd, addr); + return stage2_pud_offset(pgd, addr); } static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, @@ -826,15 +826,15 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache pmd_t *pmd; pud = stage2_get_pud(kvm, cache, addr); - if (pud_none(*pud)) { + if 
(stage2_pud_none(*pud)) { if (!cache) return NULL; pmd = mmu_memory_cache_alloc(cache); - pud_populate(NULL, pud, pmd); + stage2_pud_populate(pud, pmd); get_page(virt_to_page(pud)); } - return pmd_offset(pud, addr); + return stage2_pmd_offset(pud, addr); } static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache @@ -1042,10 +1042,10 @@ static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end) pmd_t *pmd; phys_addr_t next; - pmd = pmd_offset(pud, addr); + pmd = stage2_pmd_offset(pud, addr); do { - next = kvm_pmd_addr_end(addr, end); + next = stage2_pmd_addr_end(addr, end); if (!pmd_none(*pmd)) { if (pmd_thp_or_huge(*pmd)) { if (!kvm_s2pmd_readonly(pmd)) @@ -1070,12 +1070,12 @@ static void stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end) pud_t *pud; phys_addr_t next; - pud = pud_offset(pgd, addr); + pud = stage2_pud_offset(pgd, addr); do { - next = kvm_pud_addr_end(addr, end); - if (!pud_none(*pud)) { + next = stage2_pud_addr_end(addr, end); + if (!stage2_pud_none(*pud)) { /* TODO:PUD not supported, revisit later if supported */ - BUG_ON(pud_huge(*pud)); + BUG_ON(stage2_pud_huge(*pud)); stage2_wp_pmds(pud, addr, next); } } while (pud++, addr = next, addr != end); @@ -1092,7 +1092,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) pgd_t *pgd; phys_addr_t next; - pgd = kvm->arch.pgd + kvm_pgd_index(addr); + pgd = kvm->arch.pgd + stage2_pgd_index(addr); do { /* * Release kvm_mmu_lock periodically if the memory region is @@ -1104,8 +1104,8 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) if (need_resched() || spin_needbreak(&kvm->mmu_lock)) cond_resched_lock(&kvm->mmu_lock); - next = kvm_pgd_addr_end(addr, end); - if (pgd_present(*pgd)) + next = stage2_pgd_addr_end(addr, end); + if (stage2_pgd_present(*pgd)) stage2_wp_puds(pgd, addr, next); } while (pgd++, addr = next, addr != end); } -- cgit v1.2.3-58-ga151 From 64f324979210d4064adf64f19da40c125c9dd137 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 22 Mar 2016 18:56:21 +0000 Subject: kvm-arm: Add explicit hyp page table modifiers We have common routines to modify hyp and stage2 page tables based on the 'kvm' parameter. For a smoother transition to using separate routines for each, duplicate the routines and modify the copy to work on hyp. Marks the forked routines with _hyp_ and gets rid of the kvm parameter which is no longer needed and is NULL for hyp. Also, gets rid of calls to kvm_tlb_flush_by_vmid_ipa() calls from the hyp versions. Uses explicit host page table accessors instead of the kvm_* page table helpers. 
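[ Net effect on the walker signatures, for illustration: the shared routine took a struct kvm pointer (NULL meant hyp) and flushed TLBs by IPA; the hyp copy needs neither, since hyp mappings are only removed at hyp tear down. ]

  /* before: shared between hyp and stage2 */
  static void unmap_ptes(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr, phys_addr_t end);
  /* after: hyp-only fork, no kvm argument, no TLB invalidation */
  static void unmap_hyp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end);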
Suggested-by: Christoffer Dall Cc: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Suzuki K Poulose --- arch/arm/kvm/mmu.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 99 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index f93f717b5d8b..af526f67022c 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -388,6 +388,100 @@ static void stage2_flush_vm(struct kvm *kvm) srcu_read_unlock(&kvm->srcu, idx); } +static void clear_hyp_pgd_entry(pgd_t *pgd) +{ + pud_t *pud_table __maybe_unused = pud_offset(pgd, 0UL); + pgd_clear(pgd); + pud_free(NULL, pud_table); + put_page(virt_to_page(pgd)); +} + +static void clear_hyp_pud_entry(pud_t *pud) +{ + pmd_t *pmd_table __maybe_unused = pmd_offset(pud, 0); + VM_BUG_ON(pud_huge(*pud)); + pud_clear(pud); + pmd_free(NULL, pmd_table); + put_page(virt_to_page(pud)); +} + +static void clear_hyp_pmd_entry(pmd_t *pmd) +{ + pte_t *pte_table = pte_offset_kernel(pmd, 0); + VM_BUG_ON(pmd_thp_or_huge(*pmd)); + pmd_clear(pmd); + pte_free_kernel(NULL, pte_table); + put_page(virt_to_page(pmd)); +} + +static void unmap_hyp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end) +{ + pte_t *pte, *start_pte; + + start_pte = pte = pte_offset_kernel(pmd, addr); + do { + if (!pte_none(*pte)) { + kvm_set_pte(pte, __pte(0)); + put_page(virt_to_page(pte)); + } + } while (pte++, addr += PAGE_SIZE, addr != end); + + if (hyp_pte_table_empty(start_pte)) + clear_hyp_pmd_entry(pmd); +} + +static void unmap_hyp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end) +{ + phys_addr_t next; + pmd_t *pmd, *start_pmd; + + start_pmd = pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); + /* Hyp doesn't use huge pmds */ + if (!pmd_none(*pmd)) + unmap_hyp_ptes(pmd, addr, next); + } while (pmd++, addr = next, addr != end); + + if (hyp_pmd_table_empty(start_pmd)) + clear_hyp_pud_entry(pud); +} + +static void unmap_hyp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end) +{ + phys_addr_t next; + pud_t *pud, *start_pud; + + start_pud = pud = pud_offset(pgd, addr); + do { + next = pud_addr_end(addr, end); + /* Hyp doesn't use huge puds */ + if (!pud_none(*pud)) + unmap_hyp_pmds(pud, addr, next); + } while (pud++, addr = next, addr != end); + + if (hyp_pud_table_empty(start_pud)) + clear_hyp_pgd_entry(pgd); +} + +static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size) +{ + pgd_t *pgd; + phys_addr_t addr = start, end = start + size; + phys_addr_t next; + + /* + * We don't unmap anything from HYP, except at the hyp tear down. + * Hence, we don't have to invalidate the TLBs here. 
+ */ + pgd = pgdp + pgd_index(addr); + do { + next = pgd_addr_end(addr, end); + if (!pgd_none(*pgd)) + unmap_hyp_puds(pgd, addr, next); + } while (pgd++, addr = next, addr != end); +} + /** * free_boot_hyp_pgd - free HYP boot page tables * @@ -398,14 +492,14 @@ void free_boot_hyp_pgd(void) mutex_lock(&kvm_hyp_pgd_mutex); if (boot_hyp_pgd) { - unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); - unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); + unmap_hyp_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); + unmap_hyp_range(boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order); boot_hyp_pgd = NULL; } if (hyp_pgd) - unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); + unmap_hyp_range(hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); mutex_unlock(&kvm_hyp_pgd_mutex); } @@ -430,9 +524,9 @@ void free_hyp_pgds(void) if (hyp_pgd) { for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE) - unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); + unmap_hyp_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) - unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); + unmap_hyp_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); free_pages((unsigned long)hyp_pgd, hyp_pgd_order); hyp_pgd = NULL; -- cgit v1.2.3-58-ga151 From 7a1c831ee8553b8199f21183942a46adf808f174 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 23 Mar 2016 12:08:02 +0000 Subject: kvm-arm: Add stage2 page table modifiers Now that the hyp page table is handled by different set of routines, rename the original shared routines to stage2 handlers. Also make explicit use of the stage2 page table helpers. unmap_range has been merged to existing unmap_stage2_range. 
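[ For reference, the renames performed here on the previously shared walkers, which are now stage2-only:

  unmap_range()              -> merged into unmap_stage2_range()
  unmap_ptes/pmds/puds()     -> unmap_stage2_ptes/pmds/puds()
  clear_pgd/pud/pmd_entry()  -> clear_stage2_pgd/pud/pmd_entry()

with each body switched from the kvm_*/host accessors to the stage2_* helpers. ]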
Cc: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Suzuki K Poulose --- arch/arm/kvm/mmu.c | 97 +++++++++++++++++++++++++----------------------------- 1 file changed, 44 insertions(+), 53 deletions(-) (limited to 'arch') diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index af526f67022c..f2a6d9b8ca2d 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -152,26 +152,26 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) return p; } -static void clear_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr) +static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr) { - pud_t *pud_table __maybe_unused = pud_offset(pgd, 0); - pgd_clear(pgd); + pud_t *pud_table __maybe_unused = stage2_pud_offset(pgd, 0UL); + stage2_pgd_clear(pgd); kvm_tlb_flush_vmid_ipa(kvm, addr); - pud_free(NULL, pud_table); + stage2_pud_free(pud_table); put_page(virt_to_page(pgd)); } -static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) +static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) { - pmd_t *pmd_table = pmd_offset(pud, 0); - VM_BUG_ON(pud_huge(*pud)); - pud_clear(pud); + pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(pud, 0); + VM_BUG_ON(stage2_pud_huge(*pud)); + stage2_pud_clear(pud); kvm_tlb_flush_vmid_ipa(kvm, addr); - pmd_free(NULL, pmd_table); + stage2_pmd_free(pmd_table); put_page(virt_to_page(pud)); } -static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) +static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) { pte_t *pte_table = pte_offset_kernel(pmd, 0); VM_BUG_ON(pmd_thp_or_huge(*pmd)); @@ -201,7 +201,7 @@ static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure * the IO subsystem will never hit in the cache. 
*/ -static void unmap_ptes(struct kvm *kvm, pmd_t *pmd, +static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr, phys_addr_t end) { phys_addr_t start_addr = addr; @@ -223,19 +223,19 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd, } } while (pte++, addr += PAGE_SIZE, addr != end); - if (kvm_pte_table_empty(kvm, start_pte)) - clear_pmd_entry(kvm, pmd, start_addr); + if (stage2_pte_table_empty(start_pte)) + clear_stage2_pmd_entry(kvm, pmd, start_addr); } -static void unmap_pmds(struct kvm *kvm, pud_t *pud, +static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud, phys_addr_t addr, phys_addr_t end) { phys_addr_t next, start_addr = addr; pmd_t *pmd, *start_pmd; - start_pmd = pmd = pmd_offset(pud, addr); + start_pmd = pmd = stage2_pmd_offset(pud, addr); do { - next = kvm_pmd_addr_end(addr, end); + next = stage2_pmd_addr_end(addr, end); if (!pmd_none(*pmd)) { if (pmd_thp_or_huge(*pmd)) { pmd_t old_pmd = *pmd; @@ -247,57 +247,64 @@ static void unmap_pmds(struct kvm *kvm, pud_t *pud, put_page(virt_to_page(pmd)); } else { - unmap_ptes(kvm, pmd, addr, next); + unmap_stage2_ptes(kvm, pmd, addr, next); } } } while (pmd++, addr = next, addr != end); - if (kvm_pmd_table_empty(kvm, start_pmd)) - clear_pud_entry(kvm, pud, start_addr); + if (stage2_pmd_table_empty(start_pmd)) + clear_stage2_pud_entry(kvm, pud, start_addr); } -static void unmap_puds(struct kvm *kvm, pgd_t *pgd, +static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr, phys_addr_t end) { phys_addr_t next, start_addr = addr; pud_t *pud, *start_pud; - start_pud = pud = pud_offset(pgd, addr); + start_pud = pud = stage2_pud_offset(pgd, addr); do { - next = kvm_pud_addr_end(addr, end); - if (!pud_none(*pud)) { - if (pud_huge(*pud)) { + next = stage2_pud_addr_end(addr, end); + if (!stage2_pud_none(*pud)) { + if (stage2_pud_huge(*pud)) { pud_t old_pud = *pud; - pud_clear(pud); + stage2_pud_clear(pud); kvm_tlb_flush_vmid_ipa(kvm, addr); - kvm_flush_dcache_pud(old_pud); - put_page(virt_to_page(pud)); } else { - unmap_pmds(kvm, pud, addr, next); + unmap_stage2_pmds(kvm, pud, addr, next); } } } while (pud++, addr = next, addr != end); - if (kvm_pud_table_empty(kvm, start_pud)) - clear_pgd_entry(kvm, pgd, start_addr); + if (stage2_pud_table_empty(start_pud)) + clear_stage2_pgd_entry(kvm, pgd, start_addr); } - -static void unmap_range(struct kvm *kvm, pgd_t *pgdp, - phys_addr_t start, u64 size) +/** + * unmap_stage2_range -- Clear stage2 page table entries to unmap a range + * @kvm: The VM pointer + * @start: The intermediate physical base address of the range to unmap + * @size: The size of the area to unmap + * + * Clear a range of stage-2 mappings, lowering the various ref-counts. Must + * be called while holding mmu_lock (unless for freeing the stage2 pgd before + * destroying the VM), otherwise another faulting VCPU may come in and mess + * with things behind our backs. 
+ */ +static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) { pgd_t *pgd; phys_addr_t addr = start, end = start + size; phys_addr_t next; - pgd = pgdp + kvm_pgd_index(addr); + pgd = kvm->arch.pgd + stage2_pgd_index(addr); do { - next = kvm_pgd_addr_end(addr, end); - if (!pgd_none(*pgd)) - unmap_puds(kvm, pgd, addr, next); + next = stage2_pgd_addr_end(addr, end); + if (!stage2_pgd_none(*pgd)) + unmap_stage2_puds(kvm, pgd, addr, next); } while (pgd++, addr = next, addr != end); } @@ -792,22 +799,6 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) return 0; } -/** - * unmap_stage2_range -- Clear stage2 page table entries to unmap a range - * @kvm: The VM pointer - * @start: The intermediate physical base address of the range to unmap - * @size: The size of the area to unmap - * - * Clear a range of stage-2 mappings, lowering the various ref-counts. Must - * be called while holding mmu_lock (unless for freeing the stage2 pgd before - * destroying the VM), otherwise another faulting VCPU may come in and mess - * with things behind our backs. - */ -static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) -{ - unmap_range(kvm, kvm->arch.pgd, start, size); -} - static void stage2_unmap_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) { -- cgit v1.2.3-58-ga151 From 8684e701df5a3f52e3ff580128cbd5d71fcd5f5c Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 22 Mar 2016 17:14:25 +0000 Subject: kvm-arm: Cleanup kvm_* wrappers Now that we have switched to explicit page table routines, get rid of the obsolete kvm_* wrappers. Also, kvm_tlb_flush_vmid_by_ipa is now called only on stage2 page tables, hence get rid of the redundant check. Cc: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Suzuki K Poulose --- arch/arm/include/asm/kvm_mmu.h | 16 ---------------- arch/arm/kvm/mmu.c | 9 +-------- arch/arm64/include/asm/kvm_mmu.h | 24 ------------------------ 3 files changed, 1 insertion(+), 48 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 5522cdd9dedf..a7736d53a408 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -136,22 +136,6 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd) return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY; } - -/* Open coded p*d_addr_end that can deal with 64bit addresses */ -#define kvm_pgd_addr_end(addr, end) \ -({ u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \ - (__boundary - 1 < (end) - 1)? __boundary: (end); \ -}) - -#define kvm_pud_addr_end(addr,end) (end) - -#define kvm_pmd_addr_end(addr, end) \ -({ u64 __boundary = ((addr) + PMD_SIZE) & PMD_MASK; \ - (__boundary - 1 < (end) - 1)? __boundary: (end); \ -}) - -#define kvm_pgd_index(addr) pgd_index(addr) - static inline bool kvm_page_empty(void *ptr) { struct page *ptr_page = virt_to_page(ptr); diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index f2a6d9b8ca2d..d3fa96e0f709 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -66,14 +66,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { - /* - * This function also gets called when dealing with HYP page - * tables. As HYP doesn't have an associated struct kvm (and - * the HYP page tables are fairly static), we don't do - * anything there. 
- */ - if (kvm) - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); } /* diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index edf3c62c660e..a3c0d05311ef 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -153,13 +153,6 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd) return (pmd_val(*pmd) & PMD_S2_RDWR) == PMD_S2_RDONLY; } - -#define kvm_pgd_addr_end(addr, end) pgd_addr_end(addr, end) -#define kvm_pud_addr_end(addr, end) pud_addr_end(addr, end) -#define kvm_pmd_addr_end(addr, end) pmd_addr_end(addr, end) - -#define kvm_pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1)) - static inline void *kvm_get_hwpgd(struct kvm *kvm) { pgd_t *pgd = kvm->arch.pgd; @@ -232,23 +225,6 @@ static inline bool kvm_page_empty(void *ptr) return page_count(ptr_page) == 1; } -#define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep) - -#ifdef __PAGETABLE_PMD_FOLDED -#define kvm_pmd_table_empty(kvm, pmdp) (0) -#else -#define kvm_pmd_table_empty(kvm, pmdp) \ - (kvm_page_empty(pmdp) && (!(kvm) || KVM_PREALLOC_LEVEL < 2)) -#endif - -#ifdef __PAGETABLE_PUD_FOLDED -#define kvm_pud_table_empty(kvm, pudp) (0) -#else -#define kvm_pud_table_empty(kvm, pudp) \ - (kvm_page_empty(pudp) && (!(kvm) || KVM_PREALLOC_LEVEL < 1)) -#endif - - #define hyp_pte_table_empty(ptep) kvm_page_empty(ptep) #ifdef __PAGETABLE_PMD_FOLDED -- cgit v1.2.3-58-ga151 From da04fa04dc91e7dae79629f28804391cbcf6e604 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 23 Mar 2016 12:22:33 +0000 Subject: kvm: arm64: Get rid of fake page table levels On arm64, the hardware supports concatenation of upto 16 tables, at entry level for stage2 translations and we make use that whenever possible. This could lead to reduced number of translation levels than the normal (stage1 table) table. Also, since the IPA(40bit) is smaller than the some of the supported VA_BITS (e.g, 48bit), there could be different number of levels in stage-1 vs stage-2 tables. To reuse the kernel host page table walker for stage2 we have been using a fake software page table level, not known to the hardware. But with 16K translations, there could be upto 2 fake software levels (with 48bit VA and 40bit IPA), which complicates the code. Hence, we want to get rid of the hack. Now that we have explicit accessors for hyp vs stage2 page tables, define the stage2 walker helpers accordingly based on the actual table used by the hardware. Once we know the number of translation levels used by the hardware, it is merely a job of defining the helpers based on whether a particular level is folded or not, looking at the number of levels. Some facts before we calculate the translation levels: 1) Smallest page size supported by arm64 is 4K. 2) The minimum number of bits resolved at any page table level is (PAGE_SHIFT - 3) at intermediate levels. Both of them implies, minimum number of bits required for a level change is 9. Since we can concatenate upto 16 tables at stage2 entry, the total number of page table levels used by the hardware for resolving N bits is same as that for (N - 4) bits (with concatenation), as there cannot be a level in between (N, N-4) as per the above rules. Hence, we have STAGE2_PGTABLE_LEVELS = PGTABLE_LEVELS(KVM_PHYS_SHIFT - 4) With the current IPA limit (40bit), for all supported translations and VA_BITS, we have the following condition (even for 36bit VA with 16K page size): CONFIG_PGTABLE_LEVELS >= STAGE2_PGTABLE_LEVELS. 
So, for e.g, if PUD is present in stage2, it is present in the hyp(host). Hence, we fall back to the host definition if we find that a level is not folded. Otherwise we redefine it accordingly. A build time check is added to make sure the above condition holds. If this condition breaks in future, we can rearrange the host level helpers and fix our code easily. Cc: Marc Zyngier Cc: Christoffer Dall Reviewed-by: Christoffer Dall Signed-off-by: Suzuki K Poulose --- arch/arm64/include/asm/kvm_mmu.h | 64 +------------- arch/arm64/include/asm/stage2_pgtable-nopmd.h | 42 +++++++++ arch/arm64/include/asm/stage2_pgtable-nopud.h | 39 ++++++++ arch/arm64/include/asm/stage2_pgtable.h | 122 +++++++++++++++++++------- 4 files changed, 172 insertions(+), 95 deletions(-) create mode 100644 arch/arm64/include/asm/stage2_pgtable-nopmd.h create mode 100644 arch/arm64/include/asm/stage2_pgtable-nopud.h (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index a3c0d05311ef..e3fee0acd1a2 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -45,18 +45,6 @@ */ #define TRAMPOLINE_VA (HYP_PAGE_OFFSET_MASK & PAGE_MASK) -/* - * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation - * levels in addition to the PGD and potentially the PUD which are - * pre-allocated (we pre-allocate the fake PGD and the PUD when the Stage-2 - * tables use one level of tables less than the kernel. - */ -#ifdef CONFIG_ARM64_64K_PAGES -#define KVM_MMU_CACHE_MIN_PAGES 1 -#else -#define KVM_MMU_CACHE_MIN_PAGES 2 -#endif - #ifdef __ASSEMBLY__ #include @@ -155,69 +143,21 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd) static inline void *kvm_get_hwpgd(struct kvm *kvm) { - pgd_t *pgd = kvm->arch.pgd; - pud_t *pud; - - if (KVM_PREALLOC_LEVEL == 0) - return pgd; - - pud = pud_offset(pgd, 0); - if (KVM_PREALLOC_LEVEL == 1) - return pud; - - BUG_ON(KVM_PREALLOC_LEVEL != 2); - return pmd_offset(pud, 0); + return kvm->arch.pgd; } static inline unsigned int kvm_get_hwpgd_size(void) { - if (KVM_PREALLOC_LEVEL > 0) - return PTRS_PER_S2_PGD * PAGE_SIZE; return PTRS_PER_S2_PGD * sizeof(pgd_t); } -/* - * Allocate fake pgd for the host kernel page table macros to work. - * This is not used by the hardware and we have no alignment - * requirement for this allocation. - */ static inline pgd_t *kvm_setup_fake_pgd(pgd_t *hwpgd) { - int i; - pgd_t *pgd; - - if (!KVM_PREALLOC_LEVEL) - return hwpgd; - - /* - * When KVM_PREALLOC_LEVEL==2, we allocate a single page for - * the PMD and the kernel will use folded pud. - * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD - * pages. - */ - - pgd = kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t), - GFP_KERNEL | __GFP_ZERO); - if (!pgd) - return ERR_PTR(-ENOMEM); - - /* Plug the HW PGD into the fake one. 
*/ - for (i = 0; i < PTRS_PER_S2_PGD; i++) { - if (KVM_PREALLOC_LEVEL == 1) - pgd_populate(NULL, pgd + i, - (pud_t *)hwpgd + i * PTRS_PER_PUD); - else if (KVM_PREALLOC_LEVEL == 2) - pud_populate(NULL, pud_offset(pgd, 0) + i, - (pmd_t *)hwpgd + i * PTRS_PER_PMD); - } - - return pgd; + return hwpgd; } static inline void kvm_free_fake_pgd(pgd_t *pgd) { - if (KVM_PREALLOC_LEVEL > 0) - kfree(pgd); } static inline bool kvm_page_empty(void *ptr) { diff --git a/arch/arm64/include/asm/stage2_pgtable-nopmd.h b/arch/arm64/include/asm/stage2_pgtable-nopmd.h new file mode 100644 index 000000000000..2656a0fd05a6 --- /dev/null +++ b/arch/arm64/include/asm/stage2_pgtable-nopmd.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2016 - ARM Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ARM64_S2_PGTABLE_NOPMD_H_ +#define __ARM64_S2_PGTABLE_NOPMD_H_ + +#include + +#define __S2_PGTABLE_PMD_FOLDED + +#define S2_PMD_SHIFT S2_PUD_SHIFT +#define S2_PTRS_PER_PMD 1 +#define S2_PMD_SIZE (1UL << S2_PMD_SHIFT) +#define S2_PMD_MASK (~(S2_PMD_SIZE-1)) + +#define stage2_pud_none(pud) (0) +#define stage2_pud_present(pud) (1) +#define stage2_pud_clear(pud) do { } while (0) +#define stage2_pud_populate(pud, pmd) do { } while (0) +#define stage2_pmd_offset(pud, address) ((pmd_t *)(pud)) + +#define stage2_pmd_free(pmd) do { } while (0) + +#define stage2_pmd_addr_end(addr, end) (end) + +#define stage2_pud_huge(pud) (0) +#define stage2_pmd_table_empty(pmdp) (0) + +#endif diff --git a/arch/arm64/include/asm/stage2_pgtable-nopud.h b/arch/arm64/include/asm/stage2_pgtable-nopud.h new file mode 100644 index 000000000000..5ee87b54ebf3 --- /dev/null +++ b/arch/arm64/include/asm/stage2_pgtable-nopud.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2016 - ARM Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef __ARM64_S2_PGTABLE_NOPUD_H_ +#define __ARM64_S2_PGTABLE_NOPUD_H_ + +#define __S2_PGTABLE_PUD_FOLDED + +#define S2_PUD_SHIFT S2_PGDIR_SHIFT +#define S2_PTRS_PER_PUD 1 +#define S2_PUD_SIZE (_AC(1, UL) << S2_PUD_SHIFT) +#define S2_PUD_MASK (~(S2_PUD_SIZE-1)) + +#define stage2_pgd_none(pgd) (0) +#define stage2_pgd_present(pgd) (1) +#define stage2_pgd_clear(pgd) do { } while (0) +#define stage2_pgd_populate(pgd, pud) do { } while (0) + +#define stage2_pud_offset(pgd, address) ((pud_t *)(pgd)) + +#define stage2_pud_free(x) do { } while (0) + +#define stage2_pud_addr_end(addr, end) (end) +#define stage2_pud_table_empty(pmdp) (0) + +#endif diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h index 0ec218fe83c5..8b68099348e5 100644 --- a/arch/arm64/include/asm/stage2_pgtable.h +++ b/arch/arm64/include/asm/stage2_pgtable.h @@ -22,32 +22,61 @@ #include /* - * In the case where PGDIR_SHIFT is larger than KVM_PHYS_SHIFT, we can address - * the entire IPA input range with a single pgd entry, and we would only need - * one pgd entry. Note that in this case, the pgd is actually not used by - * the MMU for Stage-2 translations, but is merely a fake pgd used as a data - * structure for the kernel pgtable macros to work. + * The hardware supports concatenation of up to 16 tables at stage2 entry level + * and we use the feature whenever possible. + * + * Now, the minimum number of bits resolved at any level is (PAGE_SHIFT - 3). + * On arm64, the smallest PAGE_SIZE supported is 4k, which means + * (PAGE_SHIFT - 3) > 4 holds for all page sizes. + * This implies, the total number of page table levels at stage2 expected + * by the hardware is actually the number of levels required for (KVM_PHYS_SHIFT - 4) + * in normal translations(e.g, stage1), since we cannot have another level in + * the range (KVM_PHYS_SHIFT, KVM_PHYS_SHIFT - 4). */ -#if PGDIR_SHIFT > KVM_PHYS_SHIFT -#define PTRS_PER_S2_PGD_SHIFT 0 -#else -#define PTRS_PER_S2_PGD_SHIFT (KVM_PHYS_SHIFT - PGDIR_SHIFT) -#endif -#define PTRS_PER_S2_PGD (1 << PTRS_PER_S2_PGD_SHIFT) +#define STAGE2_PGTABLE_LEVELS ARM64_HW_PGTABLE_LEVELS(KVM_PHYS_SHIFT - 4) /* - * If we are concatenating first level stage-2 page tables, we would have less - * than or equal to 16 pointers in the fake PGD, because that's what the - * architecture allows. In this case, (4 - CONFIG_PGTABLE_LEVELS) - * represents the first level for the host, and we add 1 to go to the next - * level (which uses contatenation) for the stage-2 tables. + * With all the supported VA_BITs and 40bit guest IPA, the following condition + * is always true: + * + * STAGE2_PGTABLE_LEVELS <= CONFIG_PGTABLE_LEVELS + * + * We base our stage-2 page table walker helpers on this assumption and + * fall back to using the host version of the helper wherever possible. + * i.e, if a particular level is not folded (e.g, PUD) at stage2, we fall back + * to using the host version, since it is guaranteed it is not folded at host. + * + * If the condition breaks in the future, we can rearrange the host level + * definitions and reuse them for stage2. Till then... */ -#if PTRS_PER_S2_PGD <= 16 -#define KVM_PREALLOC_LEVEL (4 - CONFIG_PGTABLE_LEVELS + 1) -#else -#define KVM_PREALLOC_LEVEL (0) +#if STAGE2_PGTABLE_LEVELS > CONFIG_PGTABLE_LEVELS +#error "Unsupported combination of guest IPA and host VA_BITS." 
#endif +/* S2_PGDIR_SHIFT is the size mapped by top-level stage2 entry */ +#define S2_PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - STAGE2_PGTABLE_LEVELS) +#define S2_PGDIR_SIZE (_AC(1, UL) << S2_PGDIR_SHIFT) +#define S2_PGDIR_MASK (~(S2_PGDIR_SIZE - 1)) + +/* + * The number of PTRS across all concatenated stage2 tables given by the + * number of bits resolved at the initial level. + */ +#define PTRS_PER_S2_PGD (1 << (KVM_PHYS_SHIFT - S2_PGDIR_SHIFT)) + +/* + * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation + * levels in addition to the PGD. + */ +#define KVM_MMU_CACHE_MIN_PAGES (STAGE2_PGTABLE_LEVELS - 1) + + +#if STAGE2_PGTABLE_LEVELS > 3 + +#define S2_PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1) +#define S2_PUD_SIZE (_AC(1, UL) << S2_PUD_SHIFT) +#define S2_PUD_MASK (~(S2_PUD_SIZE - 1)) + #define stage2_pgd_none(pgd) pgd_none(pgd) #define stage2_pgd_clear(pgd) pgd_clear(pgd) #define stage2_pgd_present(pgd) pgd_present(pgd) @@ -55,6 +84,24 @@ #define stage2_pud_offset(pgd, address) pud_offset(pgd, address) #define stage2_pud_free(pud) pud_free(NULL, pud) +#define stage2_pud_table_empty(pudp) kvm_page_empty(pudp) + +static inline phys_addr_t stage2_pud_addr_end(phys_addr_t addr, phys_addr_t end) +{ + phys_addr_t boundary = (addr + S2_PUD_SIZE) & S2_PUD_MASK; + + return (boundary - 1 < end - 1) ? boundary : end; +} + +#endif /* STAGE2_PGTABLE_LEVELS > 3 */ + + +#if STAGE2_PGTABLE_LEVELS > 2 + +#define S2_PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2) +#define S2_PMD_SIZE (_AC(1, UL) << S2_PMD_SHIFT) +#define S2_PMD_MASK (~(S2_PMD_SIZE - 1)) + #define stage2_pud_none(pud) pud_none(pud) #define stage2_pud_clear(pud) pud_clear(pud) #define stage2_pud_present(pud) pud_present(pud) @@ -63,24 +110,33 @@ #define stage2_pmd_free(pmd) pmd_free(NULL, pmd) #define stage2_pud_huge(pud) pud_huge(pud) +#define stage2_pmd_table_empty(pmdp) kvm_page_empty(pmdp) + +static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end) +{ + phys_addr_t boundary = (addr + S2_PMD_SIZE) & S2_PMD_MASK; -#define stage2_pgd_addr_end(address, end) pgd_addr_end(address, end) -#define stage2_pud_addr_end(address, end) pud_addr_end(address, end) -#define stage2_pmd_addr_end(address, end) pmd_addr_end(address, end) + return (boundary - 1 < end - 1) ? boundary : end; +} + +#endif /* STAGE2_PGTABLE_LEVELS > 2 */ #define stage2_pte_table_empty(ptep) kvm_page_empty(ptep) -#ifdef __PGTABLE_PMD_FOLDED -#define stage2_pmd_table_empty(pmdp) (0) -#else -#define stage2_pmd_table_empty(pmdp) ((KVM_PREALLOC_LEVEL < 2) && kvm_page_empty(pmdp)) -#endif -#ifdef __PGTABLE_PUD_FOLDED -#define stage2_pud_table_empty(pudp) (0) -#else -#define stage2_pud_table_empty(pudp) ((KVM_PREALLOC_LEVEL < 1) && kvm_page_empty(pudp)) +#if STAGE2_PGTABLE_LEVELS == 2 +#include +#elif STAGE2_PGTABLE_LEVELS == 3 +#include #endif -#define stage2_pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1)) + +#define stage2_pgd_index(addr) (((addr) >> S2_PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1)) + +static inline phys_addr_t stage2_pgd_addr_end(phys_addr_t addr, phys_addr_t end) +{ + phys_addr_t boundary = (addr + S2_PGDIR_SIZE) & S2_PGDIR_MASK; + + return (boundary - 1 < end - 1) ? 
boundary : end; +} #endif /* __ARM64_S2_PGTABLE_H_ */ -- cgit v1.2.3-58-ga151 From 9163ee23e72333e4712f7edd1a49aef06eae6304 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 22 Mar 2016 17:01:21 +0000 Subject: kvm-arm: Cleanup stage2 pgd handling Now that we don't have any fake page table levels for arm64, cleanup the common code to get rid of the dead code. Cc: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Suzuki K Poulose --- arch/arm/include/asm/kvm_mmu.h | 19 ------------------- arch/arm/kvm/arm.c | 2 +- arch/arm/kvm/mmu.c | 37 ++++++------------------------------- arch/arm64/include/asm/kvm_mmu.h | 18 ------------------ 4 files changed, 7 insertions(+), 69 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index a7736d53a408..6344ea0ad624 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -150,25 +150,6 @@ static inline bool kvm_page_empty(void *ptr) #define hyp_pmd_table_empty(pmdp) kvm_page_empty(pmdp) #define hyp_pud_table_empty(pudp) false -static inline void *kvm_get_hwpgd(struct kvm *kvm) -{ - return kvm->arch.pgd; -} - -static inline unsigned int kvm_get_hwpgd_size(void) -{ - return PTRS_PER_S2_PGD * sizeof(pgd_t); -} - -static inline pgd_t *kvm_setup_fake_pgd(pgd_t *hwpgd) -{ - return hwpgd; -} - -static inline void kvm_free_fake_pgd(pgd_t *pgd) -{ -} - struct kvm; #define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l)) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index dded1b763c16..be4b6394a062 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -448,7 +448,7 @@ static void update_vttbr(struct kvm *kvm) kvm_next_vmid &= (1 << kvm_vmid_bits) - 1; /* update vttbr to be used with the new vmid */ - pgd_phys = virt_to_phys(kvm_get_hwpgd(kvm)); + pgd_phys = virt_to_phys(kvm->arch.pgd); BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits); kvm->arch.vttbr = pgd_phys | vmid; diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index d3fa96e0f709..42eefab3e8e1 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -43,6 +43,7 @@ static unsigned long hyp_idmap_start; static unsigned long hyp_idmap_end; static phys_addr_t hyp_idmap_vector; +#define S2_PGD_SIZE (PTRS_PER_S2_PGD * sizeof(pgd_t)) #define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) #define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0) @@ -736,20 +737,6 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr) __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE); } -/* Free the HW pgd, one page at a time */ -static void kvm_free_hwpgd(void *hwpgd) -{ - free_pages_exact(hwpgd, kvm_get_hwpgd_size()); -} - -/* Allocate the HW PGD, making sure that each page gets its own refcount */ -static void *kvm_alloc_hwpgd(void) -{ - unsigned int size = kvm_get_hwpgd_size(); - - return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO); -} - /** * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation. * @kvm: The KVM struct pointer for the VM. 
@@ -764,29 +751,17 @@ static void *kvm_alloc_hwpgd(void) int kvm_alloc_stage2_pgd(struct kvm *kvm) { pgd_t *pgd; - void *hwpgd; if (kvm->arch.pgd != NULL) { kvm_err("kvm_arch already initialized?\n"); return -EINVAL; } - hwpgd = kvm_alloc_hwpgd(); - if (!hwpgd) + /* Allocate the HW PGD, making sure that each page gets its own refcount */ + pgd = alloc_pages_exact(S2_PGD_SIZE, GFP_KERNEL | __GFP_ZERO); + if (!pgd) return -ENOMEM; - /* - * When the kernel uses more levels of page tables than the - * guest, we allocate a fake PGD and pre-populate it to point - * to the next-level page table, which will be the real - * initial page table pointed to by the VTTBR. - */ - pgd = kvm_setup_fake_pgd(hwpgd); - if (IS_ERR(pgd)) { - kvm_free_hwpgd(hwpgd); - return PTR_ERR(pgd); - } - kvm_clean_pgd(pgd); kvm->arch.pgd = pgd; return 0; @@ -874,8 +849,8 @@ void kvm_free_stage2_pgd(struct kvm *kvm) return; unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE); - kvm_free_hwpgd(kvm_get_hwpgd(kvm)); - kvm_free_fake_pgd(kvm->arch.pgd); + /* Free the HW pgd, one page at a time */ + free_pages_exact(kvm->arch.pgd, S2_PGD_SIZE); kvm->arch.pgd = NULL; } diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index e3fee0acd1a2..249c4fc9c5f6 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -141,24 +141,6 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd) return (pmd_val(*pmd) & PMD_S2_RDWR) == PMD_S2_RDONLY; } -static inline void *kvm_get_hwpgd(struct kvm *kvm) -{ - return kvm->arch.pgd; -} - -static inline unsigned int kvm_get_hwpgd_size(void) -{ - return PTRS_PER_S2_PGD * sizeof(pgd_t); -} - -static inline pgd_t *kvm_setup_fake_pgd(pgd_t *hwpgd) -{ - return hwpgd; -} - -static inline void kvm_free_fake_pgd(pgd_t *pgd) -{ -} static inline bool kvm_page_empty(void *ptr) { struct page *ptr_page = virt_to_page(ptr); -- cgit v1.2.3-58-ga151 From 02e0b7600f8350078f01328095c20dd715700921 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 17 Mar 2016 14:29:24 +0000 Subject: arm64: kvm: Add support for 16K pages Now that we can handle stage-2 page tables independent of the host page table levels, wire up the 16K page support. Cc: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Suzuki K Poulose --- arch/arm64/include/asm/kvm_arm.h | 13 +++++++++++-- arch/arm64/kvm/Kconfig | 1 - 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 1281d98392a0..c6cbb361bbcf 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -114,6 +114,7 @@ #define VTCR_EL2_PS_MASK TCR_EL2_PS_MASK #define VTCR_EL2_TG0_MASK TCR_TG0_MASK #define VTCR_EL2_TG0_4K TCR_TG0_4K +#define VTCR_EL2_TG0_16K TCR_TG0_16K #define VTCR_EL2_TG0_64K TCR_TG0_64K #define VTCR_EL2_SH0_MASK TCR_SH0_MASK #define VTCR_EL2_SH0_INNER TCR_SH0_INNER @@ -139,7 +140,7 @@ * (see hyp-init.S). * * Note that when using 4K pages, we concatenate two first level page tables - * together. + * together. With 16K pages, we concatenate 16 first level page tables. * * The magic numbers used for VTTBR_X in this patch can be found in Tables * D4-23 and D4-25 in ARM DDI 0487A.b. 
@@ -157,7 +158,15 @@ */ #define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SL0_LVL1) #define VTTBR_X_TGRAN_MAGIC 38 -#else +#elif defined(CONFIG_ARM64_16K_PAGES) +/* + * Stage2 translation configuration: + * 16kB pages (TG0 = 2) + * 2 level page tables (SL = 1) + */ +#define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_16K | VTCR_EL2_SL0_LVL1) +#define VTTBR_X_TGRAN_MAGIC 42 +#else /* 4K */ /* * Stage2 translation configuration: * 4kB pages (TG0 = 0) diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index de7450df7629..aa2e34e99582 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -22,7 +22,6 @@ config KVM_ARM_VGIC_V3 config KVM bool "Kernel-based Virtual Machine (KVM) support" depends on OF - depends on !ARM64_16K_PAGES select MMU_NOTIFIER select PREEMPT_NOTIFIERS select ANON_INODES -- cgit v1.2.3-58-ga151 From d4b9e0790aa764c0b01e18d4e8d33e93ba36d51f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 28 Apr 2016 16:16:31 +0100 Subject: arm/arm64: KVM: Enforce Break-Before-Make on Stage-2 page tables The ARM architecture mandates that when changing a page table entry from a valid entry to another valid entry, an invalid entry is first written, TLB invalidated, and only then the new entry being written. The current code doesn't respect this, directly writing the new entry and only then invalidating TLBs. Let's fix it up. Cc: Reported-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/kvm/mmu.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 42eefab3e8e1..74b5d199f6b7 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -910,11 +910,14 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd)); old_pmd = *pmd; - kvm_set_pmd(pmd, *new_pmd); - if (pmd_present(old_pmd)) + if (pmd_present(old_pmd)) { + pmd_clear(pmd); kvm_tlb_flush_vmid_ipa(kvm, addr); - else + } else { get_page(virt_to_page(pmd)); + } + + kvm_set_pmd(pmd, *new_pmd); return 0; } @@ -963,12 +966,14 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, /* Create 2nd stage page table mapping - Level 3 */ old_pte = *pte; - kvm_set_pte(pte, *new_pte); - if (pte_present(old_pte)) + if (pte_present(old_pte)) { + kvm_set_pte(pte, __pte(0)); kvm_tlb_flush_vmid_ipa(kvm, addr); - else + } else { get_page(virt_to_page(pte)); + } + kvm_set_pte(pte, *new_pte); return 0; } -- cgit v1.2.3-58-ga151 From 06485053244480f5f403d8f89b8617bd7d549113 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 13 Apr 2016 17:57:37 +0100 Subject: kvm: arm64: Enable hardware updates of the Access Flag for Stage 2 page tables The ARMv8.1 architecture extensions introduce support for hardware updates of the access and dirty information in page table entries. With VTCR_EL2.HA enabled (bit 21), when the CPU accesses an IPA with the PTE_AF bit cleared in the stage 2 page table, instead of raising an Access Flag fault to EL2 the CPU sets the actual page table entry bit (10). To ensure that kernel modifications to the page table do not inadvertently revert a bit set by hardware updates, certain Stage 2 software pte/pmd operations must be performed atomically. The main user of the AF bit is the kvm_age_hva() mechanism. The kvm_age_hva_handler() function performs a "test and clear young" action on the pte/pmd. 
This needs to be atomic in respect of automatic hardware updates of the AF bit. Since the AF bit is in the same position for both Stage 1 and Stage 2, the patch reuses the existing ptep_test_and_clear_young() functionality if __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG is defined. Otherwise, the existing pte_young/pte_mkold mechanism is preserved. The kvm_set_s2pte_readonly() (and the corresponding pmd equivalent) have to perform atomic modifications in order to avoid a race with updates of the AF bit. The arm64 implementation has been re-written using exclusives. Currently, kvm_set_s2pte_writable() (and pmd equivalent) take a pointer argument and modify the pte/pmd in place. However, these functions are only used on local variables rather than actual page table entries, so it makes more sense to follow the pte_mkwrite() approach for stage 1 attributes. The change to kvm_s2pte_mkwrite() makes it clear that these functions do not modify the actual page table entries. The (pte|pmd)_mkyoung() uses on Stage 2 entries (setting the AF bit explicitly) do not need to be modified since hardware updates of the dirty status are not supported by KVM, so there is no possibility of losing such information. Signed-off-by: Catalin Marinas Cc: Paolo Bonzini Acked-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_mmu.h | 10 +++++---- arch/arm/kvm/mmu.c | 46 +++++++++++++++++++++++++--------------- arch/arm64/include/asm/kvm_arm.h | 2 ++ arch/arm64/include/asm/kvm_mmu.h | 27 +++++++++++++++++------ arch/arm64/include/asm/pgtable.h | 13 ++++++++---- arch/arm64/kvm/hyp/s2-setup.c | 8 +++++++ 6 files changed, 74 insertions(+), 32 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 6344ea0ad624..ef0b276d97fc 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -106,14 +106,16 @@ static inline void kvm_clean_pte(pte_t *pte) clean_pte_table(pte); } -static inline void kvm_set_s2pte_writable(pte_t *pte) +static inline pte_t kvm_s2pte_mkwrite(pte_t pte) { - pte_val(*pte) |= L_PTE_S2_RDWR; + pte_val(pte) |= L_PTE_S2_RDWR; + return pte; } -static inline void kvm_set_s2pmd_writable(pmd_t *pmd) +static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd) { - pmd_val(*pmd) |= L_PMD_S2_RDWR; + pmd_val(pmd) |= L_PMD_S2_RDWR; + return pmd; } static inline void kvm_set_s2pte_readonly(pte_t *pte) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 74b5d199f6b7..783e5ff0b32e 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -977,6 +977,27 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, return 0; } +#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +static int stage2_ptep_test_and_clear_young(pte_t *pte) +{ + if (pte_young(*pte)) { + *pte = pte_mkold(*pte); + return 1; + } + return 0; +} +#else +static int stage2_ptep_test_and_clear_young(pte_t *pte) +{ + return __ptep_test_and_clear_young(pte); +} +#endif + +static int stage2_pmdp_test_and_clear_young(pmd_t *pmd) +{ + return stage2_ptep_test_and_clear_young((pte_t *)pmd); +} + /** * kvm_phys_addr_ioremap - map a device range to guest IPA * @@ -1000,7 +1021,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE); if (writable) - kvm_set_s2pte_writable(&pte); + pte = kvm_s2pte_mkwrite(pte); ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES, KVM_NR_MEM_OBJS); @@ -1342,7 +1363,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, 
phys_addr_t fault_ipa, pmd_t new_pmd = pfn_pmd(pfn, mem_type); new_pmd = pmd_mkhuge(new_pmd); if (writable) { - kvm_set_s2pmd_writable(&new_pmd); + new_pmd = kvm_s2pmd_mkwrite(new_pmd); kvm_set_pfn_dirty(pfn); } coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached); @@ -1351,7 +1372,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, pte_t new_pte = pfn_pte(pfn, mem_type); if (writable) { - kvm_set_s2pte_writable(&new_pte); + new_pte = kvm_s2pte_mkwrite(new_pte); kvm_set_pfn_dirty(pfn); mark_page_dirty(kvm, gfn); } @@ -1370,6 +1391,8 @@ out_unlock: * Resolve the access fault by making the page young again. * Note that because the faulting entry is guaranteed not to be * cached in the TLB, we don't need to invalidate anything. + * Only the HW Access Flag updates are supported for Stage 2 (no DBM), + * so there is no need for atomic (pte|pmd)_mkyoung operations. */ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) { @@ -1610,25 +1633,14 @@ static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) if (!pmd || pmd_none(*pmd)) /* Nothing there */ return 0; - if (pmd_thp_or_huge(*pmd)) { /* THP, HugeTLB */ - if (pmd_young(*pmd)) { - *pmd = pmd_mkold(*pmd); - return 1; - } - - return 0; - } + if (pmd_thp_or_huge(*pmd)) /* THP, HugeTLB */ + return stage2_pmdp_test_and_clear_young(pmd); pte = pte_offset_kernel(pmd, gpa); if (pte_none(*pte)) return 0; - if (pte_young(*pte)) { - *pte = pte_mkold(*pte); /* Just a page... */ - return 1; - } - - return 0; + return stage2_ptep_test_and_clear_young(pte); } static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index c6cbb361bbcf..ffde15fed3e1 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -111,6 +111,8 @@ /* VTCR_EL2 Registers bits */ #define VTCR_EL2_RES1 (1 << 31) +#define VTCR_EL2_HD (1 << 22) +#define VTCR_EL2_HA (1 << 21) #define VTCR_EL2_PS_MASK TCR_EL2_PS_MASK #define VTCR_EL2_TG0_MASK TCR_TG0_MASK #define VTCR_EL2_TG0_4K TCR_TG0_4K diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 249c4fc9c5f6..844fe5d5ff44 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -111,19 +111,32 @@ static inline void kvm_clean_pmd_entry(pmd_t *pmd) {} static inline void kvm_clean_pte(pte_t *pte) {} static inline void kvm_clean_pte_entry(pte_t *pte) {} -static inline void kvm_set_s2pte_writable(pte_t *pte) +static inline pte_t kvm_s2pte_mkwrite(pte_t pte) { - pte_val(*pte) |= PTE_S2_RDWR; + pte_val(pte) |= PTE_S2_RDWR; + return pte; } -static inline void kvm_set_s2pmd_writable(pmd_t *pmd) +static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd) { - pmd_val(*pmd) |= PMD_S2_RDWR; + pmd_val(pmd) |= PMD_S2_RDWR; + return pmd; } static inline void kvm_set_s2pte_readonly(pte_t *pte) { - pte_val(*pte) = (pte_val(*pte) & ~PTE_S2_RDWR) | PTE_S2_RDONLY; + pteval_t pteval; + unsigned long tmp; + + asm volatile("// kvm_set_s2pte_readonly\n" + " prfm pstl1strm, %2\n" + "1: ldxr %0, %2\n" + " and %0, %0, %3 // clear PTE_S2_RDWR\n" + " orr %0, %0, %4 // set PTE_S2_RDONLY\n" + " stxr %w1, %0, %2\n" + " cbnz %w1, 1b\n" + : "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*pte)) + : "L" (~PTE_S2_RDWR), "L" (PTE_S2_RDONLY)); } static inline bool kvm_s2pte_readonly(pte_t *pte) @@ -133,12 +146,12 @@ static inline bool kvm_s2pte_readonly(pte_t *pte) static inline void kvm_set_s2pmd_readonly(pmd_t *pmd) { - 
pmd_val(*pmd) = (pmd_val(*pmd) & ~PMD_S2_RDWR) | PMD_S2_RDONLY; + kvm_set_s2pte_readonly((pte_t *)pmd); } static inline bool kvm_s2pmd_readonly(pmd_t *pmd) { - return (pmd_val(*pmd) & PMD_S2_RDWR) == PMD_S2_RDONLY; + return kvm_s2pte_readonly((pte_t *)pmd); } static inline bool kvm_page_empty(void *ptr) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index dda4aa9ba3f8..f1d5afdb12db 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -532,14 +532,12 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) * Atomic pte/pmd modifications. */ #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, - unsigned long address, - pte_t *ptep) +static inline int __ptep_test_and_clear_young(pte_t *ptep) { pteval_t pteval; unsigned int tmp, res; - asm volatile("// ptep_test_and_clear_young\n" + asm volatile("// __ptep_test_and_clear_young\n" " prfm pstl1strm, %2\n" "1: ldxr %0, %2\n" " ubfx %w3, %w0, %5, #1 // extract PTE_AF (young)\n" @@ -552,6 +550,13 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, return res; } +static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long address, + pte_t *ptep) +{ + return __ptep_test_and_clear_young(ptep); +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c index bcbe761a5a3d..b81f4091c909 100644 --- a/arch/arm64/kvm/hyp/s2-setup.c +++ b/arch/arm64/kvm/hyp/s2-setup.c @@ -65,6 +65,14 @@ u32 __hyp_text __init_stage2_translation(void) */ val |= 64 - (parange > 40 ? 40 : parange); + /* + * Check the availability of Hardware Access Flag / Dirty Bit + * Management in ID_AA64MMFR1_EL1 and enable the feature in VTCR_EL2. + */ + tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_HADBS_SHIFT) & 0xf; + if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && tmp) + val |= VTCR_EL2_HA; + /* * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS * bit in VTCR_EL2. -- cgit v1.2.3-58-ga151
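
A quick sanity check of the stage2 geometry set up by the patches above. The following user-space sketch is illustrative only: it is not part of any patch, it merely mirrors the arithmetic of ARM64_HW_PGTABLE_LEVELS()/ARM64_HW_PGTABLE_LEVEL_SHIFT() from pgtable-hwdef.h (reproduced here as local HW_PGTABLE_LEVELS/HW_PGTABLE_LEVEL_SHIFT stand-ins) and of STAGE2_PGTABLE_LEVELS, S2_PGDIR_SHIFT and PTRS_PER_S2_PGD from stage2_pgtable.h, assuming a 40-bit guest IPA (KVM_PHYS_SHIFT = 40) and 8-byte table entries:

#include <stdio.h>

#define KVM_PHYS_SHIFT	40	/* 40-bit guest IPA space assumed throughout */

/* Stand-ins for the kernel macros, parameterised on PAGE_SHIFT */
#define HW_PGTABLE_LEVELS(ps, va)	(((va) - (ps) + ((ps) - 3) - 1) / ((ps) - 3))
#define HW_PGTABLE_LEVEL_SHIFT(ps, n)	(((ps) - 3) * (4 - (n)) + 3)

int main(void)
{
	int page_shift[] = { 12, 14, 16 };	/* 4K, 16K, 64K granules */

	for (int i = 0; i < 3; i++) {
		int ps = page_shift[i];
		/* Concatenation resolves up to 4 extra bits at the entry level */
		int levels = HW_PGTABLE_LEVELS(ps, KVM_PHYS_SHIFT - 4);
		int pgdir_shift = HW_PGTABLE_LEVEL_SHIFT(ps, 4 - levels);
		unsigned long ptrs = 1UL << (KVM_PHYS_SHIFT - pgdir_shift);
		/* Number of concatenated initial-level tables (8 bytes/entry) */
		unsigned long concat = (ptrs * 8) >> ps;

		printf("%2luK pages: levels=%d pgdir_shift=%d ptrs_per_s2_pgd=%lu "
		       "concatenated=%lu cache_min_pages=%d\n",
		       1UL << (ps - 10), levels, pgdir_shift, ptrs,
		       concat ? concat : 1, levels - 1);
	}
	return 0;
}

With 4K pages this yields 3 stage2 levels and 2 concatenated entry-level tables, with 16K pages 2 levels and 16 concatenated tables, and with 64K pages 2 levels with a single (quarter-page) entry-level table, which lines up with the VTCR_EL2_TG0/SL0 settings and the KVM_MMU_CACHE_MIN_PAGES values introduced above. This is only a sketch under the stated assumptions; the authoritative definitions remain those in arch/arm64/include/asm/stage2_pgtable.h and arch/arm64/include/asm/kvm_arm.h.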