summaryrefslogtreecommitdiff
path: root/arch/x86/coco
diff options
context:
space:
mode:
authorKirill A. Shutemov <kirill.shutemov@linux.intel.com>2024-06-14 12:58:56 +0300
committerBorislav Petkov (AMD) <bp@alien8.de>2024-06-17 17:46:05 +0200
commit859e63b789d6b17b3c64e51a0aabdc58752a0254 (patch)
treeb58c951feacd6b8110f34db71051d1cd53609391 /arch/x86/coco
parent22daa42294b419a0d8060a3870285e7a72aa63e4 (diff)
x86/tdx: Convert shared memory back to private on kexec
TDX guests allocate shared buffers to perform I/O. It is done by allocating pages normally from the buddy allocator and converting them to shared with set_memory_decrypted(). The second, kexec-ed kernel has no idea what memory is converted this way. It only sees E820_TYPE_RAM. Accessing shared memory via private mapping is fatal. It leads to unrecoverable TD exit. On kexec, walk direct mapping and convert all shared memory back to private. It makes all RAM private again and second kernel may use it normally. The conversion occurs in two steps: stopping new conversions and unsharing all memory. In the case of normal kexec, the stopping of conversions takes place while scheduling is still functioning. This allows for waiting until any ongoing conversions are finished. The second step is carried out when all CPUs except one are inactive and interrupts are disabled. This prevents any conflicts with code that may access shared memory. Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de> Reviewed-by: Rick Edgecombe <rick.p.edgecombe@intel.com> Reviewed-by: Kai Huang <kai.huang@intel.com> Tested-by: Tao Liu <ltao@redhat.com> Link: https://lore.kernel.org/r/20240614095904.1345461-12-kirill.shutemov@linux.intel.com
Diffstat (limited to 'arch/x86/coco')
-rw-r--r--arch/x86/coco/tdx/tdx.c94
1 files changed, 94 insertions, 0 deletions
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 979891e97d83..078e2bac2553 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -7,6 +7,7 @@
#include <linux/cpufeature.h>
#include <linux/export.h>
#include <linux/io.h>
+#include <linux/kexec.h>
#include <asm/coco.h>
#include <asm/tdx.h>
#include <asm/vmx.h>
@@ -14,6 +15,7 @@
#include <asm/insn.h>
#include <asm/insn-eval.h>
#include <asm/pgtable.h>
+#include <asm/set_memory.h>
/* MMIO direction */
#define EPT_READ 0
@@ -831,6 +833,95 @@ static int tdx_enc_status_change_finish(unsigned long vaddr, int numpages,
return 0;
}
+/* Stop new private<->shared conversions */
+static void tdx_kexec_begin(void)
+{
+ if (!IS_ENABLED(CONFIG_KEXEC_CORE))
+ return;
+
+ /*
+ * Crash kernel reaches here with interrupts disabled: can't wait for
+ * conversions to finish.
+ *
+ * If race happened, just report and proceed.
+ */
+ if (!set_memory_enc_stop_conversion())
+ pr_warn("Failed to stop shared<->private conversions\n");
+}
+
+/* Walk direct mapping and convert all shared memory back to private */
+static void tdx_kexec_finish(void)
+{
+ unsigned long addr, end;
+ long found = 0, shared;
+
+ if (!IS_ENABLED(CONFIG_KEXEC_CORE))
+ return;
+
+ lockdep_assert_irqs_disabled();
+
+ addr = PAGE_OFFSET;
+ end = PAGE_OFFSET + get_max_mapped();
+
+ while (addr < end) {
+ unsigned long size;
+ unsigned int level;
+ pte_t *pte;
+
+ pte = lookup_address(addr, &level);
+ size = page_level_size(level);
+
+ if (pte && pte_decrypted(*pte)) {
+ int pages = size / PAGE_SIZE;
+
+ /*
+ * Touching memory with shared bit set triggers implicit
+ * conversion to shared.
+ *
+ * Make sure nobody touches the shared range from
+ * now on.
+ */
+ set_pte(pte, __pte(0));
+
+ /*
+ * Memory encryption state persists across kexec.
+ * If tdx_enc_status_changed() fails in the first
+ * kernel, it leaves memory in an unknown state.
+ *
+ * If that memory remains shared, accessing it in the
+ * *next* kernel through a private mapping will result
+ * in an unrecoverable guest shutdown.
+ *
+ * The kdump kernel boot is not impacted as it uses
+ * a pre-reserved memory range that is always private.
+ * However, gathering crash information could lead to
+ * a crash if it accesses unconverted memory through
+ * a private mapping which is possible when accessing
+ * that memory through /proc/vmcore, for example.
+ *
+ * In all cases, print error info in order to leave
+ * enough bread crumbs for debugging.
+ */
+ if (!tdx_enc_status_changed(addr, pages, true)) {
+ pr_err("Failed to unshare range %#lx-%#lx\n",
+ addr, addr + size);
+ }
+
+ found += pages;
+ }
+
+ addr += size;
+ }
+
+ __flush_tlb_all();
+
+ shared = atomic_long_read(&nr_shared);
+ if (shared != found) {
+ pr_err("shared page accounting is off\n");
+ pr_err("nr_shared = %ld, nr_found = %ld\n", shared, found);
+ }
+}
+
void __init tdx_early_init(void)
{
struct tdx_module_args args = {
@@ -890,6 +981,9 @@ void __init tdx_early_init(void)
x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required;
x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
+ x86_platform.guest.enc_kexec_begin = tdx_kexec_begin;
+ x86_platform.guest.enc_kexec_finish = tdx_kexec_finish;
+
/*
* TDX intercepts the RDMSR to read the X2APIC ID in the parallel
* bringup low level code. That raises #VE which cannot be handled