author     Tom Lendacky <thomas.lendacky@amd.com>   2017-07-17 16:10:32 -0500
committer  Ingo Molnar <mingo@kernel.org>           2017-07-18 11:38:05 +0200
commit     6ebcb060713f614c92216482eed501b31cee74ec
tree       0d09b269dd7e14a122e2728612e8eed95955d056 /arch/x86/mm/mem_encrypt.c
parent     db516997a985b461f021d594e78155bbc7fc3e7e
x86/mm: Add support to encrypt the kernel in-place
Add the support to encrypt the kernel in-place. This is done by creating
new page mappings for the kernel - a decrypted write-protected mapping
and an encrypted mapping. The kernel is encrypted by copying it through
a temporary buffer.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brijesh Singh <brijesh.singh@amd.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Toshimitsu Kani <toshi.kani@hpe.com>
Cc: kasan-dev@googlegroups.com
Cc: kvm@vger.kernel.org
Cc: linux-arch@vger.kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-efi@vger.kernel.org
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/c039bf9412ef95e1e6bf4fdf8facab95e00c717b.1500319216.git.thomas.lendacky@amd.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
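The scheme relies on aliasing: the same physical memory is reachable through
two virtual mappings with different attributes, and the copy through the
temporary buffer is what rewrites the contents from one view into the other.
A minimal user-space sketch of that aliasing idea (Linux with glibc 2.27+
assumed for memfd_create(); this is an analogy, not code from the patch):

/* User-space analogy only: two virtual mappings aliasing the same
 * physical pages, with the contents moved between the two views through
 * a small bounce buffer - the same shape as the decrypted/encrypted
 * kernel mappings plus the PMD-sized intermediate copy buffer. */
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        size_t len = 4096;
        char buf[64];
        int fd = memfd_create("alias", 0);

        if (fd < 0 || ftruncate(fd, len) < 0)
                return 1;

        /* One read-only view and one writable view of the same pages,
         * like the write-protected decrypted map and the encrypted map. */
        char *rd = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
        char *wr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

        if (rd == MAP_FAILED || wr == MAP_FAILED)
                return 1;

        strcpy(wr, "kernel image");

        /* Copy through the bounce buffer: read via one view, write via
         * the other. Under SME the write side is where the hardware
         * encrypts the data on its way back to memory. */
        memcpy(buf, rd, sizeof(buf));
        memcpy(wr, buf, sizeof(buf));

        printf("%s\n", rd);     /* both views show the same bytes */
        return 0;
}

In the patch itself the decrypted view additionally gets the write-protect
memory type (see the _PAGE_PAT | _PAGE_PWT flags below), which, per the
patch's own comment, keeps that memory from being cached during the copy.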
Diffstat (limited to 'arch/x86/mm/mem_encrypt.c')
-rw-r--r--  arch/x86/mm/mem_encrypt.c | 310
1 file changed, 310 insertions(+), 0 deletions(-)
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index a7400ec8538b..e5d543938b0f 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -21,6 +21,8 @@
#include <asm/setup.h>
#include <asm/bootparam.h>
#include <asm/set_memory.h>
+#include <asm/cacheflush.h>
+#include <asm/sections.h>
/*
 * Since SME related variables are set early in the boot process they must
@@ -199,8 +201,316 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
        set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
}
+static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
+                                 unsigned long end)
+{
+        unsigned long pgd_start, pgd_end, pgd_size;
+        pgd_t *pgd_p;
+
+        pgd_start = start & PGDIR_MASK;
+        pgd_end = end & PGDIR_MASK;
+
+        pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1);
+        pgd_size *= sizeof(pgd_t);
+
+        pgd_p = pgd_base + pgd_index(start);
+
+        memset(pgd_p, 0, pgd_size);
+}
+
+#define PGD_FLAGS        _KERNPG_TABLE_NOENC
+#define P4D_FLAGS        _KERNPG_TABLE_NOENC
+#define PUD_FLAGS        _KERNPG_TABLE_NOENC
+#define PMD_FLAGS        (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
+
+static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
+                                     unsigned long vaddr, pmdval_t pmd_val)
+{
+        pgd_t *pgd_p;
+        p4d_t *p4d_p;
+        pud_t *pud_p;
+        pmd_t *pmd_p;
+
+        pgd_p = pgd_base + pgd_index(vaddr);
+        if (native_pgd_val(*pgd_p)) {
+                if (IS_ENABLED(CONFIG_X86_5LEVEL))
+                        p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
+                else
+                        pud_p = (pud_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
+        } else {
+                pgd_t pgd;
+
+                if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+                        p4d_p = pgtable_area;
+                        memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
+                        pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
+
+                        pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS);
+                } else {
+                        pud_p = pgtable_area;
+                        memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
+                        pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
+
+                        pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS);
+                }
+                native_set_pgd(pgd_p, pgd);
+        }
+
+        if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+                p4d_p += p4d_index(vaddr);
+                if (native_p4d_val(*p4d_p)) {
+                        pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK);
+                } else {
+                        p4d_t p4d;
+
+                        pud_p = pgtable_area;
+                        memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
+                        pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
+
+                        p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS);
+                        native_set_p4d(p4d_p, p4d);
+                }
+        }
+
+        pud_p += pud_index(vaddr);
+        if (native_pud_val(*pud_p)) {
+                if (native_pud_val(*pud_p) & _PAGE_PSE)
+                        goto out;
+
+                pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
+        } else {
+                pud_t pud;
+
+                pmd_p = pgtable_area;
+                memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
+                pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;
+
+                pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS);
+                native_set_pud(pud_p, pud);
+        }
+
+        pmd_p += pmd_index(vaddr);
+        if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
+                native_set_pmd(pmd_p, native_make_pmd(pmd_val));
+
+out:
+        return pgtable_area;
+}
+
+static unsigned long __init sme_pgtable_calc(unsigned long len)
+{
+        unsigned long p4d_size, pud_size, pmd_size;
+        unsigned long total;
+
+        /*
+         * Perform a relatively simplistic calculation of the pagetable
+         * entries that are needed. The mappings will be covered by 2MB
+         * PMD entries so we can conservatively calculate the required
+         * number of P4D, PUD and PMD structures needed to perform the
+         * mappings. Incrementing the count for each covers the case where
+         * the addresses cross entries.
+         */
+        if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+                p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
+                p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
+                pud_size = (ALIGN(len, P4D_SIZE) / P4D_SIZE) + 1;
+                pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
+        } else {
+                p4d_size = 0;
+                pud_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
+                pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
+        }
+        pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
+        pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
+
+        total = p4d_size + pud_size + pmd_size;
+
+        /*
+         * Now calculate the added pagetable structures needed to populate
+         * the new pagetables.
+         */
+        if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+                p4d_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
+                p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
+                pud_size = ALIGN(total, P4D_SIZE) / P4D_SIZE;
+                pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
+        } else {
+                p4d_size = 0;
+                pud_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
+                pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
+        }
+        pmd_size = ALIGN(total, PUD_SIZE) / PUD_SIZE;
+        pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
+
+        total += p4d_size + pud_size + pmd_size;
+
+        return total;
+}
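As a sanity check, the 4-level (!CONFIG_X86_5LEVEL) branch of this sizing can
be reproduced stand-alone. A sketch assuming the standard x86-64 constants
(512-entry, 4 KB tables; 512 GB per PGD entry; 1 GB per PUD entry) and an
LP64 target; pgtable_calc() is a hypothetical stand-in, not the kernel
function:

#include <stdio.h>

#define PGDIR_SIZE      (1UL << 39)     /* one PGD entry maps 512 GB */
#define PUD_SIZE        (1UL << 30)     /* one PUD entry maps 1 GB */
#define TABLE_SIZE      4096UL          /* 512 eight-byte entries per table */
#define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~((a) - 1))

/* Worst-case pagetable bytes to map 'len' bytes with 2MB PMD entries:
 * one PUD table per PGD entry touched and one PMD table per PUD entry
 * touched, plus one extra of each in case the range crosses an entry
 * boundary (the "+ 1" in the kernel code). */
static unsigned long pgtable_calc(unsigned long len)
{
        unsigned long pud, pmd, total;

        pud = ((ALIGN_UP(len, PGDIR_SIZE) / PGDIR_SIZE) + 1) * TABLE_SIZE;
        pmd = ((ALIGN_UP(len, PUD_SIZE) / PUD_SIZE) + 1) * TABLE_SIZE;
        total = pud + pmd;

        /* Second pass: the tables needed to map the tables themselves. */
        pud = (ALIGN_UP(total, PGDIR_SIZE) / PGDIR_SIZE) * TABLE_SIZE;
        pmd = (ALIGN_UP(total, PUD_SIZE) / PUD_SIZE) * TABLE_SIZE;

        return total + pud + pmd;
}

int main(void)
{
        /* A 64 MB image needs 2 PUD + 2 PMD tables for its own mapping
         * and 1 + 1 more to map those tables: prints 24576. */
        printf("%lu bytes of tables\n", pgtable_calc(64UL << 20));
        return 0;
}

The doubling and the extra sme_pgtable_calc() call in sme_encrypt_kernel()
below then account for the two kernel mappings and for mapping the workarea
into the live pagetables.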
+
void __init sme_encrypt_kernel(void)
{
+        unsigned long workarea_start, workarea_end, workarea_len;
+        unsigned long execute_start, execute_end, execute_len;
+        unsigned long kernel_start, kernel_end, kernel_len;
+        unsigned long pgtable_area_len;
+        unsigned long paddr, pmd_flags;
+        unsigned long decrypted_base;
+        void *pgtable_area;
+        pgd_t *pgd;
+
+        if (!sme_active())
+                return;
+
+        /*
+         * Prepare for encrypting the kernel by building new pagetables with
+         * the attributes needed to encrypt the kernel in place.
+         *
+         *   One range of virtual addresses will map the memory occupied
+         *   by the kernel as encrypted.
+         *
+         *   Another range of virtual addresses will map the memory occupied
+         *   by the kernel as decrypted and write-protected.
+         *
+         *     The use of the write-protect attribute will prevent any of
+         *     the memory from being cached.
+         */
+
+        /* Physical addresses give us the identity-mapped virtual addresses */
+        kernel_start = __pa_symbol(_text);
+        kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
+        kernel_len = kernel_end - kernel_start;
+
+        /* Set the encryption workarea to be immediately after the kernel */
+        workarea_start = kernel_end;
+
+        /*
+         * Calculate the number of workarea bytes needed:
+         *   executable encryption area size:
+         *     stack page (PAGE_SIZE)
+         *     encryption routine page (PAGE_SIZE)
+         *     intermediate copy buffer (PMD_PAGE_SIZE)
+         *   pagetable structures for the encryption of the kernel
+         *   pagetable structures for workarea (in case not currently mapped)
+         */
+        execute_start = workarea_start;
+        execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE;
+        execute_len = execute_end - execute_start;
+
+        /*
+         * One PGD for both encrypted and decrypted mappings and a set of
+         * PUDs and PMDs for each of the encrypted and decrypted mappings.
+         */
+        pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
+        pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;
+
+        /* PUDs and PMDs needed in the current pagetables for the workarea */
+        pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);
+
+        /*
+         * The total workarea includes the executable encryption area and
+         * the pagetable area.
+         */
+        workarea_len = execute_len + pgtable_area_len;
+        workarea_end = workarea_start + workarea_len;
+
+        /*
+         * Set the address to the start of where newly created pagetable
+         * structures (PGDs, PUDs and PMDs) will be allocated. New pagetable
+         * structures are created when the workarea is added to the current
+         * pagetables and when the new encrypted and decrypted kernel
+         * mappings are populated.
+         */
+        pgtable_area = (void *)execute_end;
+
+        /*
+         * Make sure the current pagetable structure has entries for
+         * addressing the workarea.
+         */
+        pgd = (pgd_t *)native_read_cr3_pa();
+        paddr = workarea_start;
+        while (paddr < workarea_end) {
+                pgtable_area = sme_populate_pgd(pgd, pgtable_area,
+                                                paddr,
+                                                paddr + PMD_FLAGS);
+
+                paddr += PMD_PAGE_SIZE;
+        }
+
+        /* Flush the TLB - no globals so cr3 is enough */
+        native_write_cr3(__native_read_cr3());
+
+        /*
+         * A new pagetable structure is being built to allow for the kernel
+         * to be encrypted. It starts with an empty PGD that will then be
+         * populated with new PUDs and PMDs as the encrypted and decrypted
+         * kernel mappings are created.
+         */
+        pgd = pgtable_area;
+        memset(pgd, 0, sizeof(*pgd) * PTRS_PER_PGD);
+        pgtable_area += sizeof(*pgd) * PTRS_PER_PGD;
+
+        /* Add encrypted kernel (identity) mappings */
+        pmd_flags = PMD_FLAGS | _PAGE_ENC;
+        paddr = kernel_start;
+        while (paddr < kernel_end) {
+                pgtable_area = sme_populate_pgd(pgd, pgtable_area,
+                                                paddr,
+                                                paddr + pmd_flags);
+
+                paddr += PMD_PAGE_SIZE;
+        }
+
+        /*
+         * A different PGD index/entry must be used to get different
+         * pagetable entries for the decrypted mapping. Choose the next
+         * PGD index and convert it to a virtual address to be used as
+         * the base of the mapping.
+         */
+        decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
+        decrypted_base <<= PGDIR_SHIFT;
+
+        /* Add decrypted, write-protected kernel (non-identity) mappings */
+        pmd_flags = (PMD_FLAGS & ~_PAGE_CACHE_MASK) | (_PAGE_PAT | _PAGE_PWT);
+        paddr = kernel_start;
+        while (paddr < kernel_end) {
+                pgtable_area = sme_populate_pgd(pgd, pgtable_area,
+                                                paddr + decrypted_base,
+                                                paddr + pmd_flags);
+
+                paddr += PMD_PAGE_SIZE;
+        }
+
+        /* Add decrypted workarea mappings to both kernel mappings */
+        paddr = workarea_start;
+        while (paddr < workarea_end) {
+                pgtable_area = sme_populate_pgd(pgd, pgtable_area,
+                                                paddr,
+                                                paddr + PMD_FLAGS);
+
+                pgtable_area = sme_populate_pgd(pgd, pgtable_area,
+                                                paddr + decrypted_base,
+                                                paddr + PMD_FLAGS);
+
+                paddr += PMD_PAGE_SIZE;
+        }
+
+        /* Perform the encryption */
+        sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
+                            kernel_len, workarea_start, (unsigned long)pgd);
+
+        /*
+         * At this point we are running encrypted. Remove the mappings for
+         * the decrypted areas - all that is needed for this is to remove
+         * the PGD entry/entries.
+         */
+        sme_clear_pgd(pgd, kernel_start + decrypted_base,
+                      kernel_end + decrypted_base);
+
+        sme_clear_pgd(pgd, workarea_start + decrypted_base,
+                      workarea_end + decrypted_base);
+
+        /* Flush the TLB - no globals so cr3 is enough */
+        native_write_cr3(__native_read_cr3());
}
void __init sme_enable(void)
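One detail worth spelling out from sme_encrypt_kernel() above: the decrypted
alias is based at the virtual address of the PGD slot just past the one
covering workarea_end, so the alias never shares PGD entries with the
identity mapping. A stand-alone sketch of that index arithmetic, assuming
4-level x86-64 constants (decrypted_base() is a hypothetical helper, not
kernel API):

#include <stdio.h>

#define PGDIR_SHIFT     39
#define PTRS_PER_PGD    512UL

/* Stand-in for the decrypted_base computation: the PGD slot after the
 * one covering workarea_end, wrapped modulo the table size, converted
 * back to a virtual address. */
static unsigned long decrypted_base(unsigned long workarea_end)
{
        unsigned long pgd_idx;

        pgd_idx = ((workarea_end >> PGDIR_SHIFT) + 1) & (PTRS_PER_PGD - 1);

        return pgd_idx << PGDIR_SHIFT;
}

int main(void)
{
        /* A kernel + workarea ending at 1 GB lives in PGD slot 0, so
         * the decrypted alias is based at slot 1: prints 0x8000000000
         * (512 GB). */
        printf("%#lx\n", decrypted_base(1UL << 30));
        return 0;
}

Because the alias occupies PGD slots of its own, tearing it down after the
encryption reduces to the two sme_clear_pgd() calls that simply zero those
slots.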