summaryrefslogtreecommitdiff
path: root/drivers/iommu/amd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu/amd')
-rw-r--r--drivers/iommu/amd/Kconfig1
-rw-r--r--drivers/iommu/amd/Makefile2
-rw-r--r--drivers/iommu/amd/amd_iommu.h29
-rw-r--r--drivers/iommu/amd/amd_iommu_types.h47
-rw-r--r--drivers/iommu/amd/init.c110
-rw-r--r--drivers/iommu/amd/io_pgtable.c558
-rw-r--r--drivers/iommu/amd/iommu.c672
-rw-r--r--drivers/iommu/amd/iommu_v2.c4
8 files changed, 794 insertions, 629 deletions
diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig
index 626b97d0dd21..a3cbafb603f5 100644
--- a/drivers/iommu/amd/Kconfig
+++ b/drivers/iommu/amd/Kconfig
@@ -10,6 +10,7 @@ config AMD_IOMMU
select IOMMU_API
select IOMMU_IOVA
select IOMMU_DMA
+ select IOMMU_IO_PGTABLE
depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE
help
With this option you can enable support for AMD IOMMU hardware in
diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile
index dc5a2fa4fd37..a935f8f4b974 100644
--- a/drivers/iommu/amd/Makefile
+++ b/drivers/iommu/amd/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o
+obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o
obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o
obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 6b8cbdf71714..026ce7f8d993 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -36,6 +36,7 @@ extern void amd_iommu_disable(void);
extern int amd_iommu_reenable(int);
extern int amd_iommu_enable_faulting(void);
extern int amd_iommu_guest_ir;
+extern enum io_pgtable_fmt amd_iommu_pgtable;
/* IOMMUv2 specific functions */
struct iommu_domain;
@@ -56,6 +57,10 @@ extern void amd_iommu_domain_direct_map(struct iommu_domain *dom);
extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
extern int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid,
u64 address);
+extern void amd_iommu_update_and_flush_device_table(struct protection_domain *domain);
+extern void amd_iommu_domain_update(struct protection_domain *domain);
+extern void amd_iommu_domain_flush_complete(struct protection_domain *domain);
+extern void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain);
extern int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid);
extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid,
unsigned long cr3);
@@ -84,12 +89,9 @@ static inline bool is_rd890_iommu(struct pci_dev *pdev)
(pdev->device == PCI_DEVICE_ID_RD890_IOMMU);
}
-static inline bool iommu_feature(struct amd_iommu *iommu, u64 f)
+static inline bool iommu_feature(struct amd_iommu *iommu, u64 mask)
{
- if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
- return false;
-
- return !!(iommu->features & f);
+ return !!(iommu->features & mask);
}
static inline u64 iommu_virt_to_phys(void *vaddr)
@@ -102,6 +104,21 @@ static inline void *iommu_phys_to_virt(unsigned long paddr)
return phys_to_virt(__sme_clr(paddr));
}
+static inline
+void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root)
+{
+ atomic64_set(&domain->iop.pt_root, root);
+ domain->iop.root = (u64 *)(root & PAGE_MASK);
+ domain->iop.mode = root & 7; /* lowest 3 bits encode pgtable mode */
+}
+
+static inline
+void amd_iommu_domain_clr_pt_root(struct protection_domain *domain)
+{
+ amd_iommu_domain_set_pt_root(domain, 0);
+}
+
+
extern bool translation_pre_enabled(struct amd_iommu *iommu);
extern bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
struct device *dev);
@@ -114,4 +131,6 @@ void amd_iommu_apply_ivrs_quirks(void);
static inline void amd_iommu_apply_ivrs_quirks(void) { }
#endif
+extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
+ u64 *root, int mode);
#endif
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 553587827771..6937e3674a16 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -15,6 +15,7 @@
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/irqreturn.h>
+#include <linux/io-pgtable.h>
/*
* Maximum number of IOMMUs supported
@@ -252,6 +253,19 @@
#define GA_GUEST_NR 0x1
+#define IOMMU_IN_ADDR_BIT_SIZE 52
+#define IOMMU_OUT_ADDR_BIT_SIZE 52
+
+/*
+ * This bitmap is used to advertise the page sizes our hardware support
+ * to the IOMMU core, which will then use this information to split
+ * physically contiguous memory regions it is mapping into page sizes
+ * that we support.
+ *
+ * 512GB Pages are not supported due to a hardware bug
+ */
+#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38))
+
/* Bit value definition for dte irq remapping fields*/
#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
#define DTE_IRQ_REMAP_INTCTL_MASK (0x3ULL << 60)
@@ -387,6 +401,10 @@
#define IOMMU_CAP_NPCACHE 26
#define IOMMU_CAP_EFR 27
+/* IOMMU IVINFO */
+#define IOMMU_IVINFO_OFFSET 36
+#define IOMMU_IVINFO_EFRSUP BIT(0)
+
/* IOMMU Feature Reporting Field (for IVHD type 10h */
#define IOMMU_FEAT_GASUP_SHIFT 6
@@ -466,6 +484,27 @@ struct amd_irte_ops;
#define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED (1 << 0)
+#define io_pgtable_to_data(x) \
+ container_of((x), struct amd_io_pgtable, iop)
+
+#define io_pgtable_ops_to_data(x) \
+ io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))
+
+#define io_pgtable_ops_to_domain(x) \
+ container_of(io_pgtable_ops_to_data(x), \
+ struct protection_domain, iop)
+
+#define io_pgtable_cfg_to_data(x) \
+ container_of((x), struct amd_io_pgtable, pgtbl_cfg)
+
+struct amd_io_pgtable {
+ struct io_pgtable_cfg pgtbl_cfg;
+ struct io_pgtable iop;
+ int mode;
+ u64 *root;
+ atomic64_t pt_root; /* pgtable root and pgtable mode */
+};
+
/*
* This structure contains generic data for IOMMU protection domains
* independent of their use.
@@ -474,9 +513,9 @@ struct protection_domain {
struct list_head dev_list; /* List of all devices in this domain */
struct iommu_domain domain; /* generic domain handle used by
iommu core code */
+ struct amd_io_pgtable iop;
spinlock_t lock; /* mostly used to lock the page table*/
u16 id; /* the domain id written to the device table */
- atomic64_t pt_root; /* pgtable root and pgtable mode */
int glx; /* Number of levels for GCR3 table */
u64 *gcr3_tbl; /* Guest CR3 table */
unsigned long flags; /* flags to find out type of domain */
@@ -484,12 +523,6 @@ struct protection_domain {
unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
};
-/* For decocded pt_root */
-struct domain_pgtable {
- int mode;
- u64 *root;
-};
-
/*
* Structure where we save information about one hardware AMD IOMMU in the
* system.
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 6a1f7048dacc..9126efcbaf2c 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -12,6 +12,7 @@
#include <linux/acpi.h>
#include <linux/list.h>
#include <linux/bitmap.h>
+#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/syscore_ops.h>
#include <linux/interrupt.h>
@@ -147,6 +148,8 @@ struct ivmd_header {
bool amd_iommu_dump;
bool amd_iommu_irq_remap __read_mostly;
+enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1;
+
int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
@@ -254,9 +257,13 @@ static enum iommu_init_state init_state = IOMMU_START_STATE;
static int amd_iommu_enable_interrupts(void);
static int __init iommu_go_to_state(enum iommu_init_state state);
static void init_device_table_dma(void);
+static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
+ u8 fxn, u64 *value, bool is_write);
static bool amd_iommu_pre_enabled = true;
+static u32 amd_iommu_ivinfo __initdata;
+
bool translation_pre_enabled(struct amd_iommu *iommu)
{
return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
@@ -296,6 +303,18 @@ int amd_iommu_get_num_iommus(void)
return amd_iommus_present;
}
+/*
+ * For IVHD type 0x11/0x40, EFR is also available via IVHD.
+ * Default to IVHD EFR since it is available sooner
+ * (i.e. before PCI init).
+ */
+static void __init early_iommu_features_init(struct amd_iommu *iommu,
+ struct ivhd_header *h)
+{
+ if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP)
+ iommu->features = h->efr_reg;
+}
+
/* Access to l1 and l2 indexed register spaces */
static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
@@ -1577,6 +1596,9 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT))
amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
+
+ early_iommu_features_init(iommu, h);
+
break;
default:
return -EINVAL;
@@ -1695,13 +1717,11 @@ static int __init init_iommu_all(struct acpi_table_header *table)
return 0;
}
-static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
- u8 fxn, u64 *value, bool is_write);
-
-static void init_iommu_perf_ctr(struct amd_iommu *iommu)
+static void __init init_iommu_perf_ctr(struct amd_iommu *iommu)
{
+ int retry;
struct pci_dev *pdev = iommu->dev;
- u64 val = 0xabcd, val2 = 0, save_reg = 0;
+ u64 val = 0xabcd, val2 = 0, save_reg, save_src;
if (!iommu_feature(iommu, FEATURE_PC))
return;
@@ -1709,17 +1729,39 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu)
amd_iommu_pc_present = true;
/* save the value to restore, if writable */
- if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, false))
+ if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, false) ||
+ iommu_pc_get_set_reg(iommu, 0, 0, 8, &save_src, false))
goto pc_false;
- /* Check if the performance counters can be written to */
- if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) ||
- (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) ||
- (val != val2))
+ /*
+ * Disable power gating by programing the performance counter
+ * source to 20 (i.e. counts the reads and writes from/to IOMMU
+ * Reserved Register [MMIO Offset 1FF8h] that are ignored.),
+ * which never get incremented during this init phase.
+ * (Note: The event is also deprecated.)
+ */
+ val = 20;
+ if (iommu_pc_get_set_reg(iommu, 0, 0, 8, &val, true))
goto pc_false;
+ /* Check if the performance counters can be written to */
+ val = 0xabcd;
+ for (retry = 5; retry; retry--) {
+ if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true) ||
+ iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false) ||
+ val2)
+ break;
+
+ /* Wait about 20 msec for power gating to disable and retry. */
+ msleep(20);
+ }
+
/* restore */
- if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, true))
+ if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, true) ||
+ iommu_pc_get_set_reg(iommu, 0, 0, 8, &save_src, true))
+ goto pc_false;
+
+ if (val != val2)
goto pc_false;
pci_info(pdev, "IOMMU performance counters supported\n");
@@ -1770,6 +1812,35 @@ static const struct attribute_group *amd_iommu_groups[] = {
NULL,
};
+/*
+ * Note: IVHD 0x11 and 0x40 also contains exact copy
+ * of the IOMMU Extended Feature Register [MMIO Offset 0030h].
+ * Default to EFR in IVHD since it is available sooner (i.e. before PCI init).
+ */
+static void __init late_iommu_features_init(struct amd_iommu *iommu)
+{
+ u64 features;
+
+ if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
+ return;
+
+ /* read extended feature bits */
+ features = readq(iommu->mmio_base + MMIO_EXT_FEATURES);
+
+ if (!iommu->features) {
+ iommu->features = features;
+ return;
+ }
+
+ /*
+ * Sanity check and warn if EFR values from
+ * IVHD and MMIO conflict.
+ */
+ if (features != iommu->features)
+ pr_warn(FW_WARN "EFR mismatch. Use IVHD EFR (%#llx : %#llx\n).",
+ features, iommu->features);
+}
+
static int __init iommu_init_pci(struct amd_iommu *iommu)
{
int cap_ptr = iommu->cap_ptr;
@@ -1789,8 +1860,7 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
amd_iommu_iotlb_sup = false;
- /* read extended feature bits */
- iommu->features = readq(iommu->mmio_base + MMIO_EXT_FEATURES);
+ late_iommu_features_init(iommu);
if (iommu_feature(iommu, FEATURE_GT)) {
int glxval;
@@ -1883,7 +1953,7 @@ static void print_iommu_info(void)
struct pci_dev *pdev = iommu->dev;
int i;
- pci_info(pdev, "Found IOMMU cap 0x%hx\n", iommu->cap_ptr);
+ pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr);
if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
pci_info(pdev, "Extended features (%#llx):",
@@ -1911,7 +1981,7 @@ static void print_iommu_info(void)
static int __init amd_iommu_init_pci(void)
{
struct amd_iommu *iommu;
- int ret = 0;
+ int ret;
for_each_iommu(iommu) {
ret = iommu_init_pci(iommu);
@@ -2607,6 +2677,11 @@ static void __init free_dma_resources(void)
free_unity_maps();
}
+static void __init ivinfo_init(void *ivrs)
+{
+ amd_iommu_ivinfo = *((u32 *)(ivrs + IOMMU_IVINFO_OFFSET));
+}
+
/*
* This is the hardware init function for AMD IOMMU in the system.
* This function is called either from amd_iommu_init or from the interrupt
@@ -2637,8 +2712,8 @@ static void __init free_dma_resources(void)
static int __init early_amd_iommu_init(void)
{
struct acpi_table_header *ivrs_base;
+ int i, remap_cache_sz, ret;
acpi_status status;
- int i, remap_cache_sz, ret = 0;
u32 pci_id;
if (!amd_iommu_detected)
@@ -2661,6 +2736,8 @@ static int __init early_amd_iommu_init(void)
if (ret)
goto out;
+ ivinfo_init(ivrs_base);
+
amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base);
DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type);
@@ -2780,7 +2857,6 @@ static int __init early_amd_iommu_init(void)
out:
/* Don't leak any ACPI memory */
acpi_put_table(ivrs_base);
- ivrs_base = NULL;
return ret;
}
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
new file mode 100644
index 000000000000..1c4961e05c12
--- /dev/null
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -0,0 +1,558 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * CPU-agnostic AMD IO page table allocator.
+ *
+ * Copyright (C) 2020 Advanced Micro Devices, Inc.
+ * Author: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+ */
+
+#define pr_fmt(fmt) "AMD-Vi: " fmt
+#define dev_fmt(fmt) pr_fmt(fmt)
+
+#include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/io-pgtable.h>
+#include <linux/kernel.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/barrier.h>
+
+#include "amd_iommu_types.h"
+#include "amd_iommu.h"
+
+static void v1_tlb_flush_all(void *cookie)
+{
+}
+
+static void v1_tlb_flush_walk(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+}
+
+static void v1_tlb_add_page(struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t granule,
+ void *cookie)
+{
+}
+
+static const struct iommu_flush_ops v1_flush_ops = {
+ .tlb_flush_all = v1_tlb_flush_all,
+ .tlb_flush_walk = v1_tlb_flush_walk,
+ .tlb_add_page = v1_tlb_add_page,
+};
+
+/*
+ * Helper function to get the first pte of a large mapping
+ */
+static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
+ unsigned long *count)
+{
+ unsigned long pte_mask, pg_size, cnt;
+ u64 *fpte;
+
+ pg_size = PTE_PAGE_SIZE(*pte);
+ cnt = PAGE_SIZE_PTE_COUNT(pg_size);
+ pte_mask = ~((cnt << 3) - 1);
+ fpte = (u64 *)(((unsigned long)pte) & pte_mask);
+
+ if (page_size)
+ *page_size = pg_size;
+
+ if (count)
+ *count = cnt;
+
+ return fpte;
+}
+
+/****************************************************************************
+ *
+ * The functions below are used the create the page table mappings for
+ * unity mapped regions.
+ *
+ ****************************************************************************/
+
+static void free_page_list(struct page *freelist)
+{
+ while (freelist != NULL) {
+ unsigned long p = (unsigned long)page_address(freelist);
+
+ freelist = freelist->freelist;
+ free_page(p);
+ }
+}
+
+static struct page *free_pt_page(unsigned long pt, struct page *freelist)
+{
+ struct page *p = virt_to_page((void *)pt);
+
+ p->freelist = freelist;
+
+ return p;
+}
+
+#define DEFINE_FREE_PT_FN(LVL, FN) \
+static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist) \
+{ \
+ unsigned long p; \
+ u64 *pt; \
+ int i; \
+ \
+ pt = (u64 *)__pt; \
+ \
+ for (i = 0; i < 512; ++i) { \
+ /* PTE present? */ \
+ if (!IOMMU_PTE_PRESENT(pt[i])) \
+ continue; \
+ \
+ /* Large PTE? */ \
+ if (PM_PTE_LEVEL(pt[i]) == 0 || \
+ PM_PTE_LEVEL(pt[i]) == 7) \
+ continue; \
+ \
+ p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \
+ freelist = FN(p, freelist); \
+ } \
+ \
+ return free_pt_page((unsigned long)pt, freelist); \
+}
+
+DEFINE_FREE_PT_FN(l2, free_pt_page)
+DEFINE_FREE_PT_FN(l3, free_pt_l2)
+DEFINE_FREE_PT_FN(l4, free_pt_l3)
+DEFINE_FREE_PT_FN(l5, free_pt_l4)
+DEFINE_FREE_PT_FN(l6, free_pt_l5)
+
+static struct page *free_sub_pt(unsigned long root, int mode,
+ struct page *freelist)
+{
+ switch (mode) {
+ case PAGE_MODE_NONE:
+ case PAGE_MODE_7_LEVEL:
+ break;
+ case PAGE_MODE_1_LEVEL:
+ freelist = free_pt_page(root, freelist);
+ break;
+ case PAGE_MODE_2_LEVEL:
+ freelist = free_pt_l2(root, freelist);
+ break;
+ case PAGE_MODE_3_LEVEL:
+ freelist = free_pt_l3(root, freelist);
+ break;
+ case PAGE_MODE_4_LEVEL:
+ freelist = free_pt_l4(root, freelist);
+ break;
+ case PAGE_MODE_5_LEVEL:
+ freelist = free_pt_l5(root, freelist);
+ break;
+ case PAGE_MODE_6_LEVEL:
+ freelist = free_pt_l6(root, freelist);
+ break;
+ default:
+ BUG();
+ }
+
+ return freelist;
+}
+
+void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
+ u64 *root, int mode)
+{
+ u64 pt_root;
+
+ /* lowest 3 bits encode pgtable mode */
+ pt_root = mode & 7;
+ pt_root |= (u64)root;
+
+ amd_iommu_domain_set_pt_root(domain, pt_root);
+}
+
+/*
+ * This function is used to add another level to an IO page table. Adding
+ * another level increases the size of the address space by 9 bits to a size up
+ * to 64 bits.
+ */
+static bool increase_address_space(struct protection_domain *domain,
+ unsigned long address,
+ gfp_t gfp)
+{
+ unsigned long flags;
+ bool ret = true;
+ u64 *pte;
+
+ spin_lock_irqsave(&domain->lock, flags);
+
+ if (address <= PM_LEVEL_SIZE(domain->iop.mode))
+ goto out;
+
+ ret = false;
+ if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL))
+ goto out;
+
+ pte = (void *)get_zeroed_page(gfp);
+ if (!pte)
+ goto out;
+
+ *pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root));
+
+ domain->iop.root = pte;
+ domain->iop.mode += 1;
+ amd_iommu_update_and_flush_device_table(domain);
+ amd_iommu_domain_flush_complete(domain);
+
+ /*
+ * Device Table needs to be updated and flushed before the new root can
+ * be published.
+ */
+ amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode);
+
+ ret = true;
+
+out:
+ spin_unlock_irqrestore(&domain->lock, flags);
+
+ return ret;
+}
+
+static u64 *alloc_pte(struct protection_domain *domain,
+ unsigned long address,
+ unsigned long page_size,
+ u64 **pte_page,
+ gfp_t gfp,
+ bool *updated)
+{
+ int level, end_lvl;
+ u64 *pte, *page;
+
+ BUG_ON(!is_power_of_2(page_size));
+
+ while (address > PM_LEVEL_SIZE(domain->iop.mode)) {
+ /*
+ * Return an error if there is no memory to update the
+ * page-table.
+ */
+ if (!increase_address_space(domain, address, gfp))
+ return NULL;
+ }
+
+
+ level = domain->iop.mode - 1;
+ pte = &domain->iop.root[PM_LEVEL_INDEX(level, address)];
+ address = PAGE_SIZE_ALIGN(address, page_size);
+ end_lvl = PAGE_SIZE_LEVEL(page_size);
+
+ while (level > end_lvl) {
+ u64 __pte, __npte;
+ int pte_level;
+
+ __pte = *pte;
+ pte_level = PM_PTE_LEVEL(__pte);
+
+ /*
+ * If we replace a series of large PTEs, we need
+ * to tear down all of them.
+ */
+ if (IOMMU_PTE_PRESENT(__pte) &&
+ pte_level == PAGE_MODE_7_LEVEL) {
+ unsigned long count, i;
+ u64 *lpte;
+
+ lpte = first_pte_l7(pte, NULL, &count);
+
+ /*
+ * Unmap the replicated PTEs that still match the
+ * original large mapping
+ */
+ for (i = 0; i < count; ++i)
+ cmpxchg64(&lpte[i], __pte, 0ULL);
+
+ *updated = true;
+ continue;
+ }
+
+ if (!IOMMU_PTE_PRESENT(__pte) ||
+ pte_level == PAGE_MODE_NONE) {
+ page = (u64 *)get_zeroed_page(gfp);
+
+ if (!page)
+ return NULL;
+
+ __npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page));
+
+ /* pte could have been changed somewhere. */
+ if (cmpxchg64(pte, __pte, __npte) != __pte)
+ free_page((unsigned long)page);
+ else if (IOMMU_PTE_PRESENT(__pte))
+ *updated = true;
+
+ continue;
+ }
+
+ /* No level skipping support yet */
+ if (pte_level != level)
+ return NULL;
+
+ level -= 1;
+
+ pte = IOMMU_PTE_PAGE(__pte);
+
+ if (pte_page && level == end_lvl)
+ *pte_page = pte;
+
+ pte = &pte[PM_LEVEL_INDEX(level, address)];
+ }
+
+ return pte;
+}
+
+/*
+ * This function checks if there is a PTE for a given dma address. If
+ * there is one, it returns the pointer to it.
+ */
+static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
+ unsigned long address,
+ unsigned long *page_size)
+{
+ int level;
+ u64 *pte;
+
+ *page_size = 0;
+
+ if (address > PM_LEVEL_SIZE(pgtable->mode))
+ return NULL;
+
+ level = pgtable->mode - 1;
+ pte = &pgtable->root[PM_LEVEL_INDEX(level, address)];
+ *page_size = PTE_LEVEL_PAGE_SIZE(level);
+
+ while (level > 0) {
+
+ /* Not Present */
+ if (!IOMMU_PTE_PRESENT(*pte))
+ return NULL;
+
+ /* Large PTE */
+ if (PM_PTE_LEVEL(*pte) == 7 ||
+ PM_PTE_LEVEL(*pte) == 0)
+ break;
+
+ /* No level skipping support yet */
+ if (PM_PTE_LEVEL(*pte) != level)
+ return NULL;
+
+ level -= 1;
+
+ /* Walk to the next level */
+ pte = IOMMU_PTE_PAGE(*pte);
+ pte = &pte[PM_LEVEL_INDEX(level, address)];
+ *page_size = PTE_LEVEL_PAGE_SIZE(level);
+ }
+
+ /*
+ * If we have a series of large PTEs, make
+ * sure to return a pointer to the first one.
+ */
+ if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL)
+ pte = first_pte_l7(pte, page_size, NULL);
+
+ return pte;
+}
+
+static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist)
+{
+ unsigned long pt;
+ int mode;
+
+ while (cmpxchg64(pte, pteval, 0) != pteval) {
+ pr_warn("AMD-Vi: IOMMU pte changed since we read it\n");
+ pteval = *pte;
+ }
+
+ if (!IOMMU_PTE_PRESENT(pteval))
+ return freelist;
+
+ pt = (unsigned long)IOMMU_PTE_PAGE(pteval);
+ mode = IOMMU_PTE_MODE(pteval);
+
+ return free_sub_pt(pt, mode, freelist);
+}
+
+/*
+ * Generic mapping functions. It maps a physical address into a DMA
+ * address space. It allocates the page table pages if necessary.
+ * In the future it can be extended to a generic mapping function
+ * supporting all features of AMD IOMMU page tables like level skipping
+ * and full 64 bit address spaces.
+ */
+static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+{
+ struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
+ struct page *freelist = NULL;
+ bool updated = false;
+ u64 __pte, *pte;
+ int ret, i, count;
+
+ BUG_ON(!IS_ALIGNED(iova, size));
+ BUG_ON(!IS_ALIGNED(paddr, size));
+
+ ret = -EINVAL;
+ if (!(prot & IOMMU_PROT_MASK))
+ goto out;
+
+ count = PAGE_SIZE_PTE_COUNT(size);
+ pte = alloc_pte(dom, iova, size, NULL, gfp, &updated);
+
+ ret = -ENOMEM;
+ if (!pte)
+ goto out;
+
+ for (i = 0; i < count; ++i)
+ freelist = free_clear_pte(&pte[i], pte[i], freelist);
+
+ if (freelist != NULL)
+ updated = true;
+
+ if (count > 1) {
+ __pte = PAGE_SIZE_PTE(__sme_set(paddr), size);
+ __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC;
+ } else
+ __pte = __sme_set(paddr) | IOMMU_PTE_PR | IOMMU_PTE_FC;
+
+ if (prot & IOMMU_PROT_IR)
+ __pte |= IOMMU_PTE_IR;
+ if (prot & IOMMU_PROT_IW)
+ __pte |= IOMMU_PTE_IW;
+
+ for (i = 0; i < count; ++i)
+ pte[i] = __pte;
+
+ ret = 0;
+
+out:
+ if (updated) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&dom->lock, flags);
+ /*
+ * Flush domain TLB(s) and wait for completion. Any Device-Table
+ * Updates and flushing already happened in
+ * increase_address_space().
+ */
+ amd_iommu_domain_flush_tlb_pde(dom);
+ amd_iommu_domain_flush_complete(dom);
+ spin_unlock_irqrestore(&dom->lock, flags);
+ }
+
+ /* Everything flushed out, free pages now */
+ free_page_list(freelist);
+
+ return ret;
+}
+
+static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
+ unsigned long iova,
+ size_t size,
+ struct iommu_iotlb_gather *gather)
+{
+ struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
+ unsigned long long unmapped;
+ unsigned long unmap_size;
+ u64 *pte;
+
+ BUG_ON(!is_power_of_2(size));
+
+ unmapped = 0;
+
+ while (unmapped < size) {
+ pte = fetch_pte(pgtable, iova, &unmap_size);
+ if (pte) {
+ int i, count;
+
+ count = PAGE_SIZE_PTE_COUNT(unmap_size);
+ for (i = 0; i < count; i++)
+ pte[i] = 0ULL;
+ }
+
+ iova = (iova & ~(unmap_size - 1)) + unmap_size;
+ unmapped += unmap_size;
+ }
+
+ BUG_ON(unmapped && !is_power_of_2(unmapped));
+
+ return unmapped;
+}
+
+static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova)
+{
+ struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
+ unsigned long offset_mask, pte_pgsize;
+ u64 *pte, __pte;
+
+ if (pgtable->mode == PAGE_MODE_NONE)
+ return iova;
+
+ pte = fetch_pte(pgtable, iova, &pte_pgsize);
+
+ if (!pte || !IOMMU_PTE_PRESENT(*pte))
+ return 0;
+
+ offset_mask = pte_pgsize - 1;
+ __pte = __sme_clr(*pte & PM_ADDR_MASK);
+
+ return (__pte & ~offset_mask) | (iova & offset_mask);
+}
+
+/*
+ * ----------------------------------------------------
+ */
+static void v1_free_pgtable(struct io_pgtable *iop)
+{
+ struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop);
+ struct protection_domain *dom;
+ struct page *freelist = NULL;
+ unsigned long root;
+
+ if (pgtable->mode == PAGE_MODE_NONE)
+ return;
+
+ dom = container_of(pgtable, struct protection_domain, iop);
+
+ /* Update data structure */
+ amd_iommu_domain_clr_pt_root(dom);
+
+ /* Make changes visible to IOMMUs */
+ amd_iommu_domain_update(dom);
+
+ /* Page-table is not visible to IOMMU anymore, so free it */
+ BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
+ pgtable->mode > PAGE_MODE_6_LEVEL);
+
+ root = (unsigned long)pgtable->root;
+ freelist = free_sub_pt(root, pgtable->mode, freelist);
+
+ free_page_list(freelist);
+}
+
+static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
+{
+ struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
+
+ cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES,
+ cfg->ias = IOMMU_IN_ADDR_BIT_SIZE,
+ cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE,
+ cfg->tlb = &v1_flush_ops;
+
+ pgtable->iop.ops.map = iommu_v1_map_page;
+ pgtable->iop.ops.unmap = iommu_v1_unmap_page;
+ pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys;
+
+ return &pgtable->iop;
+}
+
+struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns = {
+ .alloc = v1_alloc_pgtable,
+ .free = v1_free_pgtable,
+};
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index f0adbc48fd17..a69a8b573e40 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -31,6 +31,7 @@
#include <linux/irqdomain.h>
#include <linux/percpu.h>
#include <linux/iova.h>
+#include <linux/io-pgtable.h>
#include <asm/irq_remapping.h>
#include <asm/io_apic.h>
#include <asm/apic.h>
@@ -57,16 +58,6 @@
#define HT_RANGE_START (0xfd00000000ULL)
#define HT_RANGE_END (0xffffffffffULL)
-/*
- * This bitmap is used to advertise the page sizes our hardware support
- * to the IOMMU core, which will then use this information to split
- * physically contiguous memory regions it is mapping into page sizes
- * that we support.
- *
- * 512GB Pages are not supported due to a hardware bug
- */
-#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38))
-
#define DEFAULT_PGTABLE_LEVEL PAGE_MODE_3_LEVEL
static DEFINE_SPINLOCK(pd_bitmap_lock);
@@ -96,10 +87,7 @@ struct iommu_cmd {
struct kmem_cache *amd_iommu_irq_cache;
-static void update_domain(struct protection_domain *domain);
static void detach_device(struct device *dev);
-static void update_and_flush_device_table(struct protection_domain *domain,
- struct domain_pgtable *pgtable);
/****************************************************************************
*
@@ -151,37 +139,6 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom)
return container_of(dom, struct protection_domain, domain);
}
-static void amd_iommu_domain_get_pgtable(struct protection_domain *domain,
- struct domain_pgtable *pgtable)
-{
- u64 pt_root = atomic64_read(&domain->pt_root);
-
- pgtable->root = (u64 *)(pt_root & PAGE_MASK);
- pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */
-}
-
-static void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root)
-{
- atomic64_set(&domain->pt_root, root);
-}
-
-static void amd_iommu_domain_clr_pt_root(struct protection_domain *domain)
-{
- amd_iommu_domain_set_pt_root(domain, 0);
-}
-
-static void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
- u64 *root, int mode)
-{
- u64 pt_root;
-
- /* lowest 3 bits encode pgtable mode */
- pt_root = mode & 7;
- pt_root |= (u64)root;
-
- amd_iommu_domain_set_pt_root(domain, pt_root);
-}
-
static struct iommu_dev_data *alloc_dev_data(u16 devid)
{
struct iommu_dev_data *dev_data;
@@ -437,29 +394,6 @@ static void amd_iommu_uninit_device(struct device *dev)
*/
}
-/*
- * Helper function to get the first pte of a large mapping
- */
-static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
- unsigned long *count)
-{
- unsigned long pte_mask, pg_size, cnt;
- u64 *fpte;
-
- pg_size = PTE_PAGE_SIZE(*pte);
- cnt = PAGE_SIZE_PTE_COUNT(pg_size);
- pte_mask = ~((cnt << 3) - 1);
- fpte = (u64 *)(((unsigned long)pte) & pte_mask);
-
- if (page_size)
- *page_size = pg_size;
-
- if (count)
- *count = cnt;
-
- return fpte;
-}
-
/****************************************************************************
*
* Interrupt handling functions
@@ -1335,12 +1269,12 @@ static void domain_flush_pages(struct protection_domain *domain,
}
/* Flush the whole IO/TLB for a given protection domain - including PDE */
-static void domain_flush_tlb_pde(struct protection_domain *domain)
+void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain)
{
__domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
}
-static void domain_flush_complete(struct protection_domain *domain)
+void amd_iommu_domain_flush_complete(struct protection_domain *domain)
{
int i;
@@ -1365,7 +1299,7 @@ static void domain_flush_np_cache(struct protection_domain *domain,
spin_lock_irqsave(&domain->lock, flags);
domain_flush_pages(domain, iova, size);
- domain_flush_complete(domain);
+ amd_iommu_domain_flush_complete(domain);
spin_unlock_irqrestore(&domain->lock, flags);
}
}
@@ -1384,443 +1318,6 @@ static void domain_flush_devices(struct protection_domain *domain)
/****************************************************************************
*
- * The functions below are used the create the page table mappings for
- * unity mapped regions.
- *
- ****************************************************************************/
-
-static void free_page_list(struct page *freelist)
-{
- while (freelist != NULL) {
- unsigned long p = (unsigned long)page_address(freelist);
- freelist = freelist->freelist;
- free_page(p);
- }
-}
-
-static struct page *free_pt_page(unsigned long pt, struct page *freelist)
-{
- struct page *p = virt_to_page((void *)pt);
-
- p->freelist = freelist;
-
- return p;
-}
-
-#define DEFINE_FREE_PT_FN(LVL, FN) \
-static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist) \
-{ \
- unsigned long p; \
- u64 *pt; \
- int i; \
- \
- pt = (u64 *)__pt; \
- \
- for (i = 0; i < 512; ++i) { \
- /* PTE present? */ \
- if (!IOMMU_PTE_PRESENT(pt[i])) \
- continue; \
- \
- /* Large PTE? */ \
- if (PM_PTE_LEVEL(pt[i]) == 0 || \
- PM_PTE_LEVEL(pt[i]) == 7) \
- continue; \
- \
- p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \
- freelist = FN(p, freelist); \
- } \
- \
- return free_pt_page((unsigned long)pt, freelist); \
-}
-
-DEFINE_FREE_PT_FN(l2, free_pt_page)
-DEFINE_FREE_PT_FN(l3, free_pt_l2)
-DEFINE_FREE_PT_FN(l4, free_pt_l3)
-DEFINE_FREE_PT_FN(l5, free_pt_l4)
-DEFINE_FREE_PT_FN(l6, free_pt_l5)
-
-static struct page *free_sub_pt(unsigned long root, int mode,
- struct page *freelist)
-{
- switch (mode) {
- case PAGE_MODE_NONE:
- case PAGE_MODE_7_LEVEL:
- break;
- case PAGE_MODE_1_LEVEL:
- freelist = free_pt_page(root, freelist);
- break;
- case PAGE_MODE_2_LEVEL:
- freelist = free_pt_l2(root, freelist);
- break;
- case PAGE_MODE_3_LEVEL:
- freelist = free_pt_l3(root, freelist);
- break;
- case PAGE_MODE_4_LEVEL:
- freelist = free_pt_l4(root, freelist);
- break;
- case PAGE_MODE_5_LEVEL:
- freelist = free_pt_l5(root, freelist);
- break;
- case PAGE_MODE_6_LEVEL:
- freelist = free_pt_l6(root, freelist);
- break;
- default:
- BUG();
- }
-
- return freelist;
-}
-
-static void free_pagetable(struct domain_pgtable *pgtable)
-{
- struct page *freelist = NULL;
- unsigned long root;
-
- if (pgtable->mode == PAGE_MODE_NONE)
- return;
-
- BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
- pgtable->mode > PAGE_MODE_6_LEVEL);
-
- root = (unsigned long)pgtable->root;
- freelist = free_sub_pt(root, pgtable->mode, freelist);
-
- free_page_list(freelist);
-}
-
-/*
- * This function is used to add another level to an IO page table. Adding
- * another level increases the size of the address space by 9 bits to a size up
- * to 64 bits.
- */
-static bool increase_address_space(struct protection_domain *domain,
- unsigned long address,
- gfp_t gfp)
-{
- struct domain_pgtable pgtable;
- unsigned long flags;
- bool ret = true;
- u64 *pte;
-
- spin_lock_irqsave(&domain->lock, flags);
-
- amd_iommu_domain_get_pgtable(domain, &pgtable);
-
- if (address <= PM_LEVEL_SIZE(pgtable.mode))
- goto out;
-
- ret = false;
- if (WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL))
- goto out;
-
- pte = (void *)get_zeroed_page(gfp);
- if (!pte)
- goto out;
-
- *pte = PM_LEVEL_PDE(pgtable.mode, iommu_virt_to_phys(pgtable.root));
-
- pgtable.root = pte;
- pgtable.mode += 1;
- update_and_flush_device_table(domain, &pgtable);
- domain_flush_complete(domain);
-
- /*
- * Device Table needs to be updated and flushed before the new root can
- * be published.
- */
- amd_iommu_domain_set_pgtable(domain, pte, pgtable.mode);
-
- ret = true;
-
-out:
- spin_unlock_irqrestore(&domain->lock, flags);
-
- return ret;
-}
-
-static u64 *alloc_pte(struct protection_domain *domain,
- unsigned long address,
- unsigned long page_size,
- u64 **pte_page,
- gfp_t gfp,
- bool *updated)
-{
- struct domain_pgtable pgtable;
- int level, end_lvl;
- u64 *pte, *page;
-
- BUG_ON(!is_power_of_2(page_size));
-
- amd_iommu_domain_get_pgtable(domain, &pgtable);
-
- while (address > PM_LEVEL_SIZE(pgtable.mode)) {
- /*
- * Return an error if there is no memory to update the
- * page-table.
- */
- if (!increase_address_space(domain, address, gfp))
- return NULL;
-
- /* Read new values to check if update was successful */
- amd_iommu_domain_get_pgtable(domain, &pgtable);
- }
-
-
- level = pgtable.mode - 1;
- pte = &pgtable.root[PM_LEVEL_INDEX(level, address)];
- address = PAGE_SIZE_ALIGN(address, page_size);
- end_lvl = PAGE_SIZE_LEVEL(page_size);
-
- while (level > end_lvl) {
- u64 __pte, __npte;
- int pte_level;
-
- __pte = *pte;
- pte_level = PM_PTE_LEVEL(__pte);
-
- /*
- * If we replace a series of large PTEs, we need
- * to tear down all of them.
- */
- if (IOMMU_PTE_PRESENT(__pte) &&
- pte_level == PAGE_MODE_7_LEVEL) {
- unsigned long count, i;
- u64 *lpte;
-
- lpte = first_pte_l7(pte, NULL, &count);
-
- /*
- * Unmap the replicated PTEs that still match the
- * original large mapping
- */
- for (i = 0; i < count; ++i)
- cmpxchg64(&lpte[i], __pte, 0ULL);
-
- *updated = true;
- continue;
- }
-
- if (!IOMMU_PTE_PRESENT(__pte) ||
- pte_level == PAGE_MODE_NONE) {
- page = (u64 *)get_zeroed_page(gfp);
-
- if (!page)
- return NULL;
-
- __npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page));
-
- /* pte could have been changed somewhere. */
- if (cmpxchg64(pte, __pte, __npte) != __pte)
- free_page((unsigned long)page);
- else if (IOMMU_PTE_PRESENT(__pte))
- *updated = true;
-
- continue;
- }
-
- /* No level skipping support yet */
- if (pte_level != level)
- return NULL;
-
- level -= 1;
-
- pte = IOMMU_PTE_PAGE(__pte);
-
- if (pte_page && level == end_lvl)
- *pte_page = pte;
-
- pte = &pte[PM_LEVEL_INDEX(level, address)];
- }
-
- return pte;
-}
-
-/*
- * This function checks if there is a PTE for a given dma address. If
- * there is one, it returns the pointer to it.
- */
-static u64 *fetch_pte(struct protection_domain *domain,
- unsigned long address,
- unsigned long *page_size)
-{
- struct domain_pgtable pgtable;
- int level;
- u64 *pte;
-
- *page_size = 0;
-
- amd_iommu_domain_get_pgtable(domain, &pgtable);
-
- if (address > PM_LEVEL_SIZE(pgtable.mode))
- return NULL;
-
- level = pgtable.mode - 1;
- pte = &pgtable.root[PM_LEVEL_INDEX(level, address)];
- *page_size = PTE_LEVEL_PAGE_SIZE(level);
-
- while (level > 0) {
-
- /* Not Present */
- if (!IOMMU_PTE_PRESENT(*pte))
- return NULL;
-
- /* Large PTE */
- if (PM_PTE_LEVEL(*pte) == 7 ||
- PM_PTE_LEVEL(*pte) == 0)
- break;
-
- /* No level skipping support yet */
- if (PM_PTE_LEVEL(*pte) != level)
- return NULL;
-
- level -= 1;
-
- /* Walk to the next level */
- pte = IOMMU_PTE_PAGE(*pte);
- pte = &pte[PM_LEVEL_INDEX(level, address)];
- *page_size = PTE_LEVEL_PAGE_SIZE(level);
- }
-
- /*
- * If we have a series of large PTEs, make
- * sure to return a pointer to the first one.
- */
- if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL)
- pte = first_pte_l7(pte, page_size, NULL);
-
- return pte;
-}
-
-static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist)
-{
- unsigned long pt;
- int mode;
-
- while (cmpxchg64(pte, pteval, 0) != pteval) {
- pr_warn("AMD-Vi: IOMMU pte changed since we read it\n");
- pteval = *pte;
- }
-
- if (!IOMMU_PTE_PRESENT(pteval))
- return freelist;
-
- pt = (unsigned long)IOMMU_PTE_PAGE(pteval);
- mode = IOMMU_PTE_MODE(pteval);
-
- return free_sub_pt(pt, mode, freelist);
-}
-
-/*
- * Generic mapping functions. It maps a physical address into a DMA
- * address space. It allocates the page table pages if necessary.
- * In the future it can be extended to a generic mapping function
- * supporting all features of AMD IOMMU page tables like level skipping
- * and full 64 bit address spaces.
- */
-static int iommu_map_page(struct protection_domain *dom,
- unsigned long bus_addr,
- unsigned long phys_addr,
- unsigned long page_size,
- int prot,
- gfp_t gfp)
-{
- struct page *freelist = NULL;
- bool updated = false;
- u64 __pte, *pte;
- int ret, i, count;
-
- BUG_ON(!IS_ALIGNED(bus_addr, page_size));
- BUG_ON(!IS_ALIGNED(phys_addr, page_size));
-
- ret = -EINVAL;
- if (!(prot & IOMMU_PROT_MASK))
- goto out;
-
- count = PAGE_SIZE_PTE_COUNT(page_size);
- pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp, &updated);
-
- ret = -ENOMEM;
- if (!pte)
- goto out;
-
- for (i = 0; i < count; ++i)
- freelist = free_clear_pte(&pte[i], pte[i], freelist);
-
- if (freelist != NULL)
- updated = true;
-
- if (count > 1) {
- __pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size);
- __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC;
- } else
- __pte = __sme_set(phys_addr) | IOMMU_PTE_PR | IOMMU_PTE_FC;
-
- if (prot & IOMMU_PROT_IR)
- __pte |= IOMMU_PTE_IR;
- if (prot & IOMMU_PROT_IW)
- __pte |= IOMMU_PTE_IW;
-
- for (i = 0; i < count; ++i)
- pte[i] = __pte;
-
- ret = 0;
-
-out:
- if (updated) {
- unsigned long flags;
-
- spin_lock_irqsave(&dom->lock, flags);
- /*
- * Flush domain TLB(s) and wait for completion. Any Device-Table
- * Updates and flushing already happened in
- * increase_address_space().
- */
- domain_flush_tlb_pde(dom);
- domain_flush_complete(dom);
- spin_unlock_irqrestore(&dom->lock, flags);
- }
-
- /* Everything flushed out, free pages now */
- free_page_list(freelist);
-
- return ret;
-}
-
-static unsigned long iommu_unmap_page(struct protection_domain *dom,
- unsigned long bus_addr,
- unsigned long page_size)
-{
- unsigned long long unmapped;
- unsigned long unmap_size;
- u64 *pte;
-
- BUG_ON(!is_power_of_2(page_size));
-
- unmapped = 0;
-
- while (unmapped < page_size) {
-
- pte = fetch_pte(dom, bus_addr, &unmap_size);
-
- if (pte) {
- int i, count;
-
- count = PAGE_SIZE_PTE_COUNT(unmap_size);
- for (i = 0; i < count; i++)
- pte[i] = 0ULL;
- }
-
- bus_addr = (bus_addr & ~(unmap_size - 1)) + unmap_size;
- unmapped += unmap_size;
- }
-
- BUG_ON(unmapped && !is_power_of_2(unmapped));
-
- return unmapped;
-}
-
-/****************************************************************************
- *
* The next functions belong to the domain allocation. A domain is
* allocated for every IOMMU as the default domain. If device isolation
* is enabled, every device get its own domain. The most important thing
@@ -1896,17 +1393,16 @@ static void free_gcr3_table(struct protection_domain *domain)
}
static void set_dte_entry(u16 devid, struct protection_domain *domain,
- struct domain_pgtable *pgtable,
bool ats, bool ppr)
{
u64 pte_root = 0;
u64 flags = 0;
u32 old_domid;
- if (pgtable->mode != PAGE_MODE_NONE)
- pte_root = iommu_virt_to_phys(pgtable->root);
+ if (domain->iop.mode != PAGE_MODE_NONE)
+ pte_root = iommu_virt_to_phys(domain->iop.root);
- pte_root |= (pgtable->mode & DEV_ENTRY_MODE_MASK)
+ pte_root |= (domain->iop.mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV;
@@ -1979,7 +1475,6 @@ static void clear_dte_entry(u16 devid)
static void do_attach(struct iommu_dev_data *dev_data,
struct protection_domain *domain)
{
- struct domain_pgtable pgtable;
struct amd_iommu *iommu;
bool ats;
@@ -1995,8 +1490,7 @@ static void do_attach(struct iommu_dev_data *dev_data,
domain->dev_cnt += 1;
/* Update device table */
- amd_iommu_domain_get_pgtable(domain, &pgtable);
- set_dte_entry(dev_data->devid, domain, &pgtable,
+ set_dte_entry(dev_data->devid, domain,
ats, dev_data->iommu_v2);
clone_aliases(dev_data->pdev);
@@ -2020,10 +1514,10 @@ static void do_detach(struct iommu_dev_data *dev_data)
device_flush_dte(dev_data);
/* Flush IOTLB */
- domain_flush_tlb_pde(domain);
+ amd_iommu_domain_flush_tlb_pde(domain);
/* Wait for the flushes to finish */
- domain_flush_complete(domain);
+ amd_iommu_domain_flush_complete(domain);
/* decrease reference counters - needs to happen after the flushes */
domain->dev_iommu[iommu->index] -= 1;
@@ -2156,9 +1650,9 @@ skip_ats_check:
* left the caches in the IOMMU dirty. So we have to flush
* here to evict all dirty stuff.
*/
- domain_flush_tlb_pde(domain);
+ amd_iommu_domain_flush_tlb_pde(domain);
- domain_flush_complete(domain);
+ amd_iommu_domain_flush_complete(domain);
out:
spin_unlock(&dev_data->lock);
@@ -2303,36 +1797,31 @@ static int amd_iommu_domain_get_attr(struct iommu_domain *domain,
*
*****************************************************************************/
-static void update_device_table(struct protection_domain *domain,
- struct domain_pgtable *pgtable)
+static void update_device_table(struct protection_domain *domain)
{
struct iommu_dev_data *dev_data;
list_for_each_entry(dev_data, &domain->dev_list, list) {
- set_dte_entry(dev_data->devid, domain, pgtable,
+ set_dte_entry(dev_data->devid, domain,
dev_data->ats.enabled, dev_data->iommu_v2);
clone_aliases(dev_data->pdev);
}
}
-static void update_and_flush_device_table(struct protection_domain *domain,
- struct domain_pgtable *pgtable)
+void amd_iommu_update_and_flush_device_table(struct protection_domain *domain)
{
- update_device_table(domain, pgtable);
+ update_device_table(domain);
domain_flush_devices(domain);
}
-static void update_domain(struct protection_domain *domain)
+void amd_iommu_domain_update(struct protection_domain *domain)
{
- struct domain_pgtable pgtable;
-
/* Update device table */
- amd_iommu_domain_get_pgtable(domain, &pgtable);
- update_and_flush_device_table(domain, &pgtable);
+ amd_iommu_update_and_flush_device_table(domain);
/* Flush domain TLB(s) and wait for completion */
- domain_flush_tlb_pde(domain);
- domain_flush_complete(domain);
+ amd_iommu_domain_flush_tlb_pde(domain);
+ amd_iommu_domain_flush_complete(domain);
}
int __init amd_iommu_init_api(void)
@@ -2400,22 +1889,19 @@ static void cleanup_domain(struct protection_domain *domain)
static void protection_domain_free(struct protection_domain *domain)
{
- struct domain_pgtable pgtable;
-
if (!domain)
return;
if (domain->id)
domain_id_free(domain->id);
- amd_iommu_domain_get_pgtable(domain, &pgtable);
- amd_iommu_domain_clr_pt_root(domain);
- free_pagetable(&pgtable);
+ if (domain->iop.pgtbl_cfg.tlb)
+ free_io_pgtable_ops(&domain->iop.iop.ops);
kfree(domain);
}
-static int protection_domain_init(struct protection_domain *domain, int mode)
+static int protection_domain_init_v1(struct protection_domain *domain, int mode)
{
u64 *pt_root = NULL;
@@ -2438,34 +1924,55 @@ static int protection_domain_init(struct protection_domain *domain, int mode)
return 0;
}
-static struct protection_domain *protection_domain_alloc(int mode)
+static struct protection_domain *protection_domain_alloc(unsigned int type)
{
+ struct io_pgtable_ops *pgtbl_ops;
struct protection_domain *domain;
+ int pgtable = amd_iommu_pgtable;
+ int mode = DEFAULT_PGTABLE_LEVEL;
+ int ret;
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
return NULL;
- if (protection_domain_init(domain, mode))
+ /*
+ * Force IOMMU v1 page table when iommu=pt and
+ * when allocating domain for pass-through devices.
+ */
+ if (type == IOMMU_DOMAIN_IDENTITY) {
+ pgtable = AMD_IOMMU_V1;
+ mode = PAGE_MODE_NONE;
+ } else if (type == IOMMU_DOMAIN_UNMANAGED) {
+ pgtable = AMD_IOMMU_V1;
+ }
+
+ switch (pgtable) {
+ case AMD_IOMMU_V1:
+ ret = protection_domain_init_v1(domain, mode);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ if (ret)
goto out_err;
- return domain;
+ pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl_cfg, domain);
+ if (!pgtbl_ops)
+ goto out_err;
+ return domain;
out_err:
kfree(domain);
-
return NULL;
}
static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
{
struct protection_domain *domain;
- int mode = DEFAULT_PGTABLE_LEVEL;
-
- if (type == IOMMU_DOMAIN_IDENTITY)
- mode = PAGE_MODE_NONE;
- domain = protection_domain_alloc(mode);
+ domain = protection_domain_alloc(type);
if (!domain)
return NULL;
@@ -2580,12 +2087,12 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
gfp_t gfp)
{
struct protection_domain *domain = to_pdomain(dom);
- struct domain_pgtable pgtable;
+ struct io_pgtable_ops *ops = &domain->iop.iop.ops;
int prot = 0;
- int ret;
+ int ret = -EINVAL;
- amd_iommu_domain_get_pgtable(domain, &pgtable);
- if (pgtable.mode == PAGE_MODE_NONE)
+ if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
+ (domain->iop.mode == PAGE_MODE_NONE))
return -EINVAL;
if (iommu_prot & IOMMU_READ)
@@ -2593,9 +2100,10 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
if (iommu_prot & IOMMU_WRITE)
prot |= IOMMU_PROT_IW;
- ret = iommu_map_page(domain, iova, paddr, page_size, prot, gfp);
-
- domain_flush_np_cache(domain, iova, page_size);
+ if (ops->map) {
+ ret = ops->map(ops, iova, paddr, page_size, prot, gfp);
+ domain_flush_np_cache(domain, iova, page_size);
+ }
return ret;
}
@@ -2605,36 +2113,22 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
struct iommu_iotlb_gather *gather)
{
struct protection_domain *domain = to_pdomain(dom);
- struct domain_pgtable pgtable;
+ struct io_pgtable_ops *ops = &domain->iop.iop.ops;
- amd_iommu_domain_get_pgtable(domain, &pgtable);
- if (pgtable.mode == PAGE_MODE_NONE)
+ if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
+ (domain->iop.mode == PAGE_MODE_NONE))
return 0;
- return iommu_unmap_page(domain, iova, page_size);
+ return (ops->unmap) ? ops->unmap(ops, iova, page_size, gather) : 0;
}
static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
dma_addr_t iova)
{
struct protection_domain *domain = to_pdomain(dom);
- unsigned long offset_mask, pte_pgsize;
- struct domain_pgtable pgtable;
- u64 *pte, __pte;
-
- amd_iommu_domain_get_pgtable(domain, &pgtable);
- if (pgtable.mode == PAGE_MODE_NONE)
- return iova;
+ struct io_pgtable_ops *ops = &domain->iop.iop.ops;
- pte = fetch_pte(domain, iova, &pte_pgsize);
-
- if (!pte || !IOMMU_PTE_PRESENT(*pte))
- return 0;
-
- offset_mask = pte_pgsize - 1;
- __pte = __sme_clr(*pte & PM_ADDR_MASK);
-
- return (__pte & ~offset_mask) | (iova & offset_mask);
+ return ops->iova_to_phys(ops, iova);
}
static bool amd_iommu_capable(enum iommu_cap cap)
@@ -2720,8 +2214,8 @@ static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
unsigned long flags;
spin_lock_irqsave(&dom->lock, flags);
- domain_flush_tlb_pde(dom);
- domain_flush_complete(dom);
+ amd_iommu_domain_flush_tlb_pde(dom);
+ amd_iommu_domain_flush_complete(dom);
spin_unlock_irqrestore(&dom->lock, flags);
}
@@ -2799,22 +2293,12 @@ EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier);
void amd_iommu_domain_direct_map(struct iommu_domain *dom)
{
struct protection_domain *domain = to_pdomain(dom);
- struct domain_pgtable pgtable;
unsigned long flags;
spin_lock_irqsave(&domain->lock, flags);
- /* First save pgtable configuration*/
- amd_iommu_domain_get_pgtable(domain, &pgtable);
-
- /* Remove page-table from domain */
- amd_iommu_domain_clr_pt_root(domain);
-
- /* Make changes visible to IOMMUs */
- update_domain(domain);
-
- /* Page-table is not visible to IOMMU anymore, so free it */
- free_pagetable(&pgtable);
+ if (domain->iop.pgtbl_cfg.tlb)
+ free_io_pgtable_ops(&domain->iop.iop.ops);
spin_unlock_irqrestore(&domain->lock, flags);
}
@@ -2855,7 +2339,7 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids)
domain->glx = levels;
domain->flags |= PD_IOMMUV2_MASK;
- update_domain(domain);
+ amd_iommu_domain_update(domain);
ret = 0;
@@ -2892,7 +2376,7 @@ static int __flush_pasid(struct protection_domain *domain, u32 pasid,
}
/* Wait until IOMMU TLB flushes are complete */
- domain_flush_complete(domain);
+ amd_iommu_domain_flush_complete(domain);
/* Now flush device TLBs */
list_for_each_entry(dev_data, &domain->dev_list, list) {
@@ -2918,7 +2402,7 @@ static int __flush_pasid(struct protection_domain *domain, u32 pasid,
}
/* Wait until all device TLBs are flushed */
- domain_flush_complete(domain);
+ amd_iommu_domain_flush_complete(domain);
ret = 0;
@@ -3003,11 +2487,9 @@ static u64 *__get_gcr3_pte(u64 *root, int level, u32 pasid, bool alloc)
static int __set_gcr3(struct protection_domain *domain, u32 pasid,
unsigned long cr3)
{
- struct domain_pgtable pgtable;
u64 *pte;
- amd_iommu_domain_get_pgtable(domain, &pgtable);
- if (pgtable.mode != PAGE_MODE_NONE)
+ if (domain->iop.mode != PAGE_MODE_NONE)
return -EINVAL;
pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true);
@@ -3021,11 +2503,9 @@ static int __set_gcr3(struct protection_domain *domain, u32 pasid,
static int __clear_gcr3(struct protection_domain *domain, u32 pasid)
{
- struct domain_pgtable pgtable;
u64 *pte;
- amd_iommu_domain_get_pgtable(domain, &pgtable);
- if (pgtable.mode != PAGE_MODE_NONE)
+ if (domain->iop.mode != PAGE_MODE_NONE)
return -EINVAL;
pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false);
diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c
index 5ecc0bc608ec..f8d4ad421e07 100644
--- a/drivers/iommu/amd/iommu_v2.c
+++ b/drivers/iommu/amd/iommu_v2.c
@@ -77,7 +77,7 @@ struct fault {
};
static LIST_HEAD(state_list);
-static spinlock_t state_lock;
+static DEFINE_SPINLOCK(state_lock);
static struct workqueue_struct *iommu_wq;
@@ -938,8 +938,6 @@ static int __init amd_iommu_v2_init(void)
return 0;
}
- spin_lock_init(&state_lock);
-
ret = -ENOMEM;
iommu_wq = alloc_workqueue("amd_iommu_v2", WQ_MEM_RECLAIM, 0);
if (iommu_wq == NULL)