diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-06 10:49:01 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-06 10:49:01 -0700 |
commit | 6218590bcb452c3da7517d02b588d4d0a8628f73 (patch) | |
tree | 8b6a285052ac999e0e36e04f0c1e6bbfb46e84c4 /drivers/iommu | |
parent | 14986a34e1289424811443a524cdd9e1688c7913 (diff) | |
parent | d9ab710b85310e4ba9295f2b494eda54cf1a355a (diff) |
Merge tag 'kvm-4.9-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Radim Krčmář:
"All architectures:
- move `make kvmconfig` stubs from x86
- use 64 bits for debugfs stats
ARM:
- Important fixes for not using an in-kernel irqchip
- handle SError exceptions and present them to guests if appropriate
- proxying of GICV access at EL2 if guest mappings are unsafe
- GICv3 on AArch32 on ARMv8
- preparations for GICv3 save/restore, including ABI docs
- cleanups and a bit of optimizations
MIPS:
- A couple of fixes in preparation for supporting MIPS EVA host
kernels
- MIPS SMP host & TLB invalidation fixes
PPC:
- Fix the bug which caused guests to falsely report lockups
- other minor fixes
- a small optimization
s390:
- Lazy enablement of runtime instrumentation
- up to 255 CPUs for nested guests
- rework of machine check deliver
- cleanups and fixes
x86:
- IOMMU part of AMD's AVIC for vmexit-less interrupt delivery
- Hyper-V TSC page
- per-vcpu tsc_offset in debugfs
- accelerated INS/OUTS in nVMX
- cleanups and fixes"
* tag 'kvm-4.9-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (140 commits)
KVM: MIPS: Drop dubious EntryHi optimisation
KVM: MIPS: Invalidate TLB by regenerating ASIDs
KVM: MIPS: Split kernel/user ASID regeneration
KVM: MIPS: Drop other CPU ASIDs on guest MMU changes
KVM: arm/arm64: vgic: Don't flush/sync without a working vgic
KVM: arm64: Require in-kernel irqchip for PMU support
KVM: PPC: Book3s PR: Allow access to unprivileged MMCR2 register
KVM: PPC: Book3S PR: Support 64kB page size on POWER8E and POWER8NVL
KVM: PPC: Book3S: Remove duplicate setting of the B field in tlbie
KVM: PPC: BookE: Fix a sanity check
KVM: PPC: Book3S HV: Take out virtual core piggybacking code
KVM: PPC: Book3S: Treat VTB as a per-subcore register, not per-thread
ARM: gic-v3: Work around definition of gic_write_bpr1
KVM: nVMX: Fix the NMI IDT-vectoring handling
KVM: VMX: Enable MSR-BASED TPR shadow even if APICv is inactive
KVM: nVMX: Fix reload apic access page warning
kvmconfig: add virtio-gpu to config fragment
config: move x86 kvm_guest.config to a common location
arm64: KVM: Remove duplicating init code for setting VMID
ARM: KVM: Support vgic-v3
...
Diffstat (limited to 'drivers/iommu')
-rw-r--r-- | drivers/iommu/amd_iommu.c | 484 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu_init.c | 181 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu_proto.h | 1 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu_types.h | 149 |
4 files changed, 752 insertions, 63 deletions
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 4025291ea0ae..58fa8cc0262b 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -137,6 +137,7 @@ struct iommu_dev_data { bool pri_tlp; /* PASID TLB required for PPR completions */ u32 errata; /* Bitmap for errata to apply */ + bool use_vapic; /* Enable device to use vapic mode */ }; /* @@ -707,14 +708,74 @@ static void iommu_poll_ppr_log(struct amd_iommu *iommu) } } +#ifdef CONFIG_IRQ_REMAP +static int (*iommu_ga_log_notifier)(u32); + +int amd_iommu_register_ga_log_notifier(int (*notifier)(u32)) +{ + iommu_ga_log_notifier = notifier; + + return 0; +} +EXPORT_SYMBOL(amd_iommu_register_ga_log_notifier); + +static void iommu_poll_ga_log(struct amd_iommu *iommu) +{ + u32 head, tail, cnt = 0; + + if (iommu->ga_log == NULL) + return; + + head = readl(iommu->mmio_base + MMIO_GA_HEAD_OFFSET); + tail = readl(iommu->mmio_base + MMIO_GA_TAIL_OFFSET); + + while (head != tail) { + volatile u64 *raw; + u64 log_entry; + + raw = (u64 *)(iommu->ga_log + head); + cnt++; + + /* Avoid memcpy function-call overhead */ + log_entry = *raw; + + /* Update head pointer of hardware ring-buffer */ + head = (head + GA_ENTRY_SIZE) % GA_LOG_SIZE; + writel(head, iommu->mmio_base + MMIO_GA_HEAD_OFFSET); + + /* Handle GA entry */ + switch (GA_REQ_TYPE(log_entry)) { + case GA_GUEST_NR: + if (!iommu_ga_log_notifier) + break; + + pr_debug("AMD-Vi: %s: devid=%#x, ga_tag=%#x\n", + __func__, GA_DEVID(log_entry), + GA_TAG(log_entry)); + + if (iommu_ga_log_notifier(GA_TAG(log_entry)) != 0) + pr_err("AMD-Vi: GA log notifier failed.\n"); + break; + default: + break; + } + } +} +#endif /* CONFIG_IRQ_REMAP */ + +#define AMD_IOMMU_INT_MASK \ + (MMIO_STATUS_EVT_INT_MASK | \ + MMIO_STATUS_PPR_INT_MASK | \ + MMIO_STATUS_GALOG_INT_MASK) + irqreturn_t amd_iommu_int_thread(int irq, void *data) { struct amd_iommu *iommu = (struct amd_iommu *) data; u32 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); - while (status & (MMIO_STATUS_EVT_INT_MASK | MMIO_STATUS_PPR_INT_MASK)) { - /* Enable EVT and PPR interrupts again */ - writel((MMIO_STATUS_EVT_INT_MASK | MMIO_STATUS_PPR_INT_MASK), + while (status & AMD_IOMMU_INT_MASK) { + /* Enable EVT and PPR and GA interrupts again */ + writel(AMD_IOMMU_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET); if (status & MMIO_STATUS_EVT_INT_MASK) { @@ -727,6 +788,13 @@ irqreturn_t amd_iommu_int_thread(int irq, void *data) iommu_poll_ppr_log(iommu); } +#ifdef CONFIG_IRQ_REMAP + if (status & MMIO_STATUS_GALOG_INT_MASK) { + pr_devel("AMD-Vi: Processing IOMMU GA Log\n"); + iommu_poll_ga_log(iommu); + } +#endif + /* * Hardware bug: ERBT1312 * When re-enabling interrupt (by writing 1 @@ -2967,6 +3035,12 @@ static void amd_iommu_detach_device(struct iommu_domain *dom, if (!iommu) return; +#ifdef CONFIG_IRQ_REMAP + if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) && + (dom->type == IOMMU_DOMAIN_UNMANAGED)) + dev_data->use_vapic = 0; +#endif + iommu_completion_wait(iommu); } @@ -2992,6 +3066,15 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, ret = attach_device(dev, domain); +#ifdef CONFIG_IRQ_REMAP + if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) { + if (dom->type == IOMMU_DOMAIN_UNMANAGED) + dev_data->use_vapic = 1; + else + dev_data->use_vapic = 0; + } +#endif + iommu_completion_wait(iommu); return ret; @@ -3530,34 +3613,6 @@ EXPORT_SYMBOL(amd_iommu_device_info); * *****************************************************************************/ -union irte { - u32 val; - struct { - u32 valid : 1, - no_fault : 1, - int_type : 3, - rq_eoi : 1, - dm : 1, - rsvd_1 : 1, - destination : 8, - vector : 8, - rsvd_2 : 8; - } fields; -}; - -struct irq_2_irte { - u16 devid; /* Device ID for IRTE table */ - u16 index; /* Index into IRTE table*/ -}; - -struct amd_ir_data { - struct irq_2_irte irq_2_irte; - union irte irte_entry; - union { - struct msi_msg msi_entry; - }; -}; - static struct irq_chip amd_ir_chip; #define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6) @@ -3579,8 +3634,6 @@ static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table) amd_iommu_dev_table[devid].data[2] = dte; } -#define IRTE_ALLOCATED (~1U) - static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic) { struct irq_remap_table *table = NULL; @@ -3626,13 +3679,18 @@ static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic) goto out; } - memset(table->table, 0, MAX_IRQS_PER_TABLE * sizeof(u32)); + if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir)) + memset(table->table, 0, + MAX_IRQS_PER_TABLE * sizeof(u32)); + else + memset(table->table, 0, + (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2))); if (ioapic) { int i; for (i = 0; i < 32; ++i) - table->table[i] = IRTE_ALLOCATED; + iommu->irte_ops->set_allocated(table, i); } irq_lookup_table[devid] = table; @@ -3658,6 +3716,10 @@ static int alloc_irq_index(u16 devid, int count) struct irq_remap_table *table; unsigned long flags; int index, c; + struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; + + if (!iommu) + return -ENODEV; table = get_irq_table(devid, false); if (!table) @@ -3669,14 +3731,14 @@ static int alloc_irq_index(u16 devid, int count) for (c = 0, index = table->min_index; index < MAX_IRQS_PER_TABLE; ++index) { - if (table->table[index] == 0) + if (!iommu->irte_ops->is_allocated(table, index)) c += 1; else c = 0; if (c == count) { for (; c != 0; --c) - table->table[index - c + 1] = IRTE_ALLOCATED; + iommu->irte_ops->set_allocated(table, index - c + 1); index -= count - 1; goto out; @@ -3691,7 +3753,42 @@ out: return index; } -static int modify_irte(u16 devid, int index, union irte irte) +static int modify_irte_ga(u16 devid, int index, struct irte_ga *irte, + struct amd_ir_data *data) +{ + struct irq_remap_table *table; + struct amd_iommu *iommu; + unsigned long flags; + struct irte_ga *entry; + + iommu = amd_iommu_rlookup_table[devid]; + if (iommu == NULL) + return -EINVAL; + + table = get_irq_table(devid, false); + if (!table) + return -ENOMEM; + + spin_lock_irqsave(&table->lock, flags); + + entry = (struct irte_ga *)table->table; + entry = &entry[index]; + entry->lo.fields_remap.valid = 0; + entry->hi.val = irte->hi.val; + entry->lo.val = irte->lo.val; + entry->lo.fields_remap.valid = 1; + if (data) + data->ref = entry; + + spin_unlock_irqrestore(&table->lock, flags); + + iommu_flush_irt(iommu, devid); + iommu_completion_wait(iommu); + + return 0; +} + +static int modify_irte(u16 devid, int index, union irte *irte) { struct irq_remap_table *table; struct amd_iommu *iommu; @@ -3706,7 +3803,7 @@ static int modify_irte(u16 devid, int index, union irte irte) return -ENOMEM; spin_lock_irqsave(&table->lock, flags); - table->table[index] = irte.val; + table->table[index] = irte->val; spin_unlock_irqrestore(&table->lock, flags); iommu_flush_irt(iommu, devid); @@ -3730,13 +3827,146 @@ static void free_irte(u16 devid, int index) return; spin_lock_irqsave(&table->lock, flags); - table->table[index] = 0; + iommu->irte_ops->clear_allocated(table, index); spin_unlock_irqrestore(&table->lock, flags); iommu_flush_irt(iommu, devid); iommu_completion_wait(iommu); } +static void irte_prepare(void *entry, + u32 delivery_mode, u32 dest_mode, + u8 vector, u32 dest_apicid, int devid) +{ + union irte *irte = (union irte *) entry; + + irte->val = 0; + irte->fields.vector = vector; + irte->fields.int_type = delivery_mode; + irte->fields.destination = dest_apicid; + irte->fields.dm = dest_mode; + irte->fields.valid = 1; +} + +static void irte_ga_prepare(void *entry, + u32 delivery_mode, u32 dest_mode, + u8 vector, u32 dest_apicid, int devid) +{ + struct irte_ga *irte = (struct irte_ga *) entry; + struct iommu_dev_data *dev_data = search_dev_data(devid); + + irte->lo.val = 0; + irte->hi.val = 0; + irte->lo.fields_remap.guest_mode = dev_data ? dev_data->use_vapic : 0; + irte->lo.fields_remap.int_type = delivery_mode; + irte->lo.fields_remap.dm = dest_mode; + irte->hi.fields.vector = vector; + irte->lo.fields_remap.destination = dest_apicid; + irte->lo.fields_remap.valid = 1; +} + +static void irte_activate(void *entry, u16 devid, u16 index) +{ + union irte *irte = (union irte *) entry; + + irte->fields.valid = 1; + modify_irte(devid, index, irte); +} + +static void irte_ga_activate(void *entry, u16 devid, u16 index) +{ + struct irte_ga *irte = (struct irte_ga *) entry; + + irte->lo.fields_remap.valid = 1; + modify_irte_ga(devid, index, irte, NULL); +} + +static void irte_deactivate(void *entry, u16 devid, u16 index) +{ + union irte *irte = (union irte *) entry; + + irte->fields.valid = 0; + modify_irte(devid, index, irte); +} + +static void irte_ga_deactivate(void *entry, u16 devid, u16 index) +{ + struct irte_ga *irte = (struct irte_ga *) entry; + + irte->lo.fields_remap.valid = 0; + modify_irte_ga(devid, index, irte, NULL); +} + +static void irte_set_affinity(void *entry, u16 devid, u16 index, + u8 vector, u32 dest_apicid) +{ + union irte *irte = (union irte *) entry; + + irte->fields.vector = vector; + irte->fields.destination = dest_apicid; + modify_irte(devid, index, irte); +} + +static void irte_ga_set_affinity(void *entry, u16 devid, u16 index, + u8 vector, u32 dest_apicid) +{ + struct irte_ga *irte = (struct irte_ga *) entry; + struct iommu_dev_data *dev_data = search_dev_data(devid); + + if (!dev_data || !dev_data->use_vapic) { + irte->hi.fields.vector = vector; + irte->lo.fields_remap.destination = dest_apicid; + irte->lo.fields_remap.guest_mode = 0; + modify_irte_ga(devid, index, irte, NULL); + } +} + +#define IRTE_ALLOCATED (~1U) +static void irte_set_allocated(struct irq_remap_table *table, int index) +{ + table->table[index] = IRTE_ALLOCATED; +} + +static void irte_ga_set_allocated(struct irq_remap_table *table, int index) +{ + struct irte_ga *ptr = (struct irte_ga *)table->table; + struct irte_ga *irte = &ptr[index]; + + memset(&irte->lo.val, 0, sizeof(u64)); + memset(&irte->hi.val, 0, sizeof(u64)); + irte->hi.fields.vector = 0xff; +} + +static bool irte_is_allocated(struct irq_remap_table *table, int index) +{ + union irte *ptr = (union irte *)table->table; + union irte *irte = &ptr[index]; + + return irte->val != 0; +} + +static bool irte_ga_is_allocated(struct irq_remap_table *table, int index) +{ + struct irte_ga *ptr = (struct irte_ga *)table->table; + struct irte_ga *irte = &ptr[index]; + + return irte->hi.fields.vector != 0; +} + +static void irte_clear_allocated(struct irq_remap_table *table, int index) +{ + table->table[index] = 0; +} + +static void irte_ga_clear_allocated(struct irq_remap_table *table, int index) +{ + struct irte_ga *ptr = (struct irte_ga *)table->table; + struct irte_ga *irte = &ptr[index]; + + memset(&irte->lo.val, 0, sizeof(u64)); + memset(&irte->hi.val, 0, sizeof(u64)); +} + static int get_devid(struct irq_alloc_info *info) { int devid = -1; @@ -3821,19 +4051,17 @@ static void irq_remapping_prepare_irte(struct amd_ir_data *data, { struct irq_2_irte *irte_info = &data->irq_2_irte; struct msi_msg *msg = &data->msi_entry; - union irte *irte = &data->irte_entry; struct IO_APIC_route_entry *entry; + struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; + + if (!iommu) + return; data->irq_2_irte.devid = devid; data->irq_2_irte.index = index + sub_handle; - - /* Setup IRTE for IOMMU */ - irte->val = 0; - irte->fields.vector = irq_cfg->vector; - irte->fields.int_type = apic->irq_delivery_mode; - irte->fields.destination = irq_cfg->dest_apicid; - irte->fields.dm = apic->irq_dest_mode; - irte->fields.valid = 1; + iommu->irte_ops->prepare(data->entry, apic->irq_delivery_mode, + apic->irq_dest_mode, irq_cfg->vector, + irq_cfg->dest_apicid, devid); switch (info->type) { case X86_IRQ_ALLOC_TYPE_IOAPIC: @@ -3864,12 +4092,32 @@ static void irq_remapping_prepare_irte(struct amd_ir_data *data, } } +struct amd_irte_ops irte_32_ops = { + .prepare = irte_prepare, + .activate = irte_activate, + .deactivate = irte_deactivate, + .set_affinity = irte_set_affinity, + .set_allocated = irte_set_allocated, + .is_allocated = irte_is_allocated, + .clear_allocated = irte_clear_allocated, +}; + +struct amd_irte_ops irte_128_ops = { + .prepare = irte_ga_prepare, + .activate = irte_ga_activate, + .deactivate = irte_ga_deactivate, + .set_affinity = irte_ga_set_affinity, + .set_allocated = irte_ga_set_allocated, + .is_allocated = irte_ga_is_allocated, + .clear_allocated = irte_ga_clear_allocated, +}; + static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs, void *arg) { struct irq_alloc_info *info = arg; struct irq_data *irq_data; - struct amd_ir_data *data; + struct amd_ir_data *data = NULL; struct irq_cfg *cfg; int i, ret, devid; int index = -1; @@ -3921,6 +4169,16 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, if (!data) goto out_free_data; + if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir)) + data->entry = kzalloc(sizeof(union irte), GFP_KERNEL); + else + data->entry = kzalloc(sizeof(struct irte_ga), + GFP_KERNEL); + if (!data->entry) { + kfree(data); + goto out_free_data; + } + irq_data->hwirq = (devid << 16) + i; irq_data->chip_data = data; irq_data->chip = &amd_ir_chip; @@ -3957,6 +4215,7 @@ static void irq_remapping_free(struct irq_domain *domain, unsigned int virq, data = irq_data->chip_data; irte_info = &data->irq_2_irte; free_irte(irte_info->devid, irte_info->index); + kfree(data->entry); kfree(data); } } @@ -3968,8 +4227,11 @@ static void irq_remapping_activate(struct irq_domain *domain, { struct amd_ir_data *data = irq_data->chip_data; struct irq_2_irte *irte_info = &data->irq_2_irte; + struct amd_iommu *iommu = amd_iommu_rlookup_table[irte_info->devid]; - modify_irte(irte_info->devid, irte_info->index, data->irte_entry); + if (iommu) + iommu->irte_ops->activate(data->entry, irte_info->devid, + irte_info->index); } static void irq_remapping_deactivate(struct irq_domain *domain, @@ -3977,10 +4239,11 @@ static void irq_remapping_deactivate(struct irq_domain *domain, { struct amd_ir_data *data = irq_data->chip_data; struct irq_2_irte *irte_info = &data->irq_2_irte; - union irte entry; + struct amd_iommu *iommu = amd_iommu_rlookup_table[irte_info->devid]; - entry.val = 0; - modify_irte(irte_info->devid, irte_info->index, data->irte_entry); + if (iommu) + iommu->irte_ops->deactivate(data->entry, irte_info->devid, + irte_info->index); } static struct irq_domain_ops amd_ir_domain_ops = { @@ -3990,6 +4253,70 @@ static struct irq_domain_ops amd_ir_domain_ops = { .deactivate = irq_remapping_deactivate, }; +static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) +{ + struct amd_iommu *iommu; + struct amd_iommu_pi_data *pi_data = vcpu_info; + struct vcpu_data *vcpu_pi_info = pi_data->vcpu_data; + struct amd_ir_data *ir_data = data->chip_data; + struct irte_ga *irte = (struct irte_ga *) ir_data->entry; + struct irq_2_irte *irte_info = &ir_data->irq_2_irte; + struct iommu_dev_data *dev_data = search_dev_data(irte_info->devid); + + /* Note: + * This device has never been set up for guest mode. + * we should not modify the IRTE + */ + if (!dev_data || !dev_data->use_vapic) + return 0; + + pi_data->ir_data = ir_data; + + /* Note: + * SVM tries to set up for VAPIC mode, but we are in + * legacy mode. So, we force legacy mode instead. + */ + if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) { + pr_debug("AMD-Vi: %s: Fall back to using intr legacy remap\n", + __func__); + pi_data->is_guest_mode = false; + } + + iommu = amd_iommu_rlookup_table[irte_info->devid]; + if (iommu == NULL) + return -EINVAL; + + pi_data->prev_ga_tag = ir_data->cached_ga_tag; + if (pi_data->is_guest_mode) { + /* Setting */ + irte->hi.fields.ga_root_ptr = (pi_data->base >> 12); + irte->hi.fields.vector = vcpu_pi_info->vector; + irte->lo.fields_vapic.guest_mode = 1; + irte->lo.fields_vapic.ga_tag = pi_data->ga_tag; + + ir_data->cached_ga_tag = pi_data->ga_tag; + } else { + /* Un-Setting */ + struct irq_cfg *cfg = irqd_cfg(data); + + irte->hi.val = 0; + irte->lo.val = 0; + irte->hi.fields.vector = cfg->vector; + irte->lo.fields_remap.guest_mode = 0; + irte->lo.fields_remap.destination = cfg->dest_apicid; + irte->lo.fields_remap.int_type = apic->irq_delivery_mode; + irte->lo.fields_remap.dm = apic->irq_dest_mode; + + /* + * This communicates the ga_tag back to the caller + * so that it can do all the necessary clean up. + */ + ir_data->cached_ga_tag = 0; + } + + return modify_irte_ga(irte_info->devid, irte_info->index, irte, ir_data); +} + static int amd_ir_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { @@ -3997,8 +4324,12 @@ static int amd_ir_set_affinity(struct irq_data *data, struct irq_2_irte *irte_info = &ir_data->irq_2_irte; struct irq_cfg *cfg = irqd_cfg(data); struct irq_data *parent = data->parent_data; + struct amd_iommu *iommu = amd_iommu_rlookup_table[irte_info->devid]; int ret; + if (!iommu) + return -ENODEV; + ret = parent->chip->irq_set_affinity(parent, mask, force); if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) return ret; @@ -4007,9 +4338,8 @@ static int amd_ir_set_affinity(struct irq_data *data, * Atomically updates the IRTE with the new destination, vector * and flushes the interrupt entry cache. */ - ir_data->irte_entry.fields.vector = cfg->vector; - ir_data->irte_entry.fields.destination = cfg->dest_apicid; - modify_irte(irte_info->devid, irte_info->index, ir_data->irte_entry); + iommu->irte_ops->set_affinity(ir_data->entry, irte_info->devid, + irte_info->index, cfg->vector, cfg->dest_apicid); /* * After this point, all the interrupts will start arriving @@ -4031,6 +4361,7 @@ static void ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg) static struct irq_chip amd_ir_chip = { .irq_ack = ir_ack_apic_edge, .irq_set_affinity = amd_ir_set_affinity, + .irq_set_vcpu_affinity = amd_ir_set_vcpu_affinity, .irq_compose_msi_msg = ir_compose_msi_msg, }; @@ -4045,4 +4376,43 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu) return 0; } + +int amd_iommu_update_ga(int cpu, bool is_run, void *data) +{ + unsigned long flags; + struct amd_iommu *iommu; + struct irq_remap_table *irt; + struct amd_ir_data *ir_data = (struct amd_ir_data *)data; + int devid = ir_data->irq_2_irte.devid; + struct irte_ga *entry = (struct irte_ga *) ir_data->entry; + struct irte_ga *ref = (struct irte_ga *) ir_data->ref; + + if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) || + !ref || !entry || !entry->lo.fields_vapic.guest_mode) + return 0; + + iommu = amd_iommu_rlookup_table[devid]; + if (!iommu) + return -ENODEV; + + irt = get_irq_table(devid, false); + if (!irt) + return -ENODEV; + + spin_lock_irqsave(&irt->lock, flags); + + if (ref->lo.fields_vapic.guest_mode) { + if (cpu >= 0) + ref->lo.fields_vapic.destination = cpu; + ref->lo.fields_vapic.is_run = is_run; + barrier(); + } + + spin_unlock_irqrestore(&irt->lock, flags); + + iommu_flush_irt(iommu, devid); + iommu_completion_wait(iommu); + return 0; +} +EXPORT_SYMBOL(amd_iommu_update_ga); #endif diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 59741ead7e15..cd1713631a4a 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -84,6 +84,7 @@ #define ACPI_DEVFLAG_LINT1 0x80 #define ACPI_DEVFLAG_ATSDIS 0x10000000 +#define LOOP_TIMEOUT 100000 /* * ACPI table definitions * @@ -145,6 +146,8 @@ struct ivmd_header { bool amd_iommu_dump; bool amd_iommu_irq_remap __read_mostly; +int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC; + static bool amd_iommu_detected; static bool __initdata amd_iommu_disabled; static int amd_iommu_target_ivhd_type; @@ -386,6 +389,10 @@ static void iommu_disable(struct amd_iommu *iommu) iommu_feature_disable(iommu, CONTROL_EVT_INT_EN); iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN); + /* Disable IOMMU GA_LOG */ + iommu_feature_disable(iommu, CONTROL_GALOG_EN); + iommu_feature_disable(iommu, CONTROL_GAINT_EN); + /* Disable IOMMU hardware itself */ iommu_feature_disable(iommu, CONTROL_IOMMU_EN); } @@ -671,6 +678,99 @@ static void __init free_ppr_log(struct amd_iommu *iommu) free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE)); } +static void free_ga_log(struct amd_iommu *iommu) +{ +#ifdef CONFIG_IRQ_REMAP + if (iommu->ga_log) + free_pages((unsigned long)iommu->ga_log, + get_order(GA_LOG_SIZE)); + if (iommu->ga_log_tail) + free_pages((unsigned long)iommu->ga_log_tail, + get_order(8)); +#endif +} + +static int iommu_ga_log_enable(struct amd_iommu *iommu) +{ +#ifdef CONFIG_IRQ_REMAP + u32 status, i; + + if (!iommu->ga_log) + return -EINVAL; + + status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); + + /* Check if already running */ + if (status & (MMIO_STATUS_GALOG_RUN_MASK)) + return 0; + + iommu_feature_enable(iommu, CONTROL_GAINT_EN); + iommu_feature_enable(iommu, CONTROL_GALOG_EN); + + for (i = 0; i < LOOP_TIMEOUT; ++i) { + status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); + if (status & (MMIO_STATUS_GALOG_RUN_MASK)) + break; + } + + if (i >= LOOP_TIMEOUT) + return -EINVAL; +#endif /* CONFIG_IRQ_REMAP */ + return 0; +} + +#ifdef CONFIG_IRQ_REMAP +static int iommu_init_ga_log(struct amd_iommu *iommu) +{ + u64 entry; + + if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) + return 0; + + iommu->ga_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(GA_LOG_SIZE)); + if (!iommu->ga_log) + goto err_out; + + iommu->ga_log_tail = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(8)); + if (!iommu->ga_log_tail) + goto err_out; + + entry = (u64)virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512; + memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET, + &entry, sizeof(entry)); + entry = ((u64)virt_to_phys(iommu->ga_log) & 0xFFFFFFFFFFFFFULL) & ~7ULL; + memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET, + &entry, sizeof(entry)); + writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET); + writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET); + + return 0; +err_out: + free_ga_log(iommu); + return -EINVAL; +} +#endif /* CONFIG_IRQ_REMAP */ + +static int iommu_init_ga(struct amd_iommu *iommu) +{ + int ret = 0; + +#ifdef CONFIG_IRQ_REMAP + /* Note: We have already checked GASup from IVRS table. + * Now, we need to make sure that GAMSup is set. + */ + if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) && + !iommu_feature(iommu, FEATURE_GAM_VAPIC)) + amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; + + ret = iommu_init_ga_log(iommu); +#endif /* CONFIG_IRQ_REMAP */ + + return ret; +} + static void iommu_enable_gt(struct amd_iommu *iommu) { if (!iommu_feature(iommu, FEATURE_GT)) @@ -1144,6 +1244,7 @@ static void __init free_iommu_one(struct amd_iommu *iommu) free_command_buffer(iommu); free_event_buffer(iommu); free_ppr_log(iommu); + free_ga_log(iommu); iommu_unmap_mmio_space(iommu); } @@ -1258,6 +1359,8 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) iommu->mmio_phys_end = MMIO_REG_END_OFFSET; else iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; + if (((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0)) + amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; break; case 0x11: case 0x40: @@ -1265,6 +1368,8 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) iommu->mmio_phys_end = MMIO_REG_END_OFFSET; else iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; + if (((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0)) + amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; break; default: return -EINVAL; @@ -1432,6 +1537,7 @@ static int iommu_init_pci(struct amd_iommu *iommu) { int cap_ptr = iommu->cap_ptr; u32 range, misc, low, high; + int ret; iommu->dev = pci_get_bus_and_slot(PCI_BUS_NUM(iommu->devid), iommu->devid & 0xff); @@ -1488,6 +1594,10 @@ static int iommu_init_pci(struct amd_iommu *iommu) if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu)) return -ENOMEM; + ret = iommu_init_ga(iommu); + if (ret) + return ret; + if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) amd_iommu_np_cache = true; @@ -1545,16 +1655,24 @@ static void print_iommu_info(void) dev_name(&iommu->dev->dev), iommu->cap_ptr); if (iommu->cap & (1 << IOMMU_CAP_EFR)) { - pr_info("AMD-Vi: Extended features: "); + pr_info("AMD-Vi: Extended features (%#llx):\n", + iommu->features); for (i = 0; i < ARRAY_SIZE(feat_str); ++i) { if (iommu_feature(iommu, (1ULL << i))) pr_cont(" %s", feat_str[i]); } + + if (iommu->features & FEATURE_GAM_VAPIC) + pr_cont(" GA_vAPIC"); + pr_cont("\n"); } } - if (irq_remapping_enabled) + if (irq_remapping_enabled) { pr_info("AMD-Vi: Interrupt remapping enabled\n"); + if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) + pr_info("AMD-Vi: virtual APIC enabled\n"); + } } static int __init amd_iommu_init_pci(void) @@ -1645,6 +1763,8 @@ enable_faults: if (iommu->ppr_log != NULL) iommu_feature_enable(iommu, CONTROL_PPFINT_EN); + iommu_ga_log_enable(iommu); + return 0; } @@ -1862,6 +1982,24 @@ static void iommu_apply_resume_quirks(struct amd_iommu *iommu) iommu->stored_addr_lo | 1); } +static void iommu_enable_ga(struct amd_iommu *iommu) +{ +#ifdef CONFIG_IRQ_REMAP + switch (amd_iommu_guest_ir) { + case AMD_IOMMU_GUEST_IR_VAPIC: + iommu_feature_enable(iommu, CONTROL_GAM_EN); + /* Fall through */ + case AMD_IOMMU_GUEST_IR_LEGACY_GA: + iommu_feature_enable(iommu, CONTROL_GA_EN); + iommu->irte_ops = &irte_128_ops; + break; + default: + iommu->irte_ops = &irte_32_ops; + break; + } +#endif +} + /* * This function finally enables all IOMMUs found in the system after * they have been initialized @@ -1877,9 +2015,15 @@ static void early_enable_iommus(void) iommu_enable_command_buffer(iommu); iommu_enable_event_buffer(iommu); iommu_set_exclusion_range(iommu); + iommu_enable_ga(iommu); iommu_enable(iommu); iommu_flush_all_caches(iommu); } + +#ifdef CONFIG_IRQ_REMAP + if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) + amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP); +#endif } static void enable_iommus_v2(void) @@ -1905,6 +2049,11 @@ static void disable_iommus(void) for_each_iommu(iommu) iommu_disable(iommu); + +#ifdef CONFIG_IRQ_REMAP + if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) + amd_iommu_irq_ops.capability &= ~(1 << IRQ_POSTING_CAP); +#endif } /* @@ -2059,7 +2208,7 @@ static int __init early_amd_iommu_init(void) struct acpi_table_header *ivrs_base; acpi_size ivrs_size; acpi_status status; - int i, ret = 0; + int i, remap_cache_sz, ret = 0; if (!amd_iommu_detected) return -ENODEV; @@ -2157,10 +2306,14 @@ static int __init early_amd_iommu_init(void) * remapping tables. */ ret = -ENOMEM; + if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir)) + remap_cache_sz = MAX_IRQS_PER_TABLE * sizeof(u32); + else + remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2); amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache", - MAX_IRQS_PER_TABLE * sizeof(u32), - IRQ_TABLE_ALIGNMENT, - 0, NULL); + remap_cache_sz, + IRQ_TABLE_ALIGNMENT, + 0, NULL); if (!amd_iommu_irq_cache) goto out; @@ -2413,6 +2566,21 @@ static int __init parse_amd_iommu_dump(char *str) return 1; } +static int __init parse_amd_iommu_intr(char *str) +{ + for (; *str; ++str) { + if (strncmp(str, "legacy", 6) == 0) { + amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; + break; + } + if (strncmp(str, "vapic", 5) == 0) { + amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC; + break; + } + } + return 1; +} + static int __init parse_amd_iommu_options(char *str) { for (; *str; ++str) { @@ -2521,6 +2689,7 @@ static int __init parse_ivrs_acpihid(char *str) __setup("amd_iommu_dump", parse_amd_iommu_dump); __setup("amd_iommu=", parse_amd_iommu_options); +__setup("amd_iommu_intr=", parse_amd_iommu_intr); __setup("ivrs_ioapic", parse_ivrs_ioapic); __setup("ivrs_hpet", parse_ivrs_hpet); __setup("ivrs_acpihid", parse_ivrs_acpihid); diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h index 0bd9eb374462..faa3b4895cf0 100644 --- a/drivers/iommu/amd_iommu_proto.h +++ b/drivers/iommu/amd_iommu_proto.h @@ -38,6 +38,7 @@ extern int amd_iommu_enable(void); extern void amd_iommu_disable(void); extern int amd_iommu_reenable(int); extern int amd_iommu_enable_faulting(void); +extern int amd_iommu_guest_ir; /* IOMMUv2 specific functions */ struct iommu_domain; diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 9652848e3155..0d91785ebdc3 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -22,6 +22,7 @@ #include <linux/types.h> #include <linux/mutex.h> +#include <linux/msi.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/pci.h> @@ -69,6 +70,8 @@ #define MMIO_EXCL_LIMIT_OFFSET 0x0028 #define MMIO_EXT_FEATURES 0x0030 #define MMIO_PPR_LOG_OFFSET 0x0038 +#define MMIO_GA_LOG_BASE_OFFSET 0x00e0 +#define MMIO_GA_LOG_TAIL_OFFSET 0x00e8 #define MMIO_CMD_HEAD_OFFSET 0x2000 #define MMIO_CMD_TAIL_OFFSET 0x2008 #define MMIO_EVT_HEAD_OFFSET 0x2010 @@ -76,6 +79,8 @@ #define MMIO_STATUS_OFFSET 0x2020 #define MMIO_PPR_HEAD_OFFSET 0x2030 #define MMIO_PPR_TAIL_OFFSET 0x2038 +#define MMIO_GA_HEAD_OFFSET 0x2040 +#define MMIO_GA_TAIL_OFFSET 0x2048 #define MMIO_CNTR_CONF_OFFSET 0x4000 #define MMIO_CNTR_REG_OFFSET 0x40000 #define MMIO_REG_END_OFFSET 0x80000 @@ -92,6 +97,7 @@ #define FEATURE_GA (1ULL<<7) #define FEATURE_HE (1ULL<<8) #define FEATURE_PC (1ULL<<9) +#define FEATURE_GAM_VAPIC (1ULL<<21) #define FEATURE_PASID_SHIFT 32 #define FEATURE_PASID_MASK (0x1fULL << FEATURE_PASID_SHIFT) @@ -110,6 +116,9 @@ #define MMIO_STATUS_EVT_INT_MASK (1 << 1) #define MMIO_STATUS_COM_WAIT_INT_MASK (1 << 2) #define MMIO_STATUS_PPR_INT_MASK (1 << 6) +#define MMIO_STATUS_GALOG_RUN_MASK (1 << 8) +#define MMIO_STATUS_GALOG_OVERFLOW_MASK (1 << 9) +#define MMIO_STATUS_GALOG_INT_MASK (1 << 10) /* event logging constants */ #define EVENT_ENTRY_SIZE 0x10 @@ -146,6 +155,10 @@ #define CONTROL_PPFINT_EN 0x0eULL #define CONTROL_PPR_EN 0x0fULL #define CONTROL_GT_EN 0x10ULL +#define CONTROL_GA_EN 0x11ULL +#define CONTROL_GAM_EN 0x19ULL +#define CONTROL_GALOG_EN 0x1CULL +#define CONTROL_GAINT_EN 0x1DULL #define CTRL_INV_TO_MASK (7 << CONTROL_INV_TIMEOUT) #define CTRL_INV_TO_NONE 0 @@ -224,6 +237,19 @@ #define PPR_REQ_FAULT 0x01 +/* Constants for GA Log handling */ +#define GA_LOG_ENTRIES 512 +#define GA_LOG_SIZE_SHIFT 56 +#define GA_LOG_SIZE_512 (0x8ULL << GA_LOG_SIZE_SHIFT) +#define GA_ENTRY_SIZE 8 +#define GA_LOG_SIZE (GA_ENTRY_SIZE * GA_LOG_ENTRIES) + +#define GA_TAG(x) (u32)(x & 0xffffffffULL) +#define GA_DEVID(x) (u16)(((x) >> 32) & 0xffffULL) +#define GA_REQ_TYPE(x) (((x) >> 60) & 0xfULL) + +#define GA_GUEST_NR 0x1 + #define PAGE_MODE_NONE 0x00 #define PAGE_MODE_1_LEVEL 0x01 #define PAGE_MODE_2_LEVEL 0x02 @@ -329,6 +355,12 @@ #define IOMMU_CAP_NPCACHE 26 #define IOMMU_CAP_EFR 27 +/* IOMMU Feature Reporting Field (for IVHD type 10h */ +#define IOMMU_FEAT_GASUP_SHIFT 6 + +/* IOMMU Extended Feature Register (EFR) */ +#define IOMMU_EFR_GASUP_SHIFT 7 + #define MAX_DOMAIN_ID 65536 /* Protection domain flags */ @@ -400,6 +432,7 @@ struct amd_iommu_fault { struct iommu_domain; struct irq_domain; +struct amd_irte_ops; /* * This structure contains generic data for IOMMU protection domains @@ -490,6 +523,12 @@ struct amd_iommu { /* Base of the PPR log, if present */ u8 *ppr_log; + /* Base of the GA log, if present */ + u8 *ga_log; + + /* Tail of the GA log, if present */ + u8 *ga_log_tail; + /* true if interrupts for this IOMMU are already enabled */ bool int_enabled; @@ -523,6 +562,8 @@ struct amd_iommu { #ifdef CONFIG_IRQ_REMAP struct irq_domain *ir_domain; struct irq_domain *msi_domain; + + struct amd_irte_ops *irte_ops; #endif volatile u64 __aligned(8) cmd_sem; @@ -683,4 +724,112 @@ static inline int get_hpet_devid(int id) return -EINVAL; } +enum amd_iommu_intr_mode_type { + AMD_IOMMU_GUEST_IR_LEGACY, + + /* This mode is not visible to users. It is used when + * we cannot fully enable vAPIC and fallback to only support + * legacy interrupt remapping via 128-bit IRTE. + */ + AMD_IOMMU_GUEST_IR_LEGACY_GA, + AMD_IOMMU_GUEST_IR_VAPIC, +}; + +#define AMD_IOMMU_GUEST_IR_GA(x) (x == AMD_IOMMU_GUEST_IR_VAPIC || \ + x == AMD_IOMMU_GUEST_IR_LEGACY_GA) + +#define AMD_IOMMU_GUEST_IR_VAPIC(x) (x == AMD_IOMMU_GUEST_IR_VAPIC) + +union irte { + u32 val; + struct { + u32 valid : 1, + no_fault : 1, + int_type : 3, + rq_eoi : 1, + dm : 1, + rsvd_1 : 1, + destination : 8, + vector : 8, + rsvd_2 : 8; + } fields; +}; + +union irte_ga_lo { + u64 val; + + /* For int remapping */ + struct { + u64 valid : 1, + no_fault : 1, + /* ------ */ + int_type : 3, + rq_eoi : 1, + dm : 1, + /* ------ */ + guest_mode : 1, + destination : 8, + rsvd : 48; + } fields_remap; + + /* For guest vAPIC */ + struct { + u64 valid : 1, + no_fault : 1, + /* ------ */ + ga_log_intr : 1, + rsvd1 : 3, + is_run : 1, + /* ------ */ + guest_mode : 1, + destination : 8, + rsvd2 : 16, + ga_tag : 32; + } fields_vapic; +}; + +union irte_ga_hi { + u64 val; + struct { + u64 vector : 8, + rsvd_1 : 4, + ga_root_ptr : 40, + rsvd_2 : 12; + } fields; +}; + +struct irte_ga { + union irte_ga_lo lo; + union irte_ga_hi hi; +}; + +struct irq_2_irte { + u16 devid; /* Device ID for IRTE table */ + u16 index; /* Index into IRTE table*/ +}; + +struct amd_ir_data { + u32 cached_ga_tag; + struct irq_2_irte irq_2_irte; + struct msi_msg msi_entry; + void *entry; /* Pointer to union irte or struct irte_ga */ + void *ref; /* Pointer to the actual irte */ +}; + +struct amd_irte_ops { + void (*prepare)(void *, u32, u32, u8, u32, int); + void (*activate)(void *, u16, u16); + void (*deactivate)(void *, u16, u16); + void (*set_affinity)(void *, u16, u16, u8, u32); + void *(*get)(struct irq_remap_table *, int); + void (*set_allocated)(struct irq_remap_table *, int); + bool (*is_allocated)(struct irq_remap_table *, int); + void (*clear_allocated)(struct irq_remap_table *, int); +}; + +#ifdef CONFIG_IRQ_REMAP +extern struct amd_irte_ops irte_32_ops; +extern struct amd_irte_ops irte_128_ops; +#endif + #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ |