From ad5fb870c486d932a1749d7853dd70f436a7e03f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 3 Apr 2015 12:05:28 -0400 Subject: e820, efi: add ACPI 6.0 persistent memory types ACPI 6.0 formalizes e820-type-7 and efi-type-14 as persistent memory. Mark it "reserved" and allow it to be claimed by a persistent memory device driver. This definition is in addition to the Linux kernel's existing type-12 definition that was recently added in support of shipping platforms with NVDIMM support that predate ACPI 6.0 (which now classifies type-12 as OEM reserved). Note, /proc/iomem can be consulted for differentiating legacy "Persistent Memory (legacy)" E820_PRAM vs standard "Persistent Memory" E820_PMEM. Cc: Boaz Harrosh Cc: Ingo Molnar Cc: Christoph Hellwig Cc: Andrew Morton Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Jens Axboe Cc: Linus Torvalds Cc: Matthew Wilcox Cc: Thomas Gleixner Acked-by: Jeff Moyer Acked-by: Andy Lutomirski Reviewed-by: Ross Zwisler Acked-by: Christoph Hellwig Tested-by: Toshi Kani Signed-off-by: Dan Williams --- arch/arm64/kernel/efi.c | 1 + arch/ia64/kernel/efi.c | 4 ++++ arch/x86/boot/compressed/eboot.c | 4 ++++ arch/x86/include/uapi/asm/e820.h | 1 + arch/x86/kernel/e820.c | 28 ++++++++++++++++++++++++---- arch/x86/platform/efi/efi.c | 3 +++ 6 files changed, 37 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index ab21e0d58278..9d4aa18f2a82 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -158,6 +158,7 @@ static __init int is_reserve_region(efi_memory_desc_t *md) case EFI_BOOT_SERVICES_CODE: case EFI_BOOT_SERVICES_DATA: case EFI_CONVENTIONAL_MEMORY: + case EFI_PERSISTENT_MEMORY: return 0; default: break; diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index c52d7540dc05..5f6be9dd6968 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -1223,6 +1223,10 @@ efi_initialize_iomem_resources(struct resource *code_resource, flags |= 
IORESOURCE_DISABLED; break; + case EFI_PERSISTENT_MEMORY: + name = "Persistent Memory"; + break; + case EFI_RESERVED_TYPE: case EFI_RUNTIME_SERVICES_CODE: case EFI_RUNTIME_SERVICES_DATA: diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 48304b89b601..2c82bd150d43 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -1224,6 +1224,10 @@ static efi_status_t setup_e820(struct boot_params *params, e820_type = E820_NVS; break; + case EFI_PERSISTENT_MEMORY: + e820_type = E820_PMEM; + break; + default: continue; } diff --git a/arch/x86/include/uapi/asm/e820.h b/arch/x86/include/uapi/asm/e820.h index 960a8a9dc4ab..0f457e6eab18 100644 --- a/arch/x86/include/uapi/asm/e820.h +++ b/arch/x86/include/uapi/asm/e820.h @@ -32,6 +32,7 @@ #define E820_ACPI 3 #define E820_NVS 4 #define E820_UNUSABLE 5 +#define E820_PMEM 7 /* * This is a non-standardized way to represent ADR or NVDIMM regions that diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index e2ce85db2283..c857d53269dd 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -149,6 +149,7 @@ static void __init e820_print_type(u32 type) case E820_UNUSABLE: printk(KERN_CONT "unusable"); break; + case E820_PMEM: case E820_PRAM: printk(KERN_CONT "persistent (type %u)", type); break; @@ -918,11 +919,32 @@ static inline const char *e820_type_to_string(int e820_type) case E820_ACPI: return "ACPI Tables"; case E820_NVS: return "ACPI Non-volatile Storage"; case E820_UNUSABLE: return "Unusable memory"; - case E820_PRAM: return "Persistent RAM"; + case E820_PRAM: return "Persistent Memory (legacy)"; + case E820_PMEM: return "Persistent Memory"; default: return "reserved"; } } +static bool do_mark_busy(u32 type, struct resource *res) +{ + /* this is the legacy bios/dos rom-shadow + mmio region */ + if (res->start < (1ULL<<20)) + return true; + + /* + * Treat persistent memory like device memory, i.e. 
reserve it + * for exclusive use of a driver + */ + switch (type) { + case E820_RESERVED: + case E820_PRAM: + case E820_PMEM: + return false; + default: + return true; + } +} + /* * Mark e820 reserved areas as busy for the resource manager. */ @@ -952,9 +974,7 @@ void __init e820_reserve_resources(void) * pci device BAR resource and insert them later in * pcibios_resource_survey() */ - if (((e820.map[i].type != E820_RESERVED) && - (e820.map[i].type != E820_PRAM)) || - res->start < (1ULL<<20)) { + if (do_mark_busy(e820.map[i].type, res)) { res->flags |= IORESOURCE_BUSY; insert_resource(&iomem_resource, res); } diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 02744df576d5..fe01ae37a2a4 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -153,6 +153,9 @@ static void __init do_add_efi_memmap(void) case EFI_UNUSABLE_MEMORY: e820_type = E820_UNUSABLE; break; + case EFI_PERSISTENT_MEMORY: + e820_type = E820_PMEM; + break; default: /* * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE -- cgit v1.2.3 From 9f53f9fa4ad1d8bddd4d14359cdabc531aedffe8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 9 Jun 2015 15:33:45 -0400 Subject: libnvdimm, pmem: add libnvdimm support to the pmem driver nd_pmem attaches to persistent memory regions and namespaces emitted by the libnvdimm subsystem, and, same as the original pmem driver, presents the system-physical-address range as a block device. The existing e820-type-12 to pmem setup is converted to an nvdimm_bus that emits an nd_namespace_io device. Note that the X in 'pmemX' is now derived from the parent region. This provides some stability to the pmem devices names from boot-to-boot. The minor numbers are also more predictable by passing 0 to alloc_disk(). Cc: Andy Lutomirski Cc: Boaz Harrosh Cc: H. 
Peter Anvin Cc: Jens Axboe Cc: Ingo Molnar Cc: Christoph Hellwig Signed-off-by: Ross Zwisler Acked-by: Christoph Hellwig Tested-by: Toshi Kani Signed-off-by: Dan Williams --- arch/x86/Kconfig | 3 ++ arch/x86/kernel/pmem.c | 92 ++++++++++++++++++++++++++++++++------------------ 2 files changed, 63 insertions(+), 32 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 226d5696e1d1..1a2cbf641667 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1424,6 +1424,9 @@ source "mm/Kconfig" config X86_PMEM_LEGACY bool "Support non-standard NVDIMMs and ADR protected memory" + depends on PHYS_ADDR_T_64BIT + depends on BLK_DEV + select LIBNVDIMM help Treat memory marked using the non-standard e820 type of 12 as used by the Intel Sandy Bridge-EP reference BIOS as protected memory. diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c index 3420c874ddc5..0f4ef472ab9e 100644 --- a/arch/x86/kernel/pmem.c +++ b/arch/x86/kernel/pmem.c @@ -1,53 +1,81 @@ /* * Copyright (c) 2015, Christoph Hellwig. + * Copyright (c) 2015, Intel Corporation. 
*/ -#include #include -#include +#include +#include #include -#include -#include -static __init void register_pmem_device(struct resource *res) +static void e820_pmem_release(struct device *dev) { - struct platform_device *pdev; - int error; + struct nvdimm_bus *nvdimm_bus = dev->platform_data; - pdev = platform_device_alloc("pmem", PLATFORM_DEVID_AUTO); - if (!pdev) - return; + if (nvdimm_bus) + nvdimm_bus_unregister(nvdimm_bus); +} - error = platform_device_add_resources(pdev, res, 1); - if (error) - goto out_put_pdev; +static struct platform_device e820_pmem = { + .name = "e820_pmem", + .id = -1, + .dev = { + .release = e820_pmem_release, + }, +}; - error = platform_device_add(pdev); - if (error) - goto out_put_pdev; - return; +static const struct attribute_group *e820_pmem_attribute_groups[] = { + &nvdimm_bus_attribute_group, + NULL, +}; -out_put_pdev: - dev_warn(&pdev->dev, "failed to add 'pmem' (persistent memory) device!\n"); - platform_device_put(pdev); -} +static const struct attribute_group *e820_pmem_region_attribute_groups[] = { + &nd_region_attribute_group, + &nd_device_attribute_group, + NULL, +}; -static __init int register_pmem_devices(void) +static __init int register_e820_pmem(void) { - int i; + static struct nvdimm_bus_descriptor nd_desc; + struct device *dev = &e820_pmem.dev; + struct nvdimm_bus *nvdimm_bus; + int rc, i; + + rc = platform_device_register(&e820_pmem); + if (rc) + return rc; + + nd_desc.attr_groups = e820_pmem_attribute_groups; + nd_desc.provider_name = "e820"; + nvdimm_bus = nvdimm_bus_register(dev, &nd_desc); + if (!nvdimm_bus) + goto err; + dev->platform_data = nvdimm_bus; for (i = 0; i < e820.nr_map; i++) { struct e820entry *ei = &e820.map[i]; + struct resource res = { + .flags = IORESOURCE_MEM, + .start = ei->addr, + .end = ei->addr + ei->size - 1, + }; + struct nd_region_desc ndr_desc; + + if (ei->type != E820_PRAM) + continue; - if (ei->type == E820_PRAM) { - struct resource res = { - .flags = IORESOURCE_MEM, - .start = 
ei->addr, - .end = ei->addr + ei->size - 1, - }; - register_pmem_device(&res); - } + memset(&ndr_desc, 0, sizeof(ndr_desc)); + ndr_desc.res = &res; + ndr_desc.attr_groups = e820_pmem_region_attribute_groups; + if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc)) + goto err; } return 0; + + err: + dev_err(dev, "failed to register legacy persistent memory ranges\n"); + platform_device_unregister(&e820_pmem); + return -ENXIO; } -device_initcall(register_pmem_devices); +device_initcall(register_e820_pmem); -- cgit v1.2.3 From 41d7a6d637e1440f5410cb43c25a3c41255540c5 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Fri, 19 Jun 2015 12:18:33 -0600 Subject: libnvdimm: Set numa_node to NVDIMM devices ACPI NFIT table has System Physical Address Range Structure entries that describe a proximity ID of each range when ACPI_NFIT_PROXIMITY_VALID is set in the flags. Change acpi_nfit_register_region() to map a proximity ID to its node ID, and set it to a new numa_node field of nd_region_desc, which is then conveyed to the nd_region device. The device core arranges for btt and namespace devices to inherit their node from their parent region. 
Signed-off-by: Toshi Kani [djbw: move set_dev_node() from region.c to bus.c] Signed-off-by: Dan Williams --- arch/x86/kernel/pmem.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c index 0f4ef472ab9e..64f90f53bb85 100644 --- a/arch/x86/kernel/pmem.c +++ b/arch/x86/kernel/pmem.c @@ -67,6 +67,7 @@ static __init int register_e820_pmem(void) memset(&ndr_desc, 0, sizeof(ndr_desc)); ndr_desc.res = &res; ndr_desc.attr_groups = e820_pmem_region_attribute_groups; + ndr_desc.numa_node = NUMA_NO_NODE; if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc)) goto err; } -- cgit v1.2.3 From 61031952f4c89dba1065f7a5b9419badb112554c Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Thu, 25 Jun 2015 03:08:39 -0400 Subject: arch, x86: pmem api for ensuring durability of persistent memory updates Based on an original patch by Ross Zwisler [1]. Writes to persistent memory have the potential to be posted to cpu cache, cpu write buffers, and platform write buffers (memory controller) before being committed to persistent media. Provide apis, memcpy_to_pmem(), wmb_pmem(), and memremap_pmem(), to write data to pmem and assert that it is durable in PMEM (a persistent linear address range). A '__pmem' attribute is added so sparse can track proper usage of pointers to pmem. This continues the status quo of pmem being x86 only for 4.2, but reworks to ioremap(), and a wider implementation of memremap(), will enable other archs in 4.3. [1]: https://lists.01.org/pipermail/linux-nvdimm/2015-May/000932.html Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Signed-off-by: Ross Zwisler [djbw: various reworks] Signed-off-by: Dan Williams --- arch/x86/Kconfig | 1 + arch/x86/include/asm/cacheflush.h | 72 +++++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/io.h | 6 ++++ 3 files changed, 79 insertions(+) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1a2cbf641667..62564ddf7f78 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -27,6 +27,7 @@ config X86 select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_PMEM_API select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select HAVE_AOUT if X86_32 diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index 47c8e32f621a..ec23bb753a3e 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -4,6 +4,7 @@ /* Caches aren't brain-dead on the intel. */ #include #include +#include /* * The set_memory_* API can be used to change various attributes of a virtual @@ -104,4 +105,75 @@ static inline int rodata_test(void) } #endif +#ifdef ARCH_HAS_NOCACHE_UACCESS + +/** + * arch_memcpy_to_pmem - copy data to persistent memory + * @dst: destination buffer for the copy + * @src: source buffer for the copy + * @n: length of the copy in bytes + * + * Copy data to persistent memory media via non-temporal stores so that + * a subsequent arch_wmb_pmem() can flush cpu and memory controller + * write buffers to guarantee durability. + */ +static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, + size_t n) +{ + int unwritten; + + /* + * We are copying between two kernel buffers, if + * __copy_from_user_inatomic_nocache() returns an error (page + * fault) we would have already reported a general protection fault + * before the WARN+BUG. 
+ */ + unwritten = __copy_from_user_inatomic_nocache((void __force *) dst, + (void __user *) src, n); + if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n", + __func__, dst, src, unwritten)) + BUG(); +} + +/** + * arch_wmb_pmem - synchronize writes to persistent memory + * + * After a series of arch_memcpy_to_pmem() operations this drains data + * from cpu write buffers and any platform (memory controller) buffers + * to ensure that written data is durable on persistent memory media. + */ +static inline void arch_wmb_pmem(void) +{ + /* + * wmb() to 'sfence' all previous writes such that they are + * architecturally visible to 'pcommit'. Note, that we've + * already arranged for pmem writes to avoid the cache via + * arch_memcpy_to_pmem(). + */ + wmb(); + pcommit_sfence(); +} + +static inline bool __arch_has_wmb_pmem(void) +{ +#ifdef CONFIG_X86_64 + /* + * We require that wmb() be an 'sfence', that is only guaranteed on + * 64-bit builds + */ + return static_cpu_has(X86_FEATURE_PCOMMIT); +#else + return false; +#endif +} +#else /* ARCH_HAS_NOCACHE_UACCESS i.e. ARCH=um */ +extern void arch_memcpy_to_pmem(void __pmem *dst, const void *src, size_t n); +extern void arch_wmb_pmem(void); + +static inline bool __arch_has_wmb_pmem(void) +{ + return false; +} +#endif + #endif /* _ASM_X86_CACHEFLUSH_H */ diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 34a5b93704d3..c60c3f3b0183 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -247,6 +247,12 @@ static inline void flush_write_buffers(void) #endif } +static inline void __pmem *arch_memremap_pmem(resource_size_t offset, + unsigned long size) +{ + return (void __force __pmem *) ioremap_cache(offset, size); +} + #endif /* __KERNEL__ */ extern void native_io_delay(void); -- cgit v1.2.3