From c23c80822fbdf69c1aacbca50b8339972697f850 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Wed, 7 Jul 2021 18:07:34 -0700 Subject: lib: fix spelling mistakes in header files Fix some spelling mistakes in comments found by "codespell": Hoever ==> However poiter ==> pointer representaion ==> representation uppon ==> upon independend ==> independent aquired ==> acquired mis-match ==> mismatch scrach ==> scratch struture ==> structure Analagous ==> Analogous interation ==> iteration And some were discovered manually by Joe Perches and Christoph Lameter: stroed ==> stored arch independent ==> an architecture independent A example structure for ==> Example structure for Link: https://lkml.kernel.org/r/20210609150027.14805-2-thunder.leizhen@huawei.com Signed-off-by: Zhen Lei Cc: Christoph Lameter Cc: Masami Hiramatsu Cc: Dennis Zhou Cc: Tejun Heo Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootconfig.h | 4 ++-- include/linux/cpumask.h | 2 +- include/linux/debugobjects.h | 2 +- include/linux/lru_cache.h | 8 ++++---- include/linux/nodemask.h | 6 +++--- include/linux/percpu-refcount.h | 2 +- include/linux/scatterlist.h | 2 +- 7 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h index 6bdd94cff4e2..abe089c27529 100644 --- a/include/linux/bootconfig.h +++ b/include/linux/bootconfig.h @@ -214,10 +214,10 @@ static inline struct xbc_node * __init xbc_node_get_subkey(struct xbc_node *node * @value: Iterated value of array entry. * * Iterate array entries of given @key under @node. Each array entry node - * is stroed to @anode and @value. If the @node doesn't have @key node, + * is stored to @anode and @value. If the @node doesn't have @key node, * it does nothing. * Note that even if the found key node has only one value (not array) - * this executes block once. Hoever, if the found key node has no value + * this executes block once. 
However, if the found key node has no value * (key-only node), this does nothing. So don't use this for testing the * key-value pair existence. */ diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index bfc4690de4f4..f3689a52bfd0 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -259,7 +259,7 @@ extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool /** * for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location * @cpu: the (optionally unsigned) integer iterator - * @mask: the cpumask poiter + * @mask: the cpumask pointer * @start: the start location * * The implementation does not assume any bit in @mask is set (including @start). diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h index 8d2dde23e9fb..32444686b6ff 100644 --- a/include/linux/debugobjects.h +++ b/include/linux/debugobjects.h @@ -18,7 +18,7 @@ enum debug_obj_state { struct debug_obj_descr; /** - * struct debug_obj - representaion of an tracked object + * struct debug_obj - representation of an tracked object * @node: hlist node to link the object into the tracker list * @state: tracked object state * @astate: current active state diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h index 429d67d815ce..07add7882a5d 100644 --- a/include/linux/lru_cache.h +++ b/include/linux/lru_cache.h @@ -32,7 +32,7 @@ This header file (and its .c file; kernel-doc of functions see there) Because of this later property, it is called "lru_cache". As it actually Tracks Objects in an Active SeT, we could also call it toast (incidentally that is what may happen to the data on the - backend storage uppon next resync, if we don't get it right). + backend storage upon next resync, if we don't get it right). What for? 
@@ -152,7 +152,7 @@ struct lc_element { * for paranoia, and for "lc_element_to_index" */ unsigned lc_index; /* if we want to track a larger set of objects, - * it needs to become arch independend u64 */ + * it needs to become an architecture independent u64 */ unsigned lc_number; /* special label when on free list */ #define LC_FREE (~0U) @@ -263,7 +263,7 @@ extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char * * Allows (expects) the set to be "dirty". Note that the reference counts and * order on the active and lru lists may still change. Used to serialize - * changing transactions. Returns true if we aquired the lock. + * changing transactions. Returns true if we acquired the lock. */ static inline int lc_try_lock_for_transaction(struct lru_cache *lc) { @@ -275,7 +275,7 @@ static inline int lc_try_lock_for_transaction(struct lru_cache *lc) * @lc: the lru cache to operate on * * Note that the reference counts and order on the active and lru lists may - * still change. Only works on a "clean" set. Returns true if we aquired the + * still change. Only works on a "clean" set. Returns true if we acquired the * lock, which means there are no pending changes, and any further attempt to * change the set will not succeed until the next lc_unlock(). */ diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index ac398e143c9a..567c3ddba2c4 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -119,7 +119,7 @@ static inline const unsigned long *__nodemask_pr_bits(const nodemask_t *m) * The inline keyword gives the compiler room to decide to inline, or * not inline a function as it sees best. However, as these functions * are called in both __init and non-__init functions, if they are not - * inlined we will end up with a section mis-match error (of the type of + * inlined we will end up with a section mismatch error (of the type of * freeable items not being freed). 
So we must use __always_inline here * to fix the problem. If other functions in the future also end up in * this situation they will also need to be annotated as __always_inline @@ -515,7 +515,7 @@ static inline int node_random(const nodemask_t *mask) #define for_each_online_node(node) for_each_node_state(node, N_ONLINE) /* - * For nodemask scrach area. + * For nodemask scratch area. * NODEMASK_ALLOC(type, name) allocates an object with a specified type and * name. */ @@ -528,7 +528,7 @@ static inline int node_random(const nodemask_t *mask) #define NODEMASK_FREE(m) do {} while (0) #endif -/* A example struture for using NODEMASK_ALLOC, used in mempolicy. */ +/* Example structure for using NODEMASK_ALLOC, used in mempolicy. */ struct nodemask_scratch { nodemask_t mask1; nodemask_t mask2; diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 16c35a728b4c..ae16a9856305 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -213,7 +213,7 @@ static inline void percpu_ref_get_many(struct percpu_ref *ref, unsigned long nr) * percpu_ref_get - increment a percpu refcount * @ref: percpu_ref to get * - * Analagous to atomic_long_inc(). + * Analogous to atomic_long_inc(). * * This function is safe to call as long as @ref is between init and exit. */ diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 6f70572b2938..ecf87484814f 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -474,7 +474,7 @@ sg_page_iter_dma_address(struct sg_dma_page_iter *dma_iter) * Iterates over sg entries mapping page-by-page. On each successful * iteration, @miter->page points to the mapped page and * @miter->length bytes of data can be accessed at @miter->addr. As - * long as an interation is enclosed between start and stop, the user + * long as an iteration is enclosed between start and stop, the user * is free to choose control structure and when to stop. 
* * @miter->consumed is set to @miter->length on each iteration. It -- cgit v1.2.3 From 6d47c23b16aa78ff93a3050ccf4b1bd1c064b8b3 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 7 Jul 2021 18:07:59 -0700 Subject: set_memory: allow querying whether set_direct_map_*() is actually enabled On arm64, set_direct_map_*() functions may return 0 without actually changing the linear map. This behaviour can be controlled using kernel parameters, so we need a way to determine at runtime whether calls to set_direct_map_invalid_noflush() and set_direct_map_default_noflush() have any effect. Extend set_memory API with can_set_direct_map() function that allows checking if calling set_direct_map_*() will actually change the page table, replace several occurrences of open coded checks in arm64 with the new function and provide a generic stub for architectures that always modify page tables upon calls to set_direct_map APIs. [arnd@arndb.de: arm64: kfence: fix header inclusion ] Link: https://lkml.kernel.org/r/20210518072034.31572-4-rppt@kernel.org Signed-off-by: Mike Rapoport Reviewed-by: Catalin Marinas Reviewed-by: David Hildenbrand Acked-by: James Bottomley Cc: Alexander Viro Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Christopher Lameter Cc: Dan Williams Cc: Dave Hansen Cc: Elena Reshetova Cc: Hagen Paul Pfeifer Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: James Bottomley Cc: "Kirill A. 
Shutemov" Cc: Mark Rutland Cc: Matthew Wilcox Cc: Michael Kerrisk Cc: Palmer Dabbelt Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Zijlstra Cc: Rick Edgecombe Cc: Roman Gushchin Cc: Shakeel Butt Cc: Shuah Khan Cc: Thomas Gleixner Cc: Tycho Andersen Cc: Will Deacon Cc: kernel test robot Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/set_memory.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h index fe1aa4e54680..f36be5166c19 100644 --- a/include/linux/set_memory.h +++ b/include/linux/set_memory.h @@ -28,7 +28,19 @@ static inline bool kernel_page_present(struct page *page) { return true; } +#else /* CONFIG_ARCH_HAS_SET_DIRECT_MAP */ +/* + * Some architectures, e.g. ARM64 can disable direct map modifications at + * boot time. Let them overrive this query. + */ +#ifndef can_set_direct_map +static inline bool can_set_direct_map(void) +{ + return true; +} +#define can_set_direct_map can_set_direct_map #endif +#endif /* CONFIG_ARCH_HAS_SET_DIRECT_MAP */ #ifndef set_mce_nospec static inline int set_mce_nospec(unsigned long pfn, bool unmap) -- cgit v1.2.3 From 1507f51255c9ff07d75909a84e7c0d7f3c4b2f49 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 7 Jul 2021 18:08:03 -0700 Subject: mm: introduce memfd_secret system call to create "secret" memory areas Introduce "memfd_secret" system call with the ability to create memory areas that are visible only in the context of the owning process and are mapped neither into other processes nor into the kernel page tables. The secretmem feature is off by default and the user must explicitly enable it at boot time. Once secretmem is enabled, the user will be able to create a file descriptor using the memfd_secret() system call. 
The memory areas created by mmap() calls from this file descriptor will be unmapped from the kernel direct map and they will only be mapped in the page table of the processes that have access to the file descriptor. Secretmem is designed to provide the following protections: * Enhanced protection (in conjunction with all the other in-kernel attack prevention systems) against ROP attacks. Secretmem makes "simple" ROP insufficient to perform exfiltration, which increases the required complexity of the attack. Along with other protections like the kernel stack size limit and address space layout randomization, which make finding gadgets really hard, the absence of any in-kernel primitive for accessing secret memory means the one gadget ROP attack can't work. Since the only way to access secret memory is to reconstruct the missing mapping entry, the attacker has to recover the physical page and insert a PTE pointing to it in the kernel and then retrieve the contents. That takes at least three gadgets which is a level of difficulty beyond most standard attacks. * Prevent cross-process secret userspace memory exposures. Once the secret memory is allocated, the user can't accidentally pass it into the kernel to be transmitted somewhere. The secretmem pages cannot be accessed via the direct map and they are disallowed in GUP. * Harden against exploited kernel flaws. In order to access secretmem, a kernel-side attack would need to either walk the page tables and create new ones, or spawn a new privileged userspace process to perform secrets exfiltration using ptrace. The file descriptor based memory has several advantages over the "traditional" mm interfaces, such as mlock(), mprotect(), madvise(). The file descriptor approach allows explicit and controlled sharing of the memory areas, and it allows the operations to be sealed. Besides, file descriptor based memory paves the way for VMMs to remove the secret memory range from the userspace hypervisor process, for instance QEMU. 
Andy Lutomirski says: "Getting fd-backed memory into a guest will take some possibly major work in the kernel, but getting vma-backed memory into a guest without mapping it in the host user address space seems much, much worse." memfd_secret() is made a dedicated system call rather than an extension to memfd_create() because its purpose is to allow the user to create more secure memory mappings rather than to simply allow file based access to the memory. Nowadays the cost of a new system call is negligible, while it is way simpler for userspace to deal with clear-cut system calls than with a multiplexer or an overloaded syscall. Moreover, the initial implementation of memfd_secret() is completely distinct from memfd_create() so there is not much sense in overloading memfd_create() to begin with. If there is ever a need for code sharing between these implementations, it can be easily achieved without a need to adjust user visible APIs. The secret memory remains accessible in the process context using uaccess primitives, but it is not exposed to the kernel otherwise; secret memory areas are removed from the direct map and functions in the follow_page()/get_user_page() family will refuse to return a page that belongs to the secret memory area. Once there is a use case that requires exposing secretmem to the kernel, it will be an opt-in request in the system call flags so that the user has to decide what data can be exposed to the kernel. Removing the pages from the direct map may cause its fragmentation on architectures that use large pages to map the physical memory, which affects the system performance. However, the original Kconfig text for CONFIG_DIRECT_GBPAGES said that gigabyte pages in the direct map "... can improve the kernel's performance a tiny bit ..." (commit 00d1c5e05736 ("x86: add gbpages switches")) and the recent report [1] showed that "... 
although 1G mappings are a good default choice, there is no compelling evidence that it must be the only choice". Hence, it is sufficient to have secretmem disabled by default with the ability of a system administrator to enable it at boot time. Pages in the secretmem regions are unevictable and unmovable to avoid accidental exposure of the sensitive data via swap or during page migration. Since the secretmem mappings are locked in memory they cannot exceed RLIMIT_MEMLOCK. Since these mappings are already locked independently from mlock(), an attempt to mlock()/munlock() secretmem range would fail and mlockall()/munlockall() will ignore secretmem mappings. However, unlike mlock()ed memory, secretmem currently behaves more like long-term GUP: secretmem mappings are unmovable mappings directly consumed by user space. With default limits, there is no excessive use of secretmem and it poses no real problem in combination with ZONE_MOVABLE/CMA, but in the future this should be addressed to allow balanced use of large amounts of secretmem along with ZONE_MOVABLE/CMA. A page that was a part of the secret memory area is cleared when it is freed to ensure the data is not exposed to the next user of that page. The following example demonstrates creation of a secret mapping (error handling is omitted): fd = memfd_secret(0); ftruncate(fd, MAP_SIZE); ptr = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); [1] https://lore.kernel.org/linux-mm/213b4567-46ce-f116-9cdf-bbd0c884eb3c@linux.intel.com/ [akpm@linux-foundation.org: suppress Kconfig whine] Link: https://lkml.kernel.org/r/20210518072034.31572-5-rppt@kernel.org Signed-off-by: Mike Rapoport Acked-by: Hagen Paul Pfeifer Acked-by: James Bottomley Cc: Alexander Viro Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christopher Lameter Cc: Dan Williams Cc: Dave Hansen Cc: Elena Reshetova Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: James Bottomley Cc: "Kirill A. 
Shutemov" Cc: Matthew Wilcox Cc: Mark Rutland Cc: Michael Kerrisk Cc: Palmer Dabbelt Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Zijlstra Cc: Rick Edgecombe Cc: Roman Gushchin Cc: Shakeel Butt Cc: Shuah Khan Cc: Thomas Gleixner Cc: Tycho Andersen Cc: Will Deacon Cc: David Hildenbrand Cc: kernel test robot Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/secretmem.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/magic.h | 1 + 2 files changed, 49 insertions(+) create mode 100644 include/linux/secretmem.h (limited to 'include') diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h new file mode 100644 index 000000000000..e617b4afcc62 --- /dev/null +++ b/include/linux/secretmem.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_SECRETMEM_H +#define _LINUX_SECRETMEM_H + +#ifdef CONFIG_SECRETMEM + +extern const struct address_space_operations secretmem_aops; + +static inline bool page_is_secretmem(struct page *page) +{ + struct address_space *mapping; + + /* + * Using page_mapping() is quite slow because of the actual call + * instruction and repeated compound_head(page) inside the + * page_mapping() function. + * We know that secretmem pages are not compound and LRU so we can + * save a couple of cycles here. 
+ */ + if (PageCompound(page) || !PageLRU(page)) + return false; + + mapping = (struct address_space *) + ((unsigned long)page->mapping & ~PAGE_MAPPING_FLAGS); + + if (mapping != page->mapping) + return false; + + return mapping->a_ops == &secretmem_aops; +} + +bool vma_is_secretmem(struct vm_area_struct *vma); + +#else + +static inline bool vma_is_secretmem(struct vm_area_struct *vma) +{ + return false; +} + +static inline bool page_is_secretmem(struct page *page) +{ + return false; +} + +#endif /* CONFIG_SECRETMEM */ + +#endif /* _LINUX_SECRETMEM_H */ diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index f3956fc11de6..35687dcb1a42 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -97,5 +97,6 @@ #define DEVMEM_MAGIC 0x454d444d /* "DMEM" */ #define Z3FOLD_MAGIC 0x33 #define PPC_CMM_MAGIC 0xc7571590 +#define SECRETMEM_MAGIC 0x5345434d /* "SECM" */ #endif /* __LINUX_MAGIC_H__ */ -- cgit v1.2.3 From 9a436f8ff6316c3c1a21a758e14ded930bd615d9 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 7 Jul 2021 18:08:07 -0700 Subject: PM: hibernate: disable when there are active secretmem users It is unsafe to allow saving of secretmem areas to the hibernation snapshot as they would be visible after the resume and this essentially will defeat the purpose of secret memory mappings. Prevent hibernation whenever there are active secret memory users. Link: https://lkml.kernel.org/r/20210518072034.31572-6-rppt@kernel.org Signed-off-by: Mike Rapoport Acked-by: David Hildenbrand Acked-by: James Bottomley Cc: Alexander Viro Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christopher Lameter Cc: Dan Williams Cc: Dave Hansen Cc: David Hildenbrand Cc: Elena Reshetova Cc: Hagen Paul Pfeifer Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: James Bottomley Cc: "Kirill A. 
Shutemov" Cc: Mark Rutland Cc: Matthew Wilcox Cc: Michael Kerrisk Cc: Palmer Dabbelt Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Zijlstra Cc: Rick Edgecombe Cc: Roman Gushchin Cc: Shakeel Butt Cc: Shuah Khan Cc: Thomas Gleixner Cc: Tycho Andersen Cc: Will Deacon Cc: kernel test robot Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/secretmem.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h index e617b4afcc62..21c3771e6a56 100644 --- a/include/linux/secretmem.h +++ b/include/linux/secretmem.h @@ -30,6 +30,7 @@ static inline bool page_is_secretmem(struct page *page) } bool vma_is_secretmem(struct vm_area_struct *vma); +bool secretmem_active(void); #else @@ -43,6 +44,11 @@ static inline bool page_is_secretmem(struct page *page) return false; } +static inline bool secretmem_active(void) +{ + return false; +} + #endif /* CONFIG_SECRETMEM */ #endif /* _LINUX_SECRETMEM_H */ -- cgit v1.2.3 From 7bb7f2ac24a028b20fca466b9633847b289b156a Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 7 Jul 2021 18:08:11 -0700 Subject: arch, mm: wire up memfd_secret system call where relevant Wire up memfd_secret system call on architectures that define ARCH_HAS_SET_DIRECT_MAP, namely arm64, risc-v and x86. Link: https://lkml.kernel.org/r/20210518072034.31572-7-rppt@kernel.org Signed-off-by: Mike Rapoport Acked-by: Palmer Dabbelt Acked-by: Arnd Bergmann Acked-by: Catalin Marinas Acked-by: David Hildenbrand Acked-by: James Bottomley Cc: Alexander Viro Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Christopher Lameter Cc: Dan Williams Cc: Dave Hansen Cc: David Hildenbrand Cc: Elena Reshetova Cc: Hagen Paul Pfeifer Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: James Bottomley Cc: "Kirill A. 
Shutemov" Cc: Mark Rutland Cc: Matthew Wilcox Cc: Michael Kerrisk Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Zijlstra Cc: Rick Edgecombe Cc: Roman Gushchin Cc: Shakeel Butt Cc: Shuah Khan Cc: Thomas Gleixner Cc: Tycho Andersen Cc: Will Deacon Cc: kernel test robot Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/syscalls.h | 1 + include/uapi/asm-generic/unistd.h | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 586128d5c3b8..69c9a7010081 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1050,6 +1050,7 @@ asmlinkage long sys_landlock_create_ruleset(const struct landlock_ruleset_attr _ asmlinkage long sys_landlock_add_rule(int ruleset_fd, enum landlock_rule_type rule_type, const void __user *rule_attr, __u32 flags); asmlinkage long sys_landlock_restrict_self(int ruleset_fd, __u32 flags); +asmlinkage long sys_memfd_secret(unsigned int flags); /* * Architecture-specific system calls diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index f211961ce1da..a9d6fcd95f42 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -873,8 +873,13 @@ __SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule) #define __NR_landlock_restrict_self 446 __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self) +#ifdef __ARCH_WANT_MEMFD_SECRET +#define __NR_memfd_secret 447 +__SYSCALL(__NR_memfd_secret, sys_memfd_secret) +#endif + #undef __NR_syscalls -#define __NR_syscalls 447 +#define __NR_syscalls 448 /* * 32 bit systems traditionally used different -- cgit v1.2.3 From 06c8839815ac7aa2b44ea3bb3ee1820b08418f55 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Wed, 7 Jul 2021 18:08:19 -0700 Subject: mm: fix spelling mistakes in header files Fix some spelling mistakes in comments: successfull ==> successful potentialy ==> potentially alloced ==> allocated indicies 
==> indices wont ==> won't resposible ==> responsible dirtyness ==> dirtiness droppped ==> dropped alread ==> already occured ==> occurred interupts ==> interrupts extention ==> extension slighly ==> slightly Dont't ==> Don't Link: https://lkml.kernel.org/r/20210531034849.9549-2-thunder.leizhen@huawei.com Signed-off-by: Zhen Lei Cc: Jerome Glisse Cc: Mike Kravetz Cc: Dennis Zhou Cc: Tejun Heo Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 4 ++-- include/linux/hmm.h | 2 +- include/linux/hugetlb.h | 6 +++--- include/linux/list_lru.h | 4 ++-- include/linux/mmu_notifier.h | 8 ++++---- include/linux/percpu-defs.h | 2 +- include/linux/shrinker.h | 2 +- include/linux/vmalloc.h | 4 ++-- 8 files changed, 16 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 4221888bdcd6..c24098c7acca 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -35,12 +35,12 @@ enum compact_result { COMPACT_CONTINUE, /* - * The full zone was compacted scanned but wasn't successfull to compact + * The full zone was compacted scanned but wasn't successful to compact * suitable pages. */ COMPACT_COMPLETE, /* - * direct compaction has scanned part of the zone but wasn't successfull + * direct compaction has scanned part of the zone but wasn't successful * to compact suitable pages. */ COMPACT_PARTIAL_SKIPPED, diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 866a0fa104c4..2fd2e91d5107 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -113,7 +113,7 @@ int hmm_range_fault(struct hmm_range *range); * HMM_RANGE_DEFAULT_TIMEOUT - default timeout (ms) when waiting for a range * * When waiting for mmu notifiers we need some kind of time out otherwise we - * could potentialy wait for ever, 1000ms ie 1s sounds like a long time to + * could potentially wait for ever, 1000ms ie 1s sounds like a long time to * wait already. 
*/ #define HMM_RANGE_DEFAULT_TIMEOUT 1000 diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 8e0f32f935bd..f7ca1a3870ea 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -51,7 +51,7 @@ struct hugepage_subpool { long count; long max_hpages; /* Maximum huge pages or -1 if no maximum. */ long used_hpages; /* Used count against maximum, includes */ - /* both alloced and reserved pages. */ + /* both allocated and reserved pages. */ struct hstate *hstate; long min_hpages; /* Minimum huge pages or -1 if no minimum. */ long rsv_hpages; /* Pages reserved against global pool to */ @@ -85,7 +85,7 @@ struct resv_map { * by a resv_map's lock. The set of regions within the resv_map represent * reservations for huge pages, or huge pages that have already been * instantiated within the map. The from and to elements are huge page - * indicies into the associated mapping. from indicates the starting index + * indices into the associated mapping. from indicates the starting index * of the region. to represents the first index past the end of the region. * * For example, a file region structure with from == 0 and to == 4 represents @@ -797,7 +797,7 @@ static inline bool hugepage_migration_supported(struct hstate *h) * It determines whether or not a huge page should be placed on * movable zone or not. Movability of any huge page should be * required only if huge page size is supported for migration. - * There wont be any reason for the huge page to be movable if + * There won't be any reason for the huge page to be movable if * it is not migratable to start with. Also the size of the huge * page should be large enough to be placed under a movable zone * and still feasible enough to be migratable. 
Just the presence diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index 9dcaa3e582c9..1b5fceb565df 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h @@ -146,7 +146,7 @@ typedef enum lru_status (*list_lru_walk_cb)(struct list_head *item, * @lru: the lru pointer. * @nid: the node id to scan from. * @memcg: the cgroup to scan from. - * @isolate: callback function that is resposible for deciding what to do with + * @isolate: callback function that is responsible for deciding what to do with * the item currently being scanned * @cb_arg: opaque type that will be passed to @isolate * @nr_to_walk: how many items to scan. @@ -172,7 +172,7 @@ unsigned long list_lru_walk_one(struct list_lru *lru, * @lru: the lru pointer. * @nid: the node id to scan from. * @memcg: the cgroup to scan from. - * @isolate: callback function that is resposible for deciding what to do with + * @isolate: callback function that is responsible for deciding what to do with * the item currently being scanned * @cb_arg: opaque type that will be passed to @isolate * @nr_to_walk: how many items to scan. diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 6692da8d121d..45fc2c81e370 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -33,7 +33,7 @@ struct mmu_interval_notifier; * * @MMU_NOTIFY_SOFT_DIRTY: soft dirty accounting (still same page and same * access flags). User should soft dirty the page in the end callback to make - * sure that anyone relying on soft dirtyness catch pages that might be written + * sure that anyone relying on soft dirtiness catch pages that might be written * through non CPU mappings. * * @MMU_NOTIFY_RELEASE: used during mmu_interval_notifier invalidate to signal @@ -167,7 +167,7 @@ struct mmu_notifier_ops { * decrease the refcount. If the refcount is decreased on * invalidate_range_start() then the VM can free pages as page * table entries are removed. 
If the refcount is only - * droppped on invalidate_range_end() then the driver itself + * dropped on invalidate_range_end() then the driver itself * will drop the last refcount but it must take care to flush * any secondary tlb before doing the final free on the * page. Pages will no longer be referenced by the linux @@ -196,7 +196,7 @@ struct mmu_notifier_ops { * If invalidate_range() is used to manage a non-CPU TLB with * shared page-tables, it not necessary to implement the * invalidate_range_start()/end() notifiers, as - * invalidate_range() alread catches the points in time when an + * invalidate_range() already catches the points in time when an * external TLB range needs to be flushed. For more in depth * discussion on this see Documentation/vm/mmu_notifier.rst * @@ -369,7 +369,7 @@ mmu_interval_read_retry(struct mmu_interval_notifier *interval_sub, * mmu_interval_read_retry() will return true. * * False is not reliable and only suggests a collision may not have - * occured. It can be called many times and does not have to hold the user + * occurred. It can be called many times and does not have to hold the user * provided lock. * * This call can be used as part of loops and other expensive operations to diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index dff7040f629a..af1071535de8 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -412,7 +412,7 @@ do { \ * instead. * * If there is no other protection through preempt disable and/or disabling - * interupts then one of these RMW operations can show unexpected behavior + * interrupts then one of these RMW operations can show unexpected behavior * because the execution thread was rescheduled on another processor or an * interrupt occurred and the same percpu variable was modified from the * interrupt context. 
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 1eac79ce57d4..9814fff58a69 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -4,7 +4,7 @@ /* * This struct is used to pass information from page reclaim to the shrinkers. - * We consolidate the values for easier extention later. + * We consolidate the values for easier extension later. * * The 'gfpmask' refers to the allocation we are currently trying to * fulfil. diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 1dabd6f22486..2644425b6dce 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -29,7 +29,7 @@ struct notifier_block; /* in notifier.h */ #define VM_NO_HUGE_VMAP 0x00000400 /* force PAGE_SIZE pte mapping */ /* - * VM_KASAN is used slighly differently depending on CONFIG_KASAN_VMALLOC. + * VM_KASAN is used slightly differently depending on CONFIG_KASAN_VMALLOC. * * If IS_ENABLED(CONFIG_KASAN_VMALLOC), VM_KASAN is set on a vm_struct after * shadow memory has been mapped. It's used to handle allocation errors so that @@ -247,7 +247,7 @@ static inline void set_vm_flush_reset_perms(void *addr) extern long vread(char *buf, char *addr, unsigned long count); /* - * Internals. Dont't use.. + * Internals. Don't use.. */ extern struct list_head vmap_area_list; extern __init void vm_area_add_early(struct vm_struct *vm); -- cgit v1.2.3 From 5748fbc533a32459582535b759887c45ca0fe556 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 7 Jul 2021 18:08:22 -0700 Subject: mm: add setup_initial_init_mm() helper Patch series "init_mm: cleanup ARCH's text/data/brk setup code", v3. Add setup_initial_init_mm() helper, then use it to cleanup the text, data and brk setup code. This patch (of 15): Add setup_initial_init_mm() helper to setup kernel text, data and brk. 
Link: https://lkml.kernel.org/r/20210608083418.137226-1-wangkefeng.wang@huawei.com Link: https://lkml.kernel.org/r/20210608083418.137226-2-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Cc: Souptick Joarder Cc: Christophe Leroy Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Ungerer Cc: Guo Ren Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jonas Bonn Cc: Ley Foon Tan Cc: Michael Ellerman Cc: Nick Hu Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Rich Felker Cc: Russell King (Oracle) Cc: Stafford Horne Cc: Stefan Kristiansson Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 788a0b1323d0..57453dba41b9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -238,6 +238,9 @@ int __add_to_page_cache_locked(struct page *page, struct address_space *mapping, #define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) +void setup_initial_init_mm(void *start_code, void *end_code, + void *end_data, void *brk); + /* * Linux kernel virtual memory manager primitives. * The idea being to have a "virtual" mm in the same way -- cgit v1.2.3 From 7eaf3cf3b7c5a49b3ca60e1ceb3d1d7430cc9d0e Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 7 Jul 2021 18:09:10 -0700 Subject: buildid: add API to parse build ID out of buffer Add an API that can parse the build ID out of a buffer, instead of a vma, to support printing a kernel module's build ID for stack traces. 
Link: https://lkml.kernel.org/r/20210511003845.2429846-3-swboyd@chromium.org Signed-off-by: Stephen Boyd Cc: Jiri Olsa Cc: Alexei Starovoitov Cc: Jessica Yu Cc: Evan Green Cc: Hsin-Yi Wang Cc: Andy Shevchenko Cc: Baoquan He Cc: Borislav Petkov Cc: Catalin Marinas Cc: Dave Young Cc: Ingo Molnar Cc: Konstantin Khlebnikov Cc: Matthew Wilcox Cc: Petr Mladek Cc: Rasmus Villemoes Cc: Sasha Levin Cc: Sergey Senozhatsky Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Vivek Goyal Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/buildid.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/buildid.h b/include/linux/buildid.h index 40232f90db6e..ebce93f26d06 100644 --- a/include/linux/buildid.h +++ b/include/linux/buildid.h @@ -8,5 +8,6 @@ int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id, __u32 *size); +int build_id_parse_buf(const void *buf, unsigned char *build_id, u32 buf_size); #endif -- cgit v1.2.3 From 83cc6fa0049d7c5333a53f4d959a9457340284ea Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 7 Jul 2021 18:09:13 -0700 Subject: buildid: stash away kernels build ID on init Parse the kernel's build ID at initialization so that other code can print a hex format string representation of the running kernel's build ID. This will be used in the kdump and dump_stack code so that developers can easily locate the vmlinux debug symbols for a crash/stacktrace. 
[swboyd@chromium.org: fix implicit declaration of init_vmlinux_build_id()] Link: https://lkml.kernel.org/r/CAE-0n51UjTbay8N9FXAyE7_aR2+ePrQnKSRJ0gbmRsXtcLBVaw@mail.gmail.com Link: https://lkml.kernel.org/r/20210511003845.2429846-4-swboyd@chromium.org Signed-off-by: Stephen Boyd Acked-by: Baoquan He Cc: Jiri Olsa Cc: Alexei Starovoitov Cc: Jessica Yu Cc: Evan Green Cc: Hsin-Yi Wang Cc: Dave Young Cc: Vivek Goyal Cc: Andy Shevchenko Cc: Borislav Petkov Cc: Catalin Marinas Cc: Ingo Molnar Cc: Konstantin Khlebnikov Cc: Matthew Wilcox Cc: Petr Mladek Cc: Rasmus Villemoes Cc: Sasha Levin Cc: Sergey Senozhatsky Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/buildid.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/buildid.h b/include/linux/buildid.h index ebce93f26d06..f375900cf9ed 100644 --- a/include/linux/buildid.h +++ b/include/linux/buildid.h @@ -10,4 +10,7 @@ int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id, __u32 *size); int build_id_parse_buf(const void *buf, unsigned char *build_id, u32 buf_size); +extern unsigned char vmlinux_build_id[BUILD_ID_SIZE_MAX]; +void init_vmlinux_build_id(void); + #endif -- cgit v1.2.3 From 22f4e66df79d0a730fcd6c17f3403b5ab8c72ced Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 7 Jul 2021 18:09:17 -0700 Subject: dump_stack: add vmlinux build ID to stack traces Add the running kernel's build ID[1] to the stacktrace information header. This makes it simpler for developers to locate the vmlinux with full debuginfo for a particular kernel stacktrace. Combined with scripts/decode_stacktrace.sh, a developer can download the correct vmlinux from a debuginfod[2] server and find the exact file and line number for the functions plus offsets in a stacktrace.
This is especially useful for pstore crash debugging where the kernel crashes are recorded in the pstore logs and the recovery kernel is different or the debuginfo doesn't exist on the device due to space concerns (the data can be large and a security concern). The stacktrace can be analyzed after the crash by using the build ID to find the matching vmlinux and understand where in the function something went wrong. Example stacktrace from lkdtm: WARNING: CPU: 4 PID: 3255 at drivers/misc/lkdtm/bugs.c:83 lkdtm_WARNING+0x28/0x30 [lkdtm] Modules linked in: lkdtm rfcomm algif_hash algif_skcipher af_alg xt_cgroup uinput xt_MASQUERADE CPU: 4 PID: 3255 Comm: bash Not tainted 5.11 #3 aa23f7a1231c229de205662d5a9e0d4c580f19a1 Hardware name: Google Lazor (rev3+) with KB Backlight (DT) pstate: 00400009 (nzcv daif +PAN -UAO -TCO BTYPE=--) pc : lkdtm_WARNING+0x28/0x30 [lkdtm] The hex string aa23f7a1231c229de205662d5a9e0d4c580f19a1 is the build ID, following the kernel version number. Put it all behind a config option, STACKTRACE_BUILD_ID, so that kernel developers can remove this information if they decide it is too much. 
Link: https://lkml.kernel.org/r/20210511003845.2429846-5-swboyd@chromium.org Link: https://fedoraproject.org/wiki/Releases/FeatureBuildId [1] Link: https://sourceware.org/elfutils/Debuginfod.html [2] Signed-off-by: Stephen Boyd Cc: Jiri Olsa Cc: Alexei Starovoitov Cc: Jessica Yu Cc: Evan Green Cc: Hsin-Yi Wang Cc: Petr Mladek Cc: Steven Rostedt Cc: Andy Shevchenko Cc: Matthew Wilcox Cc: Baoquan He Cc: Borislav Petkov Cc: Catalin Marinas Cc: Dave Young Cc: Ingo Molnar Cc: Konstantin Khlebnikov Cc: Rasmus Villemoes Cc: Sasha Levin Cc: Sergey Senozhatsky Cc: Thomas Gleixner Cc: Vivek Goyal Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/buildid.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/buildid.h b/include/linux/buildid.h index f375900cf9ed..3e8d77a93ec4 100644 --- a/include/linux/buildid.h +++ b/include/linux/buildid.h @@ -10,7 +10,11 @@ int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id, __u32 *size); int build_id_parse_buf(const void *buf, unsigned char *build_id, u32 buf_size); +#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) extern unsigned char vmlinux_build_id[BUILD_ID_SIZE_MAX]; void init_vmlinux_build_id(void); +#else +static inline void init_vmlinux_build_id(void) { } +#endif #endif -- cgit v1.2.3 From 9294523e3768030ae8afb84110bcecc66425a647 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 7 Jul 2021 18:09:20 -0700 Subject: module: add printk formats to add module build ID to stacktraces Let's make kernel stacktraces easier to identify by including the build ID[1] of a module if the stacktrace is printing a symbol from a module. This makes it simpler for developers to locate a kernel module's full debuginfo for a particular stacktrace. 
Combined with scripts/decode_stacktrace.sh, a developer can download the matching debuginfo from a debuginfod[2] server and find the exact file and line number for the functions plus offsets in a stacktrace that match the module. This is especially useful for pstore crash debugging where the kernel crashes are recorded in something like console-ramoops and the recovery kernel/modules are different or the debuginfo doesn't exist on the device due to space concerns (the debuginfo can be too large for space-limited devices). Originally, I put this on the %pS format, but that was quickly rejected given that %pS is used in other places such as ftrace where build IDs aren't meaningful. There were some discussions on the list to put every module build ID into the "Modules linked in:" section of the stacktrace message but that quickly becomes very hard to read once you have more than three or four modules linked in. It also provides too much information when we don't expect each module to be traversed in a stacktrace. Having the build ID for modules that aren't important just makes things messy. Splitting it to multiple lines for each module quickly explodes the number of lines printed in an oops too, possibly wrapping the warning off the console. And finally, trying to stash away each module used in a callstack to provide the ID of each symbol printed is cumbersome and would require changes to each architecture to stash away modules and return their build IDs once unwinding has completed. Instead, we opt for the simpler approach of introducing new printk formats '%pS[R]b' for "pointer symbolic backtrace with module build ID" and '%pBb' for "pointer backtrace with module build ID" and then updating the few places in the architecture layer where the stacktrace is printed to use this new format.
Before: Call trace: lkdtm_WARNING+0x28/0x30 [lkdtm] direct_entry+0x16c/0x1b4 [lkdtm] full_proxy_write+0x74/0xa4 vfs_write+0xec/0x2e8 After: Call trace: lkdtm_WARNING+0x28/0x30 [lkdtm 6c2215028606bda50de823490723dc4bc5bf46f9] direct_entry+0x16c/0x1b4 [lkdtm 6c2215028606bda50de823490723dc4bc5bf46f9] full_proxy_write+0x74/0xa4 vfs_write+0xec/0x2e8 [akpm@linux-foundation.org: fix build with CONFIG_MODULES=n, tweak code layout] [rdunlap@infradead.org: fix build when CONFIG_MODULES is not set] Link: https://lkml.kernel.org/r/20210513171510.20328-1-rdunlap@infradead.org [akpm@linux-foundation.org: make kallsyms_lookup_buildid() static] [cuibixuan@huawei.com: fix build error when CONFIG_SYSFS is disabled] Link: https://lkml.kernel.org/r/20210525105049.34804-1-cuibixuan@huawei.com Link: https://lkml.kernel.org/r/20210511003845.2429846-6-swboyd@chromium.org Link: https://fedoraproject.org/wiki/Releases/FeatureBuildId [1] Link: https://sourceware.org/elfutils/Debuginfod.html [2] Signed-off-by: Stephen Boyd Signed-off-by: Bixuan Cui Signed-off-by: Randy Dunlap Cc: Jiri Olsa Cc: Alexei Starovoitov Cc: Jessica Yu Cc: Evan Green Cc: Hsin-Yi Wang Cc: Petr Mladek Cc: Steven Rostedt Cc: Sergey Senozhatsky Cc: Andy Shevchenko Cc: Rasmus Villemoes Cc: Matthew Wilcox Cc: Baoquan He Cc: Borislav Petkov Cc: Catalin Marinas Cc: Dave Young Cc: Ingo Molnar Cc: Konstantin Khlebnikov Cc: Sasha Levin Cc: Thomas Gleixner Cc: Vivek Goyal Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kallsyms.h | 21 +++++++++++++++++++-- include/linux/module.h | 9 ++++++++- 2 files changed, 27 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 465060acc981..a1d6fc82d7f0 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -7,6 +7,7 @@ #define _LINUX_KALLSYMS_H #include +#include #include #include #include @@ -15,8 +16,10 @@ #include #define KSYM_NAME_LEN 128 -#define 
KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + (KSYM_NAME_LEN - 1) + \ - 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + 1) +#define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s %s]") + \ + (KSYM_NAME_LEN - 1) + \ + 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + \ + (BUILD_ID_SIZE_MAX * 2) + 1) struct cred; struct module; @@ -91,8 +94,10 @@ const char *kallsyms_lookup(unsigned long addr, /* Look up a kernel symbol and return it in a text buffer. */ extern int sprint_symbol(char *buffer, unsigned long address); +extern int sprint_symbol_build_id(char *buffer, unsigned long address); extern int sprint_symbol_no_offset(char *buffer, unsigned long address); extern int sprint_backtrace(char *buffer, unsigned long address); +extern int sprint_backtrace_build_id(char *buffer, unsigned long address); int lookup_symbol_name(unsigned long addr, char *symname); int lookup_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name); @@ -128,6 +133,12 @@ static inline int sprint_symbol(char *buffer, unsigned long addr) return 0; } +static inline int sprint_symbol_build_id(char *buffer, unsigned long address) +{ + *buffer = '\0'; + return 0; +} + static inline int sprint_symbol_no_offset(char *buffer, unsigned long addr) { *buffer = '\0'; @@ -140,6 +151,12 @@ static inline int sprint_backtrace(char *buffer, unsigned long addr) return 0; } +static inline int sprint_backtrace_build_id(char *buffer, unsigned long addr) +{ + *buffer = '\0'; + return 0; +} + static inline int lookup_symbol_name(unsigned long addr, char *symname) { return -ERANGE; diff --git a/include/linux/module.h b/include/linux/module.h index 8100bb477d86..8a298d820dbc 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -369,6 +370,11 @@ struct module { /* Unique handle for this module */ char name[MODULE_NAME_LEN]; +#ifdef CONFIG_STACKTRACE_BUILD_ID + /* Module build ID */ + 
unsigned char build_id[BUILD_ID_SIZE_MAX]; +#endif + /* Sysfs stuff. */ struct module_kobject mkobj; struct module_attribute *modinfo_attrs; @@ -636,7 +642,7 @@ void *dereference_module_function_descriptor(struct module *mod, void *ptr); const char *module_address_lookup(unsigned long addr, unsigned long *symbolsize, unsigned long *offset, - char **modname, + char **modname, const unsigned char **modbuildid, char *namebuf); int lookup_module_symbol_name(unsigned long addr, char *symname); int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name); @@ -740,6 +746,7 @@ static inline const char *module_address_lookup(unsigned long addr, unsigned long *symbolsize, unsigned long *offset, char **modname, + const unsigned char **modbuildid, char *namebuf) { return NULL; -- cgit v1.2.3 From 44e8a5e9120bf4fc1ab046b648b0598e6652c36e Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 7 Jul 2021 18:09:49 -0700 Subject: kdump: use vmlinux_build_id to simplify We can use the vmlinux_build_id array here now instead of open coding it. This mostly consolidates code. 
Link: https://lkml.kernel.org/r/20210511003845.2429846-14-swboyd@chromium.org Signed-off-by: Stephen Boyd Cc: Jiri Olsa Cc: Alexei Starovoitov Cc: Jessica Yu Cc: Evan Green Cc: Hsin-Yi Wang Cc: Dave Young Cc: Baoquan He Cc: Vivek Goyal Cc: Andy Shevchenko Cc: Borislav Petkov Cc: Catalin Marinas Cc: Ingo Molnar Cc: Konstantin Khlebnikov Cc: Matthew Wilcox Cc: Petr Mladek Cc: Rasmus Villemoes Cc: Sasha Levin Cc: Sergey Senozhatsky Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/buildid.h | 2 +- include/linux/crash_core.h | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/buildid.h b/include/linux/buildid.h index 3e8d77a93ec4..3b7a0ff4642f 100644 --- a/include/linux/buildid.h +++ b/include/linux/buildid.h @@ -10,7 +10,7 @@ int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id, __u32 *size); int build_id_parse_buf(const void *buf, unsigned char *build_id, u32 buf_size); -#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) +#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) || IS_ENABLED(CONFIG_CRASH_CORE) extern unsigned char vmlinux_build_id[BUILD_ID_SIZE_MAX]; void init_vmlinux_build_id(void); #else diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 206bde8308b2..de62a722431e 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -38,8 +38,12 @@ phys_addr_t paddr_vmcoreinfo_note(void); #define VMCOREINFO_OSRELEASE(value) \ vmcoreinfo_append_str("OSRELEASE=%s\n", value) -#define VMCOREINFO_BUILD_ID(value) \ - vmcoreinfo_append_str("BUILD-ID=%s\n", value) +#define VMCOREINFO_BUILD_ID() \ + ({ \ + static_assert(sizeof(vmlinux_build_id) == 20); \ + vmcoreinfo_append_str("BUILD-ID=%20phN\n", vmlinux_build_id); \ + }) + #define VMCOREINFO_PAGESIZE(value) \ vmcoreinfo_append_str("PAGESIZE=%ld\n", value) #define VMCOREINFO_SYMBOL(name) \ @@ -69,10 +73,6 @@ extern unsigned char 
*vmcoreinfo_data; extern size_t vmcoreinfo_size; extern u32 *vmcoreinfo_note; -/* raw contents of kernel .notes section */ -extern const void __start_notes __weak; -extern const void __stop_notes __weak; - Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, void *data, size_t data_len); void final_note(Elf_Word *buf); -- cgit v1.2.3 From 9cf6fa2458443118b84090aa1bf7a3630b5940e8 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 7 Jul 2021 18:09:53 -0700 Subject: mm: rename pud_page_vaddr to pud_pgtable and make it return pmd_t * No functional change in this patch. [aneesh.kumar@linux.ibm.com: fix] Link: https://lkml.kernel.org/r/87wnqtnb60.fsf@linux.ibm.com [sfr@canb.auug.org.au: another fix] Link: https://lkml.kernel.org/r/20210619134410.89559-1-aneesh.kumar@linux.ibm.com Link: https://lkml.kernel.org/r/20210615110859.320299-1-aneesh.kumar@linux.ibm.com Link: https://lore.kernel.org/linuxppc-dev/CAHk-=wi+J+iodze9FtjM3Zi4j4OeS+qqbKxME9QN4roxPEXH9Q@mail.gmail.com/ Signed-off-by: Aneesh Kumar K.V Signed-off-by: Stephen Rothwell Cc: Christophe Leroy Cc: Hugh Dickins Cc: Joel Fernandes Cc: Kalesh Singh Cc: Kirill A. 
Shutemov Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/pgtable-nopmd.h | 2 +- include/asm-generic/pgtable-nopud.h | 2 +- include/linux/pgtable.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-generic/pgtable-nopmd.h b/include/asm-generic/pgtable-nopmd.h index 3e13acd019ae..10789cf51d16 100644 --- a/include/asm-generic/pgtable-nopmd.h +++ b/include/asm-generic/pgtable-nopmd.h @@ -51,7 +51,7 @@ static inline pmd_t * pmd_offset(pud_t * pud, unsigned long address) #define __pmd(x) ((pmd_t) { __pud(x) } ) #define pud_page(pud) (pmd_page((pmd_t){ pud })) -#define pud_page_vaddr(pud) (pmd_page_vaddr((pmd_t){ pud })) +#define pud_pgtable(pud) ((pmd_t *)(pmd_page_vaddr((pmd_t){ pud }))) /* * allocating and freeing a pmd is trivial: the 1-entry pmd is diff --git a/include/asm-generic/pgtable-nopud.h b/include/asm-generic/pgtable-nopud.h index a9d751fbda9e..7cbd15f70bf5 100644 --- a/include/asm-generic/pgtable-nopud.h +++ b/include/asm-generic/pgtable-nopud.h @@ -49,7 +49,7 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) #define __pud(x) ((pud_t) { __p4d(x) }) #define p4d_page(p4d) (pud_page((pud_t){ p4d })) -#define p4d_page_vaddr(p4d) (pud_page_vaddr((pud_t){ p4d })) +#define p4d_page_vaddr(p4d) (pud_pgtable((pud_t){ p4d })) /* * allocating and freeing a pud is trivial: the 1-entry pud is diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index e82660f7b9e4..c7c992ada1fe 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -106,7 +106,7 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) #ifndef pmd_offset static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) { - return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address); + return pud_pgtable(*pud) + pmd_index(address); } #define pmd_offset pmd_offset #endif -- cgit v1.2.3 From 
dc4875f0e791de554bdc45aa1dbd6e45e107e50f Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 7 Jul 2021 18:09:56 -0700 Subject: mm: rename p4d_page_vaddr to p4d_pgtable and make it return pud_t * No functional change in this patch. [aneesh.kumar@linux.ibm.com: m68k build error reported by kernel robot] Link: https://lkml.kernel.org/r/87tulxnb2v.fsf@linux.ibm.com Link: https://lkml.kernel.org/r/20210615110859.320299-2-aneesh.kumar@linux.ibm.com Link: https://lore.kernel.org/linuxppc-dev/CAHk-=wi+J+iodze9FtjM3Zi4j4OeS+qqbKxME9QN4roxPEXH9Q@mail.gmail.com/ Signed-off-by: Aneesh Kumar K.V Cc: Christophe Leroy Cc: Hugh Dickins Cc: Joel Fernandes Cc: Kalesh Singh Cc: Kirill A. Shutemov Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/pgtable-nop4d.h | 2 +- include/asm-generic/pgtable-nopud.h | 2 +- include/linux/pgtable.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-generic/pgtable-nop4d.h b/include/asm-generic/pgtable-nop4d.h index 2f6b1befb129..03b7dae47dd4 100644 --- a/include/asm-generic/pgtable-nop4d.h +++ b/include/asm-generic/pgtable-nop4d.h @@ -41,7 +41,7 @@ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) #define __p4d(x) ((p4d_t) { __pgd(x) }) #define pgd_page(pgd) (p4d_page((p4d_t){ pgd })) -#define pgd_page_vaddr(pgd) (p4d_page_vaddr((p4d_t){ pgd })) +#define pgd_page_vaddr(pgd) ((unsigned long)(p4d_pgtable((p4d_t){ pgd }))) /* * allocating and freeing a p4d is trivial: the 1-entry p4d is diff --git a/include/asm-generic/pgtable-nopud.h b/include/asm-generic/pgtable-nopud.h index 7cbd15f70bf5..eb70c6d7ceff 100644 --- a/include/asm-generic/pgtable-nopud.h +++ b/include/asm-generic/pgtable-nopud.h @@ -49,7 +49,7 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) #define __pud(x) ((pud_t) { __p4d(x) }) #define p4d_page(p4d) (pud_page((pud_t){ p4d })) -#define 
p4d_page_vaddr(p4d) (pud_pgtable((pud_t){ p4d })) +#define p4d_pgtable(p4d) ((pud_t *)(pud_pgtable((pud_t){ p4d }))) /* * allocating and freeing a pud is trivial: the 1-entry pud is diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index c7c992ada1fe..d147480cdefc 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -114,7 +114,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) #ifndef pud_offset static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) { - return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address); + return p4d_pgtable(*p4d) + pud_index(address); } #define pud_offset pud_offset #endif -- cgit v1.2.3