From 9b130ad5bb8255ee8534d92d67e12b2a4887eacb Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 8 Sep 2017 16:14:18 -0700 Subject: treewide: make "nr_cpu_ids" unsigned First, number of CPUs can't be negative number. Second, different signnnedness leads to suboptimal code in the following cases: 1) kmalloc(nr_cpu_ids * sizeof(X)); "int" has to be sign extended to size_t. 2) while (loff_t *pos < nr_cpu_ids) MOVSXD is 1 byte longed than the same MOV. Other cases exist as well. Basically compiler is told that nr_cpu_ids can't be negative which can't be deduced if it is "int". Code savings on allyesconfig kernel: -3KB add/remove: 0/0 grow/shrink: 25/264 up/down: 261/-3631 (-3370) function old new delta coretemp_cpu_online 450 512 +62 rcu_init_one 1234 1272 +38 pci_device_probe 374 399 +25 ... pgdat_reclaimable_pages 628 556 -72 select_fallback_rq 446 369 -77 task_numa_find_cpu 1923 1807 -116 Link: http://lkml.kernel.org/r/20170819114959.GA30580@avx2 Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux/cpumask.h') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 4bf4479a3a80..68c5a8290275 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -32,15 +32,15 @@ typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; #define cpumask_pr_args(maskp) nr_cpu_ids, cpumask_bits(maskp) #if NR_CPUS == 1 -#define nr_cpu_ids 1 +#define nr_cpu_ids 1U #else -extern int nr_cpu_ids; +extern unsigned int nr_cpu_ids; #endif #ifdef CONFIG_CPUMASK_OFFSTACK /* Assuming NR_CPUS is huge, a runtime limit is more efficient. Also, * not all bits may be allocated. */ -#define nr_cpumask_bits ((unsigned int)nr_cpu_ids) +#define nr_cpumask_bits nr_cpu_ids #else #define nr_cpumask_bits ((unsigned int)NR_CPUS) #endif -- cgit v1.2.3 From f22ef333c32cc683922d7e3361a83ebc31b2ac6d Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 8 Sep 2017 16:17:15 -0700 Subject: cpumask: make cpumask_next() out-of-line Every for_each_XXX_cpu() invocation calls cpumask_next() which is an inline function: static inline unsigned int cpumask_next(int n, const struct cpumask *srcp) { /* -1 is a legal arg here. */ if (n != -1) cpumask_check(n); return find_next_bit(cpumask_bits(srcp), nr_cpumask_bits, n + 1); } However! find_next_bit() is regular out-of-line function which means "nr_cpu_ids" load and increment happen at the caller resulting in a lot of bloat x86_64 defconfig: add/remove: 3/0 grow/shrink: 8/373 up/down: 155/-5668 (-5513) x86_64 allyesconfig-ish: add/remove: 3/1 grow/shrink: 57/634 up/down: 3515/-28177 (-24662) !!! Some archs redefine find_next_bit() but it is OK: m68k inline but SMP is not supported arm out-of-line unicore32 out-of-line Function call will happen anyway, so move load and increment into callee. Link: http://lkml.kernel.org/r/20170824230010.GA1593@avx2 Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'include/linux/cpumask.h') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 68c5a8290275..cd415b733c2a 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -178,20 +178,7 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) return find_first_bit(cpumask_bits(srcp), nr_cpumask_bits); } -/** - * cpumask_next - get the next cpu in a cpumask - * @n: the cpu prior to the place to search (ie. return will be > @n) - * @srcp: the cpumask pointer - * - * Returns >= nr_cpu_ids if no further cpus set. - */ -static inline unsigned int cpumask_next(int n, const struct cpumask *srcp) -{ - /* -1 is a legal arg here. */ - if (n != -1) - cpumask_check(n); - return find_next_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1); -} +unsigned int cpumask_next(int n, const struct cpumask *srcp); /** * cpumask_next_zero - get the next unset cpu in a cpumask -- cgit v1.2.3