Age | Commit message (Collapse) | Author |
|
In case the call side is not providing a swap function, we either use a
32 bit or a generic swap function. When swapping around pointers on 64
bit architectures falling back to use the generic swap function seems
like an unnecessary waste.
There at least 9 users ('sort' is of difficult to grep for) of sort()
and all of them use the sort function without a customized swap
function. Furthermore, they are all using pointers to swap around:
arch/x86/kernel/e820.c:sanitize_e820_map()
arch/x86/mm/extable.c:sort_extable()
drivers/acpi/fan.c:acpi_fan_get_fps()
fs/btrfs/super.c:btrfs_descending_sort_devices()
fs/xfs/libxfs/xfs_dir2_block.c:xfs_dir2_sf_to_block()
kernel/range.c:clean_sort_range()
mm/memcontrol.c:__mem_cgroup_usage_register_event()
sound/pci/hda/hda_auto_parser.c:snd_hda_parse_pin_defcfg()
sound/pci/hda/hda_auto_parser.c:sort_pins_by_sequence()
Obviously, we could improve the swap for other sizes as well
but this is overkill at this point.
A simple test shows sorting a 400 element array (try to stay in one
page) with either with u32_swap() or u64_swap() show that the theory
actually works. This test was done on a x86_64 (Intel Xeon E5-4610)
machine.
- swap_32:
NumSamples = 100; Min = 48.00; Max = 49.00
Mean = 48.320000; Variance = 0.217600; SD = 0.466476; Median 48.000000
each * represents a count of 1
48.0000 - 48.1000 [ 68]: ********************************************************************
48.1000 - 48.2000 [ 0]:
48.2000 - 48.3000 [ 0]:
48.3000 - 48.4000 [ 0]:
48.4000 - 48.5000 [ 0]:
48.5000 - 48.6000 [ 0]:
48.6000 - 48.7000 [ 0]:
48.7000 - 48.8000 [ 0]:
48.8000 - 48.9000 [ 0]:
48.9000 - 49.0000 [ 32]: ********************************
- swap_64:
NumSamples = 100; Min = 44.00; Max = 63.00
Mean = 48.250000; Variance = 18.687500; SD = 4.322904; Median 47.000000
each * represents a count of 1
44.0000 - 45.9000 [ 15]: ***************
45.9000 - 47.8000 [ 37]: *************************************
47.8000 - 49.7000 [ 39]: ***************************************
49.7000 - 51.6000 [ 0]:
51.6000 - 53.5000 [ 0]:
53.5000 - 55.4000 [ 0]:
55.4000 - 57.3000 [ 0]:
57.3000 - 59.2000 [ 1]: *
59.2000 - 61.1000 [ 3]: ***
61.1000 - 63.0000 [ 5]: *****
- swap_72:
NumSamples = 100; Min = 53.00; Max = 71.00
Mean = 55.070000; Variance = 21.565100; SD = 4.643824; Median 53.000000
each * represents a count of 1
53.0000 - 54.8000 [ 73]: *************************************************************************
54.8000 - 56.6000 [ 9]: *********
56.6000 - 58.4000 [ 9]: *********
58.4000 - 60.2000 [ 0]:
60.2000 - 62.0000 [ 0]:
62.0000 - 63.8000 [ 0]:
63.8000 - 65.6000 [ 0]:
65.6000 - 67.4000 [ 1]: *
67.4000 - 69.2000 [ 4]: ****
69.2000 - 71.0000 [ 4]: ****
- test program:
static int cmp_32(const void *a, const void *b)
{
u32 l = *(u32 *)a;
u32 r = *(u32 *)b;
if (l < r)
return -1;
if (l > r)
return 1;
return 0;
}
static int cmp_64(const void *a, const void *b)
{
u64 l = *(u64 *)a;
u64 r = *(u64 *)b;
if (l < r)
return -1;
if (l > r)
return 1;
return 0;
}
static int cmp_72(const void *a, const void *b)
{
u32 l = get_unaligned((u32 *) a);
u32 r = get_unaligned((u32 *) b);
if (l < r)
return -1;
if (l > r)
return 1;
return 0;
}
static void init_array32(void *array)
{
u32 *a = array;
int i;
a[0] = 3821;
for (i = 1; i < ARRAY_ELEMENTS; i++)
a[i] = next_pseudo_random32(a[i-1]);
}
static void init_array64(void *array)
{
u64 *a = array;
int i;
a[0] = 3821;
for (i = 1; i < ARRAY_ELEMENTS; i++)
a[i] = next_pseudo_random32(a[i-1]);
}
static void init_array72(void *array)
{
char *p;
u32 v;
int i;
v = 3821;
for (i = 0; i < ARRAY_ELEMENTS; i++) {
p = (char *)array + (i * 9);
put_unaligned(v, (u32*) p);
v = next_pseudo_random32(v);
}
}
static void sort_test(void (*init)(void *array),
int (*cmp) (const void *, const void *),
void *array, size_t size)
{
ktime_t start, stop;
int i;
for (i = 0; i < 10000; i++) {
init(array);
local_irq_disable();
start = ktime_get();
sort(array, ARRAY_ELEMENTS, size, cmp, NULL);
stop = ktime_get();
local_irq_enable();
if (i > 10000 - 101)
pr_info("%lld\n", ktime_to_us(ktime_sub(stop, start)));
}
}
static void *create_array(size_t size)
{
void *array;
array = kmalloc(ARRAY_ELEMENTS * size, GFP_KERNEL);
if (!array)
return NULL;
return array;
}
static int perform_test(size_t size)
{
void *array;
array = create_array(size);
if (!array)
return -ENOMEM;
pr_info("test element size %d bytes\n", (int)size);
switch (size) {
case 4:
sort_test(init_array32, cmp_32, array, size);
break;
case 8:
sort_test(init_array64, cmp_64, array, size);
break;
case 9:
sort_test(init_array72, cmp_72, array, size);
break;
}
kfree(array);
return 0;
}
static int __init sort_tests_init(void)
{
int err;
err = perform_test(sizeof(u32));
if (err)
return err;
err = perform_test(sizeof(u64));
if (err)
return err;
err = perform_test(sizeof(u64)+1);
if (err)
return err;
return 0;
}
static void __exit sort_tests_exit(void)
{
}
module_init(sort_tests_init);
module_exit(sort_tests_exit);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Daniel Wagner");
MODULE_DESCRIPTION("sort perfomance tests");
Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
|
|
The sort function and its helpers don't do memory allocation, so the
slab.h include is redundant. Move it inside the #if 0 protecting the
self-test, similar to how it is done in lib/list_sort.c. This removes
over 450 lines from the generated dependency file.
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
|
|
sort.c doesn't use facilities from kernel.h, but does use some types
defined in linux/types.h. Include the latter directly instead of relying
on some other header doing it. Similarly, include linux/export.h directly
instead of through module.h. This removes 80 lines from the dependency
file .sort.o.cmd.
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
|
|
This is to avoid name clashes for the introduction of a global swap()
macro.
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
|
|
Hello, I fixed and tested a small bug in lib/sort.c file, heap sort
function.
The fix avoids unnecessary swap of contents when i is 0 (saves few loads
and stores), which happens every time sort function is called. I felt the
fix is worth bringing it to your attention given the importance and
frequent use of the sort function.
Acked-by: Matt Mackall <mpm@selenic.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
|
|
A variety of (mostly) innocuous fixes to the embedded kernel-doc content in
source files, including:
* make multi-line initial descriptions single line
* denote some function names, constants and structs as such
* change erroneous opening '/*' to '/**' in a few places
* reword some text for clarity
Signed-off-by: Robert P. J. Day <rpjday@mindspring.com>
Cc: "Randy.Dunlap" <rdunlap@xenotime.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
|
|
It is a non-standard heap-sort algorithm implementation because the index
of child node is wrong . The sort function still outputs right result, but
the performance is O( n * ( log(n) + 1 ) ) , about 10% ~ 20% worse than
standard algorithm.
Signed-off-by: keios <keios.cn@gmail.com>
Acked-by: Matt Mackall <mpm@selenic.com>
Acked-by: Zou Nan hai <nanhai.zou@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
|
|
I recently picked up my older work to remove unnecessary #includes of
sched.h, starting from a patch by Dave Jones to not include sched.h
from module.h. This reduces the number of indirect includes of sched.h
by ~300. Another ~400 pointless direct includes can be removed after
this disentangling (patch to follow later).
However, quite a few indirect includes need to be fixed up for this.
In order to feed the patches through -mm with as little disturbance as
possible, I've split out the fixes I accumulated up to now (complete for
i386 and x86_64, more archs to follow later) and post them before the real
patch. This way this large part of the patch is kept simple with only
adding #includes, and all hunks are independent of each other. So if any
hunk rejects or gets in the way of other patches, just drop it. My scripts
will pick it up again in the next round.
Signed-off-by: Tim Schmielau <tim@physik3.uni-rostock.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
|
|
This patch contains the following small cleanups:
- make two needlessly global functions static
- every file should #include the header files containing the prototypes
of it's global functions
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
|
|
The regression test in lib/sort.c is currently worthless because the array
that is generated for sorting will be all zeros. This patch fixes things
so that the array that is generated will contain unsorted integers (that
are not all identical) as was probably intended.
Signed-off-by Daniel Dickman <didickman@yahoo.com>
Signed-off-by: Domen Puncer <domen@coderock.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
|
|
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
|