scatterlist: Don't allocate sg lists using __get_free_page

Allocating pages with __get_free_page is slower than going through the
slab allocator to grab free pages out from a pool.

These are the results from running the code at the bottom of this
message:
[ 1.278602] speedtest: __get_free_page: 9 us
[ 1.278606] speedtest: kmalloc: 4 us
[ 1.278609] speedtest: kmem_cache_alloc: 4 us
[ 1.278611] speedtest: vmalloc: 13 us

kmalloc and kmem_cache_alloc (which is what kmalloc uses for common
sizes behind the scenes) are the fastest choices. Use kmalloc to speed
up sg list allocation.

This is the code used to produce the above measurements:

#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

static int speedtest(void *data)
{
	static const struct sched_param sched_max_rt_prio = {
		.sched_priority = MAX_RT_PRIO - 1
	};
	volatile s64 ctotal = 0, gtotal = 0, ktotal = 0, vtotal = 0;
	struct kmem_cache *page_pool;
	int i, j, trials = 1000;
	volatile ktime_t start;
	void *ptr[100];

	sched_setscheduler_nocheck(current, SCHED_FIFO, &sched_max_rt_prio);

	page_pool = kmem_cache_create("pages", PAGE_SIZE, PAGE_SIZE, SLAB_PANIC,
				      NULL);
	for (i = 0; i < trials; i++) {
		start = ktime_get();
		for (j = 0; j < ARRAY_SIZE(ptr); j++)
			while (!(ptr[j] = kmem_cache_alloc(page_pool, GFP_KERNEL)));
		ctotal += ktime_us_delta(ktime_get(), start);
		for (j = 0; j < ARRAY_SIZE(ptr); j++)
			kmem_cache_free(page_pool, ptr[j]);

		start = ktime_get();
		for (j = 0; j < ARRAY_SIZE(ptr); j++)
			while (!(ptr[j] = (void *)__get_free_page(GFP_KERNEL)));
		gtotal += ktime_us_delta(ktime_get(), start);
		for (j = 0; j < ARRAY_SIZE(ptr); j++)
			free_page((unsigned long)ptr[j]);

		start = ktime_get();
		for (j = 0; j < ARRAY_SIZE(ptr); j++)
			while (!(ptr[j] = kmalloc(PAGE_SIZE, GFP_KERNEL)));
		ktotal += ktime_us_delta(ktime_get(), start);
		for (j = 0; j < ARRAY_SIZE(ptr); j++)
			kfree(ptr[j]);

		start = ktime_get();
		*ptr = vmalloc(ARRAY_SIZE(ptr) * PAGE_SIZE);
		vtotal += ktime_us_delta(ktime_get(), start);
		vfree(*ptr);
	}
	kmem_cache_destroy(page_pool);

	printk("%s: __get_free_page: %lld us\n", __func__, gtotal / trials);
	printk("%s: kmalloc: %lld us\n", __func__, ktotal / trials);
	printk("%s: kmem_cache_alloc: %lld us\n", __func__, ctotal / trials);
	printk("%s: vmalloc: %lld us\n", __func__, vtotal / trials);
	complete(data);
	return 0;
}

static int __init start_test(void)
{
	DECLARE_COMPLETION_ONSTACK(done);

	BUG_ON(IS_ERR(kthread_run(speedtest, &done, "malloc_test")));
	wait_for_completion(&done);
	return 0;
}
late_initcall(start_test);

Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
Signed-off-by: Ruchit <ruchitmarathe@gmail.com>
6 years ago · 690fe03782
parent 66822a0c1a
commit 690fe03782
1 changed file with 2 additions and 20 deletions
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -163,30 +163,12 @@ EXPORT_SYMBOL(sg_init_one);
 */
 static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask)
 {
-	if (nents == SG_MAX_SINGLE_ALLOC) {
-		/*
-		 * Kmemleak doesn't track page allocations as they are not
-		 * commonly used (in a raw form) for kernel data structures.
-		 * As we chain together a list of pages and then a normal
-		 * kmalloc (tracked by kmemleak), in order to for that last
-		 * allocation not to become decoupled (and thus a
-		 * false-positive) we need to inform kmemleak of all the
-		 * intermediate allocations.
-		 */
-		void *ptr = (void *) __get_free_page(gfp_mask);
-		kmemleak_alloc(ptr, PAGE_SIZE, 1, gfp_mask);
-		return ptr;
-	} else
-		return kmalloc(nents * sizeof(struct scatterlist), gfp_mask);
+	return kmalloc(nents * sizeof(struct scatterlist), gfp_mask);
 }

 static void sg_kfree(struct scatterlist *sg, unsigned int nents)
 {
-	if (nents == SG_MAX_SINGLE_ALLOC) {
-		kmemleak_free(sg);
-		free_page((unsigned long) sg);
-	} else
-		kfree(sg);
+	kfree(sg);
 }

 /**