From f20b737ad567e68a3a352f64b3c287a1c12e0299 Mon Sep 17 00:00:00 2001
From: Sultan Alsawaf
Date: Wed, 19 Feb 2020 14:47:13 -0800
Subject: [PATCH] mm: Stop kswapd early when nothing's waiting for it to free pages

Keeping kswapd running when all the failed allocations that invoked it
are satisfied incurs a high overhead due to unnecessary page eviction
and writeback, as well as spurious VM pressure events to various
registered shrinkers. When kswapd doesn't need to work to make an
allocation succeed anymore, stop it prematurely to save resources.

Signed-off-by: Sultan Alsawaf
Signed-off-by: Ruchit
---
 include/linux/mmzone.h |  1 +
 mm/page_alloc.c        | 17 ++++++++++++++---
 mm/vmscan.c            |  3 ++-
 3 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index a6f1148085c2..c4dae0bcc43b 100755
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -664,6 +664,7 @@ typedef struct pglist_data {
 	unsigned long node_spanned_pages; /* total size of physical page
 					     range, including holes */
 	int node_id;
+	atomic_t kswapd_waiters;
 	wait_queue_head_t kswapd_wait;
 	wait_queue_head_t pfmemalloc_wait;
 	struct task_struct *kswapd;	/* Protected by
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1d660049a149..ba94fdbe3061 100755
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4087,6 +4087,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	u64 utime, stime_s, stime_e, stime_d;
 
 	task_cputime(current, &utime, &stime_s);
+	pg_data_t *pgdat = ac->preferred_zoneref->zone->zone_pgdat;
+	bool woke_kswapd = false;
 
 	/*
 	 * We also sanity check to catch abuse of atomic reserves being used by
@@ -4120,8 +4122,13 @@ retry_cpuset:
 	if (!ac->preferred_zoneref->zone)
 		goto nopage;
 
-	if (gfp_mask & __GFP_KSWAPD_RECLAIM)
+	if (gfp_mask & __GFP_KSWAPD_RECLAIM) {
+		if (!woke_kswapd) {
+			atomic_inc(&pgdat->kswapd_waiters);
+			woke_kswapd = true;
+		}
 		wake_all_kswapds(order, ac);
+	}
 
 	/*
 	 * The adjusted alloc_flags might result in immediate success, so try
@@ -4321,8 +4328,6 @@ nopage:
 		goto retry;
 	}
 fail:
-	warn_alloc(gfp_mask, ac->nodemask,
-			"page allocation failure: order:%u", order);
 got_pg:
 	task_cputime(current, &utime, &stime_e);
 	stime_d = stime_e - stime_s;
@@ -4347,6 +4352,11 @@ got_pg:
 				a_anon << (PAGE_SHIFT-10), in_anon << (PAGE_SHIFT-10),
 				a_file << (PAGE_SHIFT-10), in_file << (PAGE_SHIFT-10));
 	}
+	if (woke_kswapd)
+		atomic_dec(&pgdat->kswapd_waiters);
+	if (!page)
+		warn_alloc(gfp_mask, ac->nodemask,
+				"page allocation failure: order:%u", order);
 
 	return page;
 }
@@ -6323,6 +6333,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
 	pgdat_page_ext_init(pgdat);
 	spin_lock_init(&pgdat->lru_lock);
 	lruvec_init(node_lruvec(pgdat));
+	pgdat->kswapd_waiters = (atomic_t)ATOMIC_INIT(0);
 
 	pgdat->per_cpu_nodestats = &boot_nodestats;
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5c2d5970af0b..93253daef11e 100755
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3695,7 +3695,8 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
 			wake_up_all(&pgdat->pfmemalloc_wait);
 
 		/* Check if kswapd should be suspending */
-		if (try_to_freeze() || kthread_should_stop())
+		if (try_to_freeze() || kthread_should_stop() ||
+		    !atomic_read(&pgdat->kswapd_waiters))
 			break;
 
 		/*