From e37af6050230fc2a8c1a54317e3c730e78c46b1c Mon Sep 17 00:00:00 2001
From: Sultan Alsawaf
Date: Wed, 20 May 2020 09:55:17 -0700
Subject: [PATCH] mm: Don't stop kswapd on a per-node basis when there are no
 waiters

The page allocator wakes all kswapds in an allocation context's allowed
nodemask in the slow path, so it doesn't make sense to have the kswapd-
waiter count per each NUMA node. Instead, it should be a global counter
to stop all kswapds when there are no failed allocation requests.

Signed-off-by: Sultan Alsawaf
Signed-off-by: Ruchit
---
 include/linux/mmzone.h | 1 -
 mm/internal.h          | 1 +
 mm/page_alloc.c        | 8 ++++----
 mm/vmscan.c            | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index c4dae0bcc43b..a6f1148085c2 100755
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -664,7 +664,6 @@ typedef struct pglist_data {
 	unsigned long node_spanned_pages; /* total size of physical page
 					     range, including holes */
 	int node_id;
-	atomic_t kswapd_waiters;
 	wait_queue_head_t kswapd_wait;
 	wait_queue_head_t pfmemalloc_wait;
 	struct task_struct *kswapd;	/* Protected by
diff --git a/mm/internal.h b/mm/internal.h
index 7a9fe8fd1558..7cc88ece2204 100755
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -187,6 +187,7 @@ extern void prep_compound_page(struct page *page, unsigned int order);
 extern void post_alloc_hook(struct page *page, unsigned int order,
 					gfp_t gfp_flags);
 extern int user_min_free_kbytes;
+extern atomic_long_t kswapd_waiters;
 
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a1dba6e830c3..51de840d394d 100755
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -76,6 +76,8 @@
 #include <asm/div64.h>
 #include "internal.h"
 
+atomic_long_t kswapd_waiters = ATOMIC_LONG_INIT(0);
+
 /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
 static DEFINE_MUTEX(pcp_batch_high_lock);
 #define MIN_PERCPU_PAGELIST_FRACTION	(8)
@@ -4087,7 +4089,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	u64 utime, stime_s, stime_e, stime_d;
 	task_cputime(current, &utime, &stime_s);
 
-	pg_data_t *pgdat = ac->preferred_zoneref->zone->zone_pgdat;
 	bool woke_kswapd = false;
 
 	/*
@@ -4124,7 +4125,7 @@ retry_cpuset:
 
 	if (gfp_mask & __GFP_KSWAPD_RECLAIM) {
 		if (!woke_kswapd) {
-			atomic_inc(&pgdat->kswapd_waiters);
+			atomic_long_inc(&kswapd_waiters);
 			woke_kswapd = true;
 		}
 		wake_all_kswapds(order, ac);
@@ -4355,7 +4356,7 @@ got_pg:
 			a_file << (PAGE_SHIFT-10), in_file << (PAGE_SHIFT-10));
 	}
 	if (woke_kswapd)
-		atomic_dec(&pgdat->kswapd_waiters);
+		atomic_long_dec(&kswapd_waiters);
 	if (!page)
 		warn_alloc(gfp_mask, ac->nodemask,
 				"page allocation failure: order:%u", order);
@@ -6335,7 +6336,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
 
 	pgdat_page_ext_init(pgdat);
 	spin_lock_init(&pgdat->lru_lock);
 	lruvec_init(node_lruvec(pgdat));
-	pgdat->kswapd_waiters = (atomic_t)ATOMIC_INIT(0);
 
 	pgdat->per_cpu_nodestats = &boot_nodestats;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a35bd09a1992..068285f41938 100755
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3696,7 +3696,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
 
 		/* Check if kswapd should be suspending */
 		if (try_to_freeze() || kthread_should_stop() ||
-		    !atomic_read(&pgdat->kswapd_waiters))
+		    !atomic_long_read(&kswapd_waiters))
 			break;
 
 		/*
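
For reference, the scheme the patch converges on can be sketched in plain C:
an allocation that enters the slow path bumps one global waiter count before
waking the reclaim threads and drops it when the allocation attempt finishes,
and each reclaim thread keeps working only while that count is non-zero. The
sketch below is a hypothetical userspace analogy (pthreads plus C11 atomics),
not kernel code; every identifier in it is made up, and only the global
increment/decrement/read pattern mirrors kswapd_waiters above.

/*
 * Userspace sketch of a single global waiter counter shared by all
 * reclaim threads, rather than one counter per NUMA node.
 * Build with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static atomic_long waiters;		/* analogue of the global kswapd_waiters */
static atomic_bool stop;		/* shutdown flag for the demo only */

/* Stand-in for balance_pgdat(): reclaim only while someone is waiting. */
static void *reclaim_thread(void *arg)
{
	long id = (long)arg;

	while (!atomic_load(&stop)) {
		if (atomic_load(&waiters) == 0) {
			/* No failed allocations anywhere: every reclaimer may idle. */
			usleep(1000);
			continue;
		}
		printf("reclaimer %ld: working, %ld waiter(s)\n",
		       id, atomic_load(&waiters));
		usleep(1000);
	}
	return NULL;
}

/* Stand-in for the slow path: count ourselves as a waiter while allocating. */
static void slowpath_alloc(void)
{
	atomic_fetch_add(&waiters, 1);	/* like atomic_long_inc(&kswapd_waiters) */
	usleep(5000);			/* "allocation attempt" in progress */
	atomic_fetch_sub(&waiters, 1);	/* like atomic_long_dec(&kswapd_waiters) */
}

int main(void)
{
	pthread_t reclaimers[2];
	long i;

	for (i = 0; i < 2; i++)
		pthread_create(&reclaimers[i], NULL, reclaim_thread, (void *)i);

	slowpath_alloc();
	slowpath_alloc();

	atomic_store(&stop, true);
	for (i = 0; i < 2; i++)
		pthread_join(reclaimers[i], NULL);
	return 0;
}

In the sketch both reclaimer threads print work only while slowpath_alloc()
holds the counter above zero, which is the behaviour balance_pgdat() gets from
checking the single global atomic_long_t instead of a per-node atomic_t.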