mm: vmpressure: Don't cache the window size

Caching the window size can result in delayed or inaccurate pressure
reports. Since calculating a fresh window size is cheap, do so all the
time instead of relying on a stale, cached value.

Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
Signed-off-by: Ruchit <ruchitmarathe@gmail.com>
fourteen
Sultan Alsawaf 5 years ago committed by Jenna
parent 26a1450ce9
commit 5440e78367
  1. 110
      mm/vmpressure.c

@@ -27,22 +27,6 @@
#include <linux/module.h>
#include <linux/vmpressure.h>
/*
* The window size (vmpressure_win) is the number of scanned pages before
* we try to analyze scanned/reclaimed ratio. So the window is used as a
* rate-limit tunable for the "low" level notification, and also for
* averaging the ratio for medium/critical levels. Using small window
* sizes can cause a lot of false positives, but too big a window size
* will delay the notifications.
*
* As the vmscan reclaimer logic works with chunks which are multiple of
* SWAP_CLUSTER_MAX, it makes sense to use it for the window size as well.
*
* TODO: Make the window size depend on machine size, as we do for vmstat
* thresholds. Currently we set it to 512 pages (2MB for 4KB pages).
*/
static unsigned long vmpressure_win = SWAP_CLUSTER_MAX * 16;
/*
* These thresholds are used when we account memory pressure through
* scanned/reclaimed ratio. The current values were chosen empirically. In
@@ -272,9 +256,32 @@ static void vmpressure_work_fn(struct work_struct *work)
} while ((vmpr = vmpressure_parent(vmpr)));
}
/*
 * calculate_vmpressure_win() - compute a fresh vmpressure window size.
 *
 * The window is derived from the current amount of (free + cached) memory:
 * file pages minus shmem pages minus swapcache pages, plus free pages.
 *
 * For low (free + cached), the vmpressure window should be small, and
 * larger for higher values of (free + cached). It should not grow
 * linearly, though: this ensures timely vmpressure notifications when the
 * system is under memory pressure, and an optimal number of events when
 * cached is high. The square root function was empirically found to serve
 * the purpose.
 *
 * Return: the integer square root of the page count, or 1 when the count
 * is below 1.
 */
static unsigned long calculate_vmpressure_win(void)
{
	long avail_pages = global_node_page_state(NR_FILE_PAGES) -
			   global_node_page_state(NR_SHMEM) -
			   total_swapcache_pages() +
			   global_zone_page_state(NR_FREE_PAGES);

	return avail_pages < 1 ? 1 : int_sqrt(avail_pages);
}
#ifdef CONFIG_MEMCG
static void vmpressure_memcg(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
unsigned long scanned, unsigned long reclaimed)
static void vmpressure_memcg(gfp_t gfp, struct mem_cgroup *memcg, bool critical,
bool tree, unsigned long scanned,
unsigned long reclaimed)
{
struct vmpressure *vmpr = memcg_to_vmpressure(memcg);
@@ -286,7 +293,9 @@ static void vmpressure_memcg(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
* (scanning depth) goes too high (deep), we will be notified
* through vmpressure_prio(). But so far, keep calm.
*/
if (!scanned)
if (critical)
scanned = calculate_vmpressure_win();
else if (!scanned)
return;
if (tree) {
@@ -295,7 +304,7 @@ static void vmpressure_memcg(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
vmpr->tree_reclaimed += reclaimed;
spin_unlock(&vmpr->sr_lock);
if (scanned < vmpressure_win)
if (!critical && scanned < calculate_vmpressure_win())
return;
schedule_work(&vmpr->work);
} else {
@@ -309,7 +318,7 @@ static void vmpressure_memcg(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
spin_lock(&vmpr->sr_lock);
scanned = vmpr->scanned += scanned;
reclaimed = vmpr->reclaimed += reclaimed;
if (scanned < vmpressure_win) {
if (!critical && scanned < calculate_vmpressure_win()) {
spin_unlock(&vmpr->sr_lock);
return;
}
@@ -333,47 +342,23 @@ static void vmpressure_memcg(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
}
}
#else
static void vmpressure_memcg(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
unsigned long scanned, unsigned long reclaimed)
{
}
static void vmpressure_memcg(gfp_t gfp, struct mem_cgroup *memcg, bool critical,
bool tree, unsigned long scanned,
unsigned long reclaimed) { }
#endif
/*
* Recompute the vmpressure window and store it in the file-scope
* vmpressure_win variable. The window is the integer square root of
* (file pages - shmem pages - swapcache pages + free pages), with the
* page count raised to a minimum of 1 before the square root is taken.
*/
static void calculate_vmpressure_win(void)
{
long x;
x = global_node_page_state(NR_FILE_PAGES) -
global_node_page_state(NR_SHMEM) -
total_swapcache_pages() +
global_zone_page_state(NR_FREE_PAGES);
/* Clamp so that int_sqrt() is never given a zero or negative count. */
if (x < 1)
x = 1;
/*
* For low (free + cached), vmpressure window should be
* small, and high for higher values of (free + cached).
* But it should not be linear as well. This ensures
* timely vmpressure notifications when system is under
* memory pressure, and optimal number of events when
* cached is high. The square root function is empirically
* found to serve the purpose.
*/
x = int_sqrt(x);
vmpressure_win = x;
}
static void vmpressure_global(gfp_t gfp, unsigned long scanned,
static void vmpressure_global(gfp_t gfp, unsigned long scanned, bool critical,
unsigned long reclaimed)
{
struct vmpressure *vmpr = &global_vmpressure;
unsigned long pressure;
unsigned long stall;
if (critical)
scanned = calculate_vmpressure_win();
if (scanned) {
spin_lock(&vmpr->sr_lock);
if (!vmpr->scanned)
calculate_vmpressure_win();
vmpr->scanned += scanned;
vmpr->reclaimed += reclaimed;
@@ -385,7 +370,7 @@ static void vmpressure_global(gfp_t gfp, unsigned long scanned,
reclaimed = vmpr->reclaimed;
spin_unlock(&vmpr->sr_lock);
if (scanned < vmpressure_win)
if (!critical && scanned < calculate_vmpressure_win())
return;
}
@@ -404,6 +389,17 @@ static void vmpressure_global(gfp_t gfp, unsigned long scanned,
vmpressure_notify(pressure);
}
/*
 * __vmpressure() - common dispatcher for vmpressure accounting.
 * @gfp:	reclaimer's gfp mask, forwarded unchanged
 * @memcg:	cgroup memory controller handle, or NULL
 * @critical:	forwarded unchanged to both back ends
 * @tree:	forwarded to the memcg back end; also gates the global one
 * @scanned:	number of pages scanned
 * @reclaimed:	number of pages reclaimed
 *
 * Calls vmpressure_global() when there is no memcg and @tree is set, and
 * then calls vmpressure_memcg() whenever CONFIG_MEMCG is enabled.
 */
static void __vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool critical,
			 bool tree, unsigned long scanned,
			 unsigned long reclaimed)
{
	const bool report_global = tree && !memcg;

	if (report_global)
		vmpressure_global(gfp, scanned, critical, reclaimed);

	if (IS_ENABLED(CONFIG_MEMCG))
		vmpressure_memcg(gfp, memcg, critical, tree, scanned,
				 reclaimed);
}
/**
* vmpressure() - Account memory pressure through scanned/reclaimed ratio
* @gfp: reclaimer's gfp mask
@@ -428,11 +424,7 @@ static void vmpressure_global(gfp_t gfp, unsigned long scanned,
void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
unsigned long scanned, unsigned long reclaimed)
{
if (!memcg && tree)
vmpressure_global(gfp, scanned, reclaimed);
if (IS_ENABLED(CONFIG_MEMCG))
vmpressure_memcg(gfp, memcg, tree, scanned, reclaimed);
__vmpressure(gfp, memcg, false, tree, scanned, reclaimed);
}
/**
@@ -462,7 +454,7 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio)
* to the vmpressure() basically means that we signal 'critical'
* level.
*/
vmpressure(gfp, memcg, true, vmpressure_win, 0);
__vmpressure(gfp, memcg, true, true, 0, 0);
}
static enum vmpressure_levels str_to_level(const char *arg)

Loading…
Cancel
Save