From c957154289f87196d4f44fa805e236564cbb91f0 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 30 Jan 2020 22:13:57 -0800
Subject: [PATCH] mm/page_alloc: skip non present sections on zone
 initialization

memmap_init_zone() can be called on ranges with holes during boot. It
skips any non-valid PFNs one by one, which works fine as long as the
holes are not too big.

But huge holes in the memory map cause a problem: it takes over 20
seconds to walk a 32TiB hole, and x86-64 with 5-level paging allows
for much larger holes in the memory map, which would practically hang
the system.

Deferred struct page init doesn't help here; it only works on present
ranges.

Skipping non-present sections fixes the issue.

Link: http://lkml.kernel.org/r/20191230093828.24613-1-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Baoquan He
Acked-by: Michal Hocko
Cc: Dan Williams
Cc: Vlastimil Babka
Cc: Mel Gorman
Cc: "Jin, Zhi"
Cc: David Hildenbrand
Cc: Michal Hocko
Cc: Oscar Salvador
Cc: Pavel Tatashin
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
Git-commit: 3f1353552e256cb3bf0f23334c48f50476044673
Git-repo: git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
[jiaczhen@codeaurora.org: resolve trivial merge conflicts]
Change-Id: Ia1b43e107c2ee3b61ba5f43f8d580d8e6c37ded4
Signed-off-by: Jiacheng Zheng <jiaczhen@codeaurora.org>
---
 mm/page_alloc.c | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c80da801b234..bfcbb43fd7d9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5471,6 +5471,30 @@ void __ref build_all_zonelists(pg_data_t *pgdat)
 #endif
 }
 
+#ifdef CONFIG_SPARSEMEM
+/* Skip PFNs that belong to non-present sections */
+static inline __meminit unsigned long next_pfn(unsigned long pfn)
+{
+	unsigned long section_nr;
+
+	section_nr = pfn_to_section_nr(++pfn);
+	if (present_section_nr(section_nr))
+		return pfn;
+
+	while (++section_nr <= __highest_present_section_nr) {
+		if (present_section_nr(section_nr))
+			return section_nr_to_pfn(section_nr);
+	}
+
+	return -1;
+}
+#else
+static inline __meminit unsigned long next_pfn(unsigned long pfn)
+{
+	return pfn + 1;
+}
+#endif
+
 /*
  * Initially all pages are reserved - free ones are freed
  * up by free_all_bootmem() once the early boot process is
@@ -5506,8 +5530,10 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		if (context != MEMMAP_EARLY)
 			goto not_early;
 
-		if (!early_pfn_valid(pfn))
+		if (!early_pfn_valid(pfn)) {
+			pfn = next_pfn(pfn) - 1;
 			continue;
+		}
 		if (!early_pfn_in_nid(pfn, nid))
 			continue;
 		if (!update_defer_init(pgdat, pfn, end_pfn, &nr_initialised))
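
A quick back-of-the-envelope check on the numbers above: with 4KiB
pages, a 32TiB hole covers 2^45 / 2^12 = 2^33 (about 8.6 billion) PFNs,
so even a couple of nanoseconds per early_pfn_valid() check adds up to
the reported 20+ seconds. With x86-64's 128MiB sparsemem sections, the
same hole spans only 2^45 / 2^27 = 262,144 sections. Below is a minimal
standalone userspace sketch of the same skip-by-section idea; the
section size, the section_present[] bitmap, and the helpers are
simplified stand-ins for the kernel's sparsemem machinery
(pfn_to_section_nr(), present_section_nr(), section_nr_to_pfn()), not
the real implementation:

/* pfn_skip_demo.c - userspace model of the section-skip idea. */
#include <stdio.h>
#include <stdbool.h>

#define PFN_SECTION_SHIFT 15   /* pretend 2^15 PFNs per section */
#define NR_SECTIONS       64

/* Mock "present" map: sections 0-3 and 40-41 are backed by memory. */
static bool section_present[NR_SECTIONS] = {
	[0] = true, [1] = true, [2] = true, [3] = true,
	[40] = true, [41] = true,
};
static unsigned long highest_present_section = 41;

static unsigned long pfn_to_section_nr(unsigned long pfn)
{
	return pfn >> PFN_SECTION_SHIFT;
}

static unsigned long section_nr_to_pfn(unsigned long sec)
{
	return sec << PFN_SECTION_SHIFT;
}

/*
 * Same shape as next_pfn() in the patch: advance one PFN, and if that
 * lands in a hole, jump section by section instead of page by page.
 * Cost is O(number of sections), not O(pages in the hole).
 */
static unsigned long next_pfn(unsigned long pfn)
{
	unsigned long sec = pfn_to_section_nr(++pfn);

	if (sec > highest_present_section)
		return -1UL;            /* ran past the last present section */
	if (section_present[sec])
		return pfn;             /* still inside a present section */

	while (++sec <= highest_present_section) {
		if (section_present[sec])
			return section_nr_to_pfn(sec);
	}
	return -1UL;                    /* only holes left */
}

int main(void)
{
	/* Last PFN of section 3: the hole spans sections 4-39. */
	unsigned long pfn = section_nr_to_pfn(4) - 1;

	printf("after pfn %lu comes pfn %lu (first PFN of section 40)\n",
	       pfn, next_pfn(pfn));
	/* Past the last present section: the -1UL sentinel ends the walk. */
	printf("after the last present PFN: %lu\n",
	       next_pfn(section_nr_to_pfn(42) - 1));
	return 0;
}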
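
Two details of how next_pfn() is hooked up are worth noting. The caller
writes pfn = next_pfn(pfn) - 1 because memmap_init_zone()'s surrounding
walk is a for loop that increments pfn at the top of every iteration;
subtracting one makes the continue land exactly on the next candidate
PFN. And return -1 becomes ULONG_MAX in an unsigned long, which acts as
a sentinel: it fails the loop's pfn < end_pfn bound and cleanly ends
the walk once no present section remains.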