From 3dc6e6105cfb86df542adab579ff8ebcc10e462b Mon Sep 17 00:00:00 2001 From: Sung-hun Kim Date: Mon, 30 Aug 2021 15:16:52 +0900 Subject: [PATCH 01/16] arm64: defconfig: enable finegrained-THP configuration Enable finegrained-THP to allow creation of 64KB hugepages. Change-Id: If23d2489571ac0a5c367db54126b7b6abf42eebc Signed-off-by: Sung-hun Kim --- arch/arm64/configs/tizen_bcm2711_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/configs/tizen_bcm2711_defconfig b/arch/arm64/configs/tizen_bcm2711_defconfig index 8365e9a..9a1e6e2 100644 --- a/arch/arm64/configs/tizen_bcm2711_defconfig +++ b/arch/arm64/configs/tizen_bcm2711_defconfig @@ -69,6 +69,7 @@ CONFIG_FRONTSWAP=y CONFIG_CMA=y CONFIG_ZSMALLOC=y CONFIG_READ_ONLY_THP_FOR_FS=y +CONFIG_FINEGRAINED_THP=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y -- 2.7.4 From 6a0f555f06b0ce2e992ce6f00a51da6c94ac7d39 Mon Sep 17 00:00:00 2001 From: Sung-hun Kim Date: Wed, 8 Sep 2021 15:58:07 +0900 Subject: [PATCH 02/16] uapi: THP: remove possible leak of CONFIG_FINEGRAINED_THP to user-space CONFIG_FINEGRAINED_THP is present in a header file (asm-generic/mman-common.h) of uapi. It generates a build error for headers with a "leaks CONFIG to user-space" message. This patch removes a possible leak of the kernel configuration. 
Change-Id: I16173eaf5094cc07312f10fb33a22dd73d67ff88 Fixes: 7d5372737d34 ("mm: THP: introducing a fine-grained transparent hugepage technique for ARM64 architecture") Signed-off-by: Sung-hun Kim --- include/uapi/asm-generic/mman-common.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index f5d33b8..2073cb1 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -30,9 +30,7 @@ #define MAP_SYNC 0x080000 /* perform synchronous page faults for the mapping */ #define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */ -#ifdef CONFIG_FINEGRAINED_THP #define MAP_FILE_THP 0x200000 /* MAP_FIXED which doesn't unmap underlying mapping */ -#endif #define MAP_UNINITIALIZED 0x4000000 /* For anonymous mmap, memory could be * uninitialized */ -- 2.7.4 From 67144542252f40de5991a6a2bf171d2a174b4ead Mon Sep 17 00:00:00 2001 From: Sung-hun Kim Date: Wed, 8 Sep 2021 16:59:12 +0900 Subject: [PATCH 03/16] mm: THP: meminfo: modify areas of kernel configurations This commit modifies coverages of ifdef macros of CONFIG_TRANSPARENT_HUGEPAGE and CONFIG_FINEGRAINED_THP to build properly by removing dependency problems. 
Fixes: 7d5372737d34 ("mm: THP: introducing a fine-grained transparent hugepage technique for ARM64 architecture") Change-Id: Id5e692eb2f89a0f93c696e9c20339940f7107874 Signed-off-by: Sung-hun Kim --- fs/proc/meminfo.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 9a782664..014f197 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -151,13 +151,12 @@ static int meminfo_proc_show(struct seq_file *m, void *v) #ifdef CONFIG_FINEGRAINED_THP show_val_kb(m, "FileCPteMapped: ", global_node_page_state(NR_FILE_PTEMAPPED) * HPAGE_CONT_PTE_NR); -#endif /* CONFIG_FINEGRAINED_THP */ -#endif show_val_kb(m, "PhysCPteMapped: ", phys_cont_pte_pages()); +#endif /* CONFIG_FINEGRAINED_THP */ show_val_kb(m, "PhysPmdMapped: ", phys_huge_pmd_pages() * HPAGE_PMD_NR); - +#endif #ifdef CONFIG_CMA show_val_kb(m, "CmaTotal: ", totalcma_pages); show_val_kb(m, "CmaFree: ", -- 2.7.4 From 634e69909cae9133097ea11751eaa15c6ff6ff78 Mon Sep 17 00:00:00 2001 From: Sung-hun Kim Date: Wed, 8 Sep 2021 17:05:14 +0900 Subject: [PATCH 04/16] mm: THP: memory: remove unnecessary function calls The called function is only used for finegrained-THP, so remove it for unconfigured cases. 
Fixes: 7d5372737d34 ("mm: THP: introducing a fine-grained transparent hugepage technique for ARM64 architecture") Change-Id: I138b135980acf79e695731f4d42399ca203c4ca6 Signed-off-by: Sung-hun Kim --- mm/memory.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 085287f..08336046 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2230,7 +2230,6 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd, unsigned long pfn, pgprot_t prot) { pte_t *pte, *mapped_pte; - unsigned long next; spinlock_t *ptl; int err = 0; @@ -2245,7 +2244,6 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd, break; } - next = pte_cont_addr_end(addr, end); set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot))); pfn++; pte++; -- 2.7.4 From f17e2a879df716db843d744ecf8782d1bcaf13c6 Mon Sep 17 00:00:00 2001 From: Sung-hun Kim Date: Wed, 8 Sep 2021 17:07:36 +0900 Subject: [PATCH 05/16] asm-generic: THP: fix bugs in asm-generic headers Since a function is defined in two headers at the same time, the kernel build fails. Also, a vm_fault_t-typed function does not return any value. This patch fixes two bugs in asm-generic headers. 
Fixes: 7d5372737d34 ("mm: THP: introducing a fine-grained transparent hugepage technique for ARM64 architecture") Change-Id: I84110bbb6c7f5b0794c55b3aca98419f12469eca Signed-off-by: Sung-hun Kim --- include/asm-generic/finegrained_thp.h | 1 + include/asm-generic/huge_mm.h | 8 +++----- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/include/asm-generic/finegrained_thp.h b/include/asm-generic/finegrained_thp.h index 08a3461..35c9031 100644 --- a/include/asm-generic/finegrained_thp.h +++ b/include/asm-generic/finegrained_thp.h @@ -1,6 +1,7 @@ /* a generic header for fine-grained thp */ #ifndef __ASM_FINEGRAINED_THP_H #define __ASM_FINEGRAINED_THP_H +#ifndef CONFIG_FINEGRAINED_THP static inline void khugepaged_mem_hook(struct mm_struct *mm, unsigned long addr, long diff, const char *debug) {} diff --git a/include/asm-generic/huge_mm.h b/include/asm-generic/huge_mm.h index 48527cf..6714d0e 100644 --- a/include/asm-generic/huge_mm.h +++ b/include/asm-generic/huge_mm.h @@ -24,13 +24,11 @@ static inline pte_t arch_make_huge_pte(struct page *hpage, return mk_pte(hpage, vma->vm_page_prot); } -static inline void khugepaged_mem_hook(struct mm_struct *mm, - unsigned long addr, long diff, const char *debug) -{} - static inline vm_fault_t arch_do_set_huge_pte(struct vm_fault *vmf, struct page *page) -{} +{ + return VM_FAULT_FALLBACK; +} static inline void arch_set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte, unsigned long headoff) -- 2.7.4 From 04519e317c60d1d245443b74ae0fef2af1d6241f Mon Sep 17 00:00:00 2001 From: Sung-hun Kim Date: Wed, 8 Sep 2021 17:18:37 +0900 Subject: [PATCH 06/16] mm: THP: workaround: only allow including specific headers for FINEGRAINED_THP configured cases asm/huge_mm.h and asm/finegrained_thp.h are only used for FINEGRAINED_THP-enabled kernel. Otherwise, such as arm which does not support contiguous PTE bit, disallow including them. 
Fixes: 7d5372737d34 ("mm: THP: introducing a fine-grained transparent hugepage technique for ARM64 architecture") Change-Id: I37c2bc46106711f4b7ee33a6838d87e929e13247 Signed-off-by: Sung-hun Kim --- mm/khugepaged.c | 2 ++ mm/memory.c | 5 +++++ mm/mmap.c | 4 ++++ 3 files changed, 11 insertions(+) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index aa96e8e..34f0c40 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -21,8 +21,10 @@ #include #include +#ifdef CONFIG_FINEGRAINED_THP #include #include +#endif #include "internal.h" enum scan_result { diff --git a/mm/memory.c b/mm/memory.c index 08336046..bdf18e9 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -82,8 +82,13 @@ #include #include #include +#ifdef CONFIG_FINEGRAINED_THP #include #include +#else +#include +#include +#endif #include "pgalloc-track.h" #include "internal.h" diff --git a/mm/mmap.c b/mm/mmap.c index 02eb014..cca7268 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -52,7 +52,11 @@ #include #include #include +#ifdef CONFIG_FINEGRAINED_THP #include +#else +#include +#endif #define CREATE_TRACE_POINTS #include -- 2.7.4 From 77427aa27cc83043be034d102525002f50bbf05f Mon Sep 17 00:00:00 2001 From: Sung-hun Kim Date: Mon, 13 Sep 2021 12:19:39 +0900 Subject: [PATCH 07/16] mm: THP: workaround: fix a build error occurred if FINEGRAINED_THP is disabled Fixes: 04519e317c60 ('mm: THP: workaround: only allow including specific headers for FINEGRAINED_THP configured cases') Change-Id: Iec1678cb5c45708865a1d18fef88807e7fd47870 Signed-off-by: Sung-hun Kim --- mm/khugepaged.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 34f0c40..99cc150 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -24,6 +24,9 @@ #ifdef CONFIG_FINEGRAINED_THP #include #include +#else +#include +#include #endif #include "internal.h" -- 2.7.4 From 8690fa3fc22ac74304f26441798e540f8f929926 Mon Sep 17 00:00:00 2001 From: Sung-hun Kim Date: Wed, 15 Sep 2021 13:28:04 +0900 Subject: [PATCH 08/16] mm, meminfo: 
modify page counting Two counters, nr_phys_huge_pmd_pages and nr_phys_cont_pte_pages, are counted by different units. This patch enforces two counters counted by pages, not huge pages. Change-Id: I1fcb6a1a9c3a60c956b861e79ec3714a33004991 Signed-off-by: Sung-hun Kim --- fs/proc/meminfo.c | 2 +- mm/huge_memory.c | 4 ++-- mm/memory.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 014f197..abc072ba 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -155,7 +155,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) phys_cont_pte_pages()); #endif /* CONFIG_FINEGRAINED_THP */ show_val_kb(m, "PhysPmdMapped: ", - phys_huge_pmd_pages() * HPAGE_PMD_NR); + phys_huge_pmd_pages()); #endif #ifdef CONFIG_CMA show_val_kb(m, "CmaTotal: ", totalcma_pages); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 20ea663..23d21e5 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1670,7 +1670,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, if (vma_is_special_huge(vma)) { if (arch_needs_pgtable_deposit()) zap_deposited_table(tlb->mm, pmd); - atomic_long_dec(&nr_phys_huge_pmd_pages); + atomic_long_add(-HPAGE_PMD_NR, &nr_phys_huge_pmd_pages); spin_unlock(ptl); if (is_huge_zero_pmd(orig_pmd)) tlb_remove_page_size(tlb, pmd_page(orig_pmd), HPAGE_PMD_SIZE); @@ -2281,7 +2281,7 @@ repeat: pmd_t orig_pmd; orig_pmd = pmdp_huge_get_and_clear_full(vma, haddr, pmd, 0); - atomic_long_dec(&nr_phys_huge_pmd_pages); + atomic_long_add(-HPAGE_PMD_NR, &nr_phys_huge_pmd_pages); thp_remap_pte_range_locked(mm, pmd, haddr, haddr + HPAGE_PMD_SIZE, pmd_pfn(orig_pmd), diff --git a/mm/memory.c b/mm/memory.c index bdf18e9..e6deee2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2287,7 +2287,7 @@ static int remap_try_huge_pmd(struct mm_struct *mm, pmd_t *pmd, unsigned long ad ret = pmd_set_huge(pmd, phys_addr, prot); spin_unlock(ptl); - atomic_long_inc(&nr_phys_huge_pmd_pages); + 
atomic_long_add(HPAGE_PMD_NR, &nr_phys_huge_pmd_pages); return ret; } -- 2.7.4 From 93cdd04abf4b2522392f658de1ed35a602c0e945 Mon Sep 17 00:00:00 2001 From: Seung-Woo Kim Date: Wed, 15 Sep 2021 15:07:00 +0900 Subject: [PATCH 09/16] Partially Revert "brcmfmac: p2p: Deal with set but unused variables" This partially reverts commit 2de64ca7c9fadd32b261530592db4a6adbfcb53f. The commit 61325dc073e2 ("Revert "brcmfmac: move configuration of probe request IEs"") requires vif set with p2p interface, but commit 2de64ca7c9fa removes setting. Partially revert the commit to support p2p usage with p2p interface. Change-Id: Ia90e256c3d10396b1018e3aec8145139accfb39e Reported-by: Jiung Yu Signed-off-by: Seung-Woo Kim --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c index b08d2ca..942bd53 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c @@ -912,6 +912,8 @@ int brcmf_p2p_scan_prep(struct wiphy *wiphy, if (err) return err; + vif = p2p->bss_idx[P2PAPI_BSSCFG_DEVICE].vif; + /* override .run_escan() callback. */ cfg->escan_info.run = brcmf_p2p_run_escan; } -- 2.7.4 From be97c7c0fb8de0bc2dfc7bf82bf02bcc11142ae0 Mon Sep 17 00:00:00 2001 From: Sung-hun Kim Date: Wed, 15 Sep 2021 16:39:13 +0900 Subject: [PATCH 10/16] mm, thp: hide remap_try_huge_pmd for the THP-disabled kernel Since remap_try_huge_pmd is dependent on the kernel configuration CONFIG_TRANSPARENT_HUGEPAGE, it should be hidden when the kernel configuration is disabled. 
Fixes: 8690fa3fc22a ('mm, meminfo: modify page counting') Change-Id: Iae9efb2edf6cd563c794af68bea7987110a5b2da Signed-off-by: Sung-hun Kim --- mm/memory.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index e6deee2..f1e5eb9 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2259,7 +2259,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd, return err; } #endif /* CONFIG_FINEGRAINED_THP */ - +#ifdef CONFIG_TRANSPARENT_HUGEPAGE static int remap_try_huge_pmd(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot) @@ -2291,6 +2291,7 @@ static int remap_try_huge_pmd(struct mm_struct *mm, pmd_t *pmd, unsigned long ad return ret; } +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud, unsigned long addr, unsigned long end, @@ -2308,10 +2309,11 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud, do { next = pmd_addr_end(addr, end); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE if (remap_try_huge_pmd(mm, pmd, addr, next, pfn + (addr >> PAGE_SHIFT), prot)) continue; - +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ err = remap_pte_range(mm, pmd, addr, next, pfn + (addr >> PAGE_SHIFT), prot); if (err) -- 2.7.4 From 78df7c9b0b1662288349db6cd2de55d76e56929a Mon Sep 17 00:00:00 2001 From: Sung-hun Kim Date: Thu, 16 Sep 2021 13:44:25 +0900 Subject: [PATCH 11/16] mm, thp, migrate: handling migration of 64KB hugepages When a 64KB hugepage is migrated, it should be properly handled since it is different from other normal page mappings. The kernel should handle a set of sequential 16 page mappings at once. If not, the kernel can mishandle map counts of a compound page (that is, a set of pages). It can be a source of kernel bugs and the bug is easily reproduced on low-memory devices. This patch deals with the migration of 64KB hugepages. 
Fixes: 7d5372737d34 ('mm: THP: introducing a fine-grained transparent hugepage technique for ARM64 architecture') Change-Id: I50a5d4e9a263e7dcbded15c982f57c15a3a48f39 Signed-off-by: Sung-hun Kim --- arch/arm64/mm/huge_memory.c | 75 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/swapops.h | 21 +++++++++++++ mm/migrate.c | 17 +++++----- mm/rmap.c | 8 +++++ 4 files changed, 111 insertions(+), 10 deletions(-) diff --git a/arch/arm64/mm/huge_memory.c b/arch/arm64/mm/huge_memory.c index 2ef1a21..1073fde 100644 --- a/arch/arm64/mm/huge_memory.c +++ b/arch/arm64/mm/huge_memory.c @@ -1087,4 +1087,79 @@ void split_huge_pte_address(struct vm_area_struct *vma, unsigned long address, __split_huge_pte(vma, pmd, pte, haddr, freeze, page); } + +void set_huge_pte_migration_entry( + struct page_vma_mapped_walk *pvmw, + struct page *page) +{ + int i; + struct vm_area_struct *vma = pvmw->vma; + struct mm_struct *mm = vma->vm_mm; + unsigned long address = pvmw->address; + pte_t pteval, *pte; + swp_entry_t entry; + pte_t pteswp; + struct page *_page = page; + + if (!(pvmw->pmd && pvmw->pte)) + return; + + flush_cache_range(vma, address, address + HPAGE_CONT_PTE_SIZE); + pte = pvmw->pte; + + //arch_set_huge_pte_at(mm, address, pvmw->pte, ptee); + for (i = 0, pte = pvmw->pte; i < HPAGE_CONT_PTE_NR; i++, pte++) { + pteval = ptep_invalidate(vma, address, pte); + if (pte_dirty(pteval)) + set_page_dirty(_page); + entry = make_migration_entry(page, pte_write(pteval)); + pteswp = swp_entry_to_pte(entry); + if (pte_soft_dirty(pteval)) + pteswp = pte_swp_mksoft_dirty(pteswp); + set_pte_at(mm, address, pte, pteswp); + _page++; + address += PAGE_SIZE; + } + + pvmw->pte = pte; + pvmw->address = address; + + page_remove_rmap(page, true); + put_page(page); +} + +void remove_migration_huge_pte( + struct page_vma_mapped_walk *pvmw, struct page *new) +{ + struct vm_area_struct *vma = pvmw->vma; + struct mm_struct *mm = vma->vm_mm; + unsigned long address = pvmw->address; + unsigned long 
mmun_start = address & HPAGE_CONT_PTE_MASK; + pte_t ptee; + swp_entry_t entry; + + if (!(pvmw->pmd && !pvmw->pte)) + return; + + entry = pmd_to_swp_entry(*pvmw->pmd); + get_page(new); + ptee = pte_mkold(arch_make_huge_pte(new, vma)); + if (pte_swp_soft_dirty(*pvmw->pte)) + ptee = pte_mksoft_dirty(ptee); + if (is_write_migration_entry(entry)) + ptee = maybe_mkwrite(ptee, vma); + + flush_cache_range(vma, mmun_start, mmun_start + HPAGE_CONT_PTE_SIZE); + if (PageAnon(new)) + page_add_anon_rmap(new, vma, mmun_start, true); + else + page_add_file_rmap(new, true); + + arch_set_huge_pte_at(mm, mmun_start, pvmw->pte, ptee, 0); + if ((vma->vm_flags & VM_LOCKED) && !PageDoubleMap(new)) + mlock_vma_page(new); + pvmw->address = address + HPAGE_CONT_PTE_SIZE; + pvmw->pte = pvmw->pte + HPAGE_CONT_PTE_NR; + update_mmu_cache_pmd(vma, address, pvmw->pmd); +} #endif /* CONFIG_FINEGRAINED_THP */ diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 71aa4b7..bdfbc8e 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -250,6 +250,14 @@ extern void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, extern void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new); +#ifdef CONFIG_FINEGRAINED_THP +extern void set_huge_pte_migration_entry(struct page_vma_mapped_walk *pvmw, + struct page *page); + +extern void remove_migration_huge_pte(struct page_vma_mapped_walk *pvmw, + struct page *new); +#endif + extern void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd); static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd) @@ -292,6 +300,19 @@ static inline void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, { BUILD_BUG(); } +#ifdef CONFIG_FINEGRAINED_THP +static inline void set_huge_pte_migration_entry(struct page_vma_mapped_walk *pvmw, + struct page *page) +{ + BUILD_BUG(); +} + +static inline void remove_migration_huge_pte(struct page_vma_mapped_walk *pvmw, + struct page *new) +{ + BUILD_BUG(); +} +#endif static inline 
void pmd_migration_entry_wait(struct mm_struct *m, pmd_t *p) { } diff --git a/mm/migrate.c b/mm/migrate.c index b16e340..de299c3 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -230,6 +230,13 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma, remove_migration_pmd(&pvmw, new); continue; } +#ifdef CONFIG_FINEGRAINED_THP + if (PageTransHuge(page) && pte_cont(*pvmw.pte)) { + VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page); + remove_migration_huge_pte(&pvmw, new); + continue; + } +#endif /* CONFIG_FINEGRAINED_THP */ #endif get_page(new); @@ -266,16 +273,6 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma, page_dup_rmap(new, true); } else #endif -#ifdef CONFIG_FINEGRAINED_THP - if (PageTransHuge(new)) { - pte = pte_mkcont(pte_mkhuge(pte)); - arch_set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte, 0); - if (PageAnon(new)) - page_add_anon_rmap(new, vma, pvmw.address, true); - else - page_dup_rmap(new, true); - } else -#endif /* CONFIG_FINEGRAINED_THP */ { set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte); diff --git a/mm/rmap.c b/mm/rmap.c index 64de8c1..0eca948 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1480,6 +1480,14 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, set_pmd_migration_entry(&pvmw, page); continue; } +#ifdef CONFIG_FINEGRAINED_THP + if (pvmw.pte && pte_cont(*pvmw.pte) && (flags & TTU_MIGRATION)) { + VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page); + + set_huge_pte_migration_entry(&pvmw, page); + continue; + } +#endif /* CONFIG_FINEGRAINED_THP */ #endif /* -- 2.7.4 From a3ab8122aee25b0a8dc9d5dec53bc0ce7dcb6fc0 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 22 Sep 2021 15:29:33 +0200 Subject: [PATCH 12/16] mm: thp: count 64k shmem pages separately 64k THP for shmem needs separate counters, otherwise ShmemHugePages entry in /proc/meminfo incorrectly shows both 2M and 64k THPs as 2M ones. 
Signed-off-by: Marek Szyprowski Change-Id: I460ea9f4e9c2f84bb066f68bfb6a291183416bb1 --- fs/proc/meminfo.c | 4 ++++ include/linux/mmzone.h | 3 +++ mm/filemap.c | 10 +++++++++- mm/huge_memory.c | 11 ++++++++--- mm/khugepaged.c | 7 +++++++ mm/page_alloc.c | 6 ++++++ mm/shmem.c | 5 +++++ mm/vmstat.c | 3 +++ 8 files changed, 45 insertions(+), 4 deletions(-) diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index abc072ba..e619d5b 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -136,6 +136,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v) #endif /* CONFIG_FINEGRAINED_THP */ show_val_kb(m, "ShmemHugePages: ", global_node_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR); +#ifdef CONFIG_FINEGRAINED_THP + show_val_kb(m, "Shmem64KBPages: ", + global_node_page_state(NR_SHMEM_64KB_THPS) * HPAGE_CONT_PTE_NR); +#endif /* CONFIG_FINEGRAINED_THP */ show_val_kb(m, "ShmemPmdMapped: ", global_node_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR); #ifdef CONFIG_FINEGRAINED_THP diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 26df92e..5b4424e 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -192,6 +192,9 @@ enum node_stat_item { NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */ NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */ NR_SHMEM_THPS, +#ifdef CONFIG_FINEGRAINED_THP + NR_SHMEM_64KB_THPS, +#endif /* CONFIG_FINEGRAINED_THP */ NR_SHMEM_PMDMAPPED, #ifdef CONFIG_FINEGRAINED_THP NR_SHMEM_PTEMAPPED, diff --git a/mm/filemap.c b/mm/filemap.c index 02099ca..4ef7518 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -203,8 +203,16 @@ static void unaccount_page_cache_page(struct address_space *mapping, __mod_lruvec_page_state(page, NR_FILE_PAGES, -nr); if (PageSwapBacked(page)) { __mod_lruvec_page_state(page, NR_SHMEM, -nr); - if (PageTransHuge(page)) + if (PageTransHuge(page)) { +#ifdef CONFIG_FINEGRAINED_THP + if (thp_nr_pages(page) == HPAGE_PMD_NR) + __dec_node_page_state(page, NR_SHMEM_THPS); + else + 
__dec_node_page_state(page, NR_SHMEM_64KB_THPS); +#else /* CONFIG_FINEGRAINED_THP */ __dec_node_page_state(page, NR_SHMEM_THPS); +#endif /* CONFIG_FINEGRAINED_THP */ + } } else if (PageTransHuge(page)) { #ifdef CONFIG_FINEGRAINED_THP if (thp_nr_pages(page) == HPAGE_PMD_NR) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 23d21e5..e2ab0df 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2824,9 +2824,14 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) } spin_unlock(&ds_queue->split_queue_lock); if (mapping) { - if (PageSwapBacked(head)) - __dec_node_page_state(head, NR_SHMEM_THPS); - else { + if (PageSwapBacked(head)) { +#ifdef CONFIG_FINEGRAINED_THP + if (thp_nr_pages(head) == HPAGE_CONT_PTE_NR) + __dec_node_page_state(head, NR_SHMEM_64KB_THPS); + else +#endif /* CONFIG_FINEGRAINED_THP */ + __dec_node_page_state(head, NR_SHMEM_THPS); + } else { #ifdef CONFIG_FINEGRAINED_THP if (thp_nr_pages(head) == HPAGE_CONT_PTE_NR) __dec_node_page_state(head, NR_FILE_64KB_THPS); diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 99cc150..39ee0fb 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -2337,7 +2337,14 @@ out_unlock: } if (is_shmem) +#ifdef CONFIG_FINEGRAINED_THP + if (hpage_type == THP_TYPE_64KB) + __inc_node_page_state(new_page, NR_SHMEM_64KB_THPS); + else + __inc_node_page_state(new_page, NR_SHMEM_THPS); +#else /* CONFIG_FINEGRAINED_THP */ __inc_node_page_state(new_page, NR_SHMEM_THPS); +#endif /* CONFIG_FINEGRAINED_THP */ else { #ifdef CONFIG_FINEGRAINED_THP if (hpage_type == THP_TYPE_64KB) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 327e033..030b94c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5493,6 +5493,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) " shmem:%lukB" #ifdef CONFIG_TRANSPARENT_HUGEPAGE " shmem_thp: %lukB" +#ifdef CONFIG_FINEGRAINED_THP + " shmem_64kb_thp: %lukB" +#endif /* CONFIG_FINEGRAINED_THP */ " shmem_pmdmapped: %lukB" " anon_thp: %lukB" #endif @@ -5517,6 
+5520,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) K(node_page_state(pgdat, NR_SHMEM)), #ifdef CONFIG_TRANSPARENT_HUGEPAGE K(node_page_state(pgdat, NR_SHMEM_THPS) * HPAGE_PMD_NR), +#ifdef CONFIG_FINEGRAINED_THP + K(node_page_state(pgdat, NR_SHMEM_64KB_THPS) * HPAGE_CONT_PTE_NR), +#endif /* CONFIG_FINEGRAINED_THP */ K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR), K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR), diff --git a/mm/shmem.c b/mm/shmem.c index 01c9b74..2ec8fab 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -713,6 +713,11 @@ next: } if (PageTransHuge(page)) { count_vm_event(THP_FILE_ALLOC); +#ifdef CONFIG_FINEGRAINED_THP + if (thp_nr_pages(page) == HPAGE_CONT_PTE_NR) + __inc_node_page_state(page, NR_SHMEM_64KB_THPS); + else +#endif /* CONFIG_FINEGRAINED_THP */ __inc_node_page_state(page, NR_SHMEM_THPS); } mapping->nrpages += nr; diff --git a/mm/vmstat.c b/mm/vmstat.c index 42f5ef2..feb65b0 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1200,6 +1200,9 @@ const char * const vmstat_text[] = { "nr_writeback_temp", "nr_shmem", "nr_shmem_hugepages", +#ifdef CONFIG_FINEGRAINED_THP + "nr_shmem_64kb_hugepages", +#endif "nr_shmem_pmdmapped", #ifdef CONFIG_FINEGRAINED_THP "nr_shmem_ptemapped", -- 2.7.4 From 1cb2541ce6d35eb3f3d0dd7e2749c11ba267c2ba Mon Sep 17 00:00:00 2001 From: Adrian Szyndela Date: Thu, 30 Sep 2021 15:44:57 +0900 Subject: [PATCH 13/16] kdbus: don't unlink interrupted replies When a signal breaks a synchronous call, and a reply is received before restart is executed, then the reply is unlinked and the restarted call can't pick it up anymore. This commit leaves replies linked if they were interrupted. 
Change-Id: I89c353ecc2bae83f7c12bb199480423d743ed5bc Signed-off-by: Adrian Szyndela --- ipc/kdbus/connection.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ipc/kdbus/connection.c b/ipc/kdbus/connection.c index 02deba36..6479c04 100644 --- a/ipc/kdbus/connection.c +++ b/ipc/kdbus/connection.c @@ -1135,7 +1135,8 @@ static int kdbus_conn_reply(struct kdbus_conn *src, if (reply) { if (reply->sync) wake = kdbus_reply_ref(reply); - kdbus_reply_unlink(reply); + if (!reply->interrupted) + kdbus_reply_unlink(reply); } mutex_unlock(&dst->lock); -- 2.7.4 From 5b61243f1343850f11e583e0e8dd4ffc37bef580 Mon Sep 17 00:00:00 2001 From: Adrian Szyndela Date: Thu, 7 Oct 2021 11:27:38 +0200 Subject: [PATCH 14/16] kdbus: don't unlink _synchronous_ replies Keeping only interrupted synchronous calls linked is not enough. If a reply comes just after the signal, before marking the reply structure as interrupted, then the reply is unlinked and the restarted call can't pick it up anymore. This commit leaves all synchronous replies linked. This way: - non-synchronous calls work as before the changes - replies are unlinked by the replier in kdbus_conn_reply(); - replies for synchronous calls are unlinked by the caller in kdbus_conn_wait_reply(). 
Change-Id: If162f96a14d51d6a4475fe5c55039dc92236b19a Signed-off-by: Adrian Szyndela --- ipc/kdbus/connection.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ipc/kdbus/connection.c b/ipc/kdbus/connection.c index 6479c04..a40be5f 100644 --- a/ipc/kdbus/connection.c +++ b/ipc/kdbus/connection.c @@ -1135,7 +1135,7 @@ static int kdbus_conn_reply(struct kdbus_conn *src, if (reply) { if (reply->sync) wake = kdbus_reply_ref(reply); - if (!reply->interrupted) + else kdbus_reply_unlink(reply); } mutex_unlock(&dst->lock); -- 2.7.4 From f90f5c8c5d68f6a212e6f62e329b5d7b75b6b9d5 Mon Sep 17 00:00:00 2001 From: Sung-hun Kim Date: Thu, 30 Sep 2021 09:37:45 +0900 Subject: [PATCH 15/16] meminfo, thp: modify ifdef coverage to remove unexpected variable printing If the system uses fTHP, the user can show statistics of hugepage-mapped CMA pages via /proc/meminfo. Otherwise, the user does not need to be aware of such variables. This patch removes such exported variables when an fTHP-disabled kernel is used. Change-Id: Iaff9dd8d81da1a3caa60959b9c5c1f44544f30d4 Signed-off-by: Sung-hun Kim --- fs/proc/meminfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index e619d5b..7cc0633 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -157,9 +157,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v) global_node_page_state(NR_FILE_PTEMAPPED) * HPAGE_CONT_PTE_NR); show_val_kb(m, "PhysCPteMapped: ", phys_cont_pte_pages()); -#endif /* CONFIG_FINEGRAINED_THP */ show_val_kb(m, "PhysPmdMapped: ", phys_huge_pmd_pages()); +#endif /* CONFIG_FINEGRAINED_THP */ #endif #ifdef CONFIG_CMA show_val_kb(m, "CmaTotal: ", totalcma_pages); -- 2.7.4 From 8e4778dccf771e3f2caa982731b6056ecf8b8779 Mon Sep 17 00:00:00 2001 From: Sung-hun Kim Date: Thu, 30 Sep 2021 12:28:56 +0900 Subject: [PATCH 16/16] mm, thp: modify coverage of CONFIG_FINEGRAINED_THP macro Some code should be applied only in an fTHP-enabled kernel. 
This patch rearranges the coverage of CONFIG_FINEGRAINED_THP macro. Change-Id: I0541c36369f8bd7a8fe4b8868c51dc0e6879f100 Signed-off-by: Sung-hun Kim --- mm/huge_memory.c | 7 +++++- mm/memory.c | 66 +++++++++++++++++++++++++++----------------------------- 2 files changed, 38 insertions(+), 35 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index e2ab0df..efc73a0 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2194,6 +2194,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, } } +#ifdef CONFIG_FINEGRAINED_THP static int thp_pte_alloc_locked(struct mm_struct *mm, pmd_t *pmd) { pgtable_t new = pte_alloc_one(mm); @@ -2248,6 +2249,7 @@ static inline pgprot_t thp_pmd_pgprot(pmd_t pmd) return __pgprot(pmd_val(pfn_pmd(pfn, __pgprot(0))) ^ pmd_val(pmd)); } +#endif void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long address, bool freeze, struct page *page) @@ -2275,6 +2277,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, } repeat: +#ifdef CONFIG_FINEGRAINED_THP if (pmd_trans_huge(*pmd) && !vm_normal_page_pmd(vma, address, *pmd)) { struct mm_struct *mm = vma->vm_mm; unsigned long haddr = address & HPAGE_PMD_MASK; @@ -2287,7 +2290,9 @@ repeat: pmd_pfn(orig_pmd), thp_pmd_pgprot(orig_pmd)); goto out; - } else if (pmd_trans_huge(*pmd) && vm_normal_page_pmd(vma, address, *pmd)) { + } else +#endif /* CONFIG_FINEGRAINED_THP */ + if (pmd_trans_huge(*pmd) && vm_normal_page_pmd(vma, address, *pmd)) { if (!page) { page = pmd_page(*pmd); /* diff --git a/mm/memory.c b/mm/memory.c index f1e5eb9..eeb7825 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2229,37 +2229,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd, { return arch_remap_pte_range(mm, pmd, addr, end, pfn, prot); } -#else /* CONFIG_FINEGRAINED_THP */ -static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd, - unsigned long addr, unsigned long end, - unsigned long pfn, pgprot_t prot) -{ - pte_t *pte, *mapped_pte; - 
spinlock_t *ptl; - int err = 0; - mapped_pte = pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); - if (!pte) - return -ENOMEM; - arch_enter_lazy_mmu_mode(); - do { - BUG_ON(!pte_none(*pte)); - if (!pfn_modify_allowed(pfn, prot)) { - err = -EACCES; - break; - } - - set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot))); - pfn++; - pte++; - addr += PAGE_SIZE; - } while (addr != end); - arch_leave_lazy_mmu_mode(); - pte_unmap_unlock(mapped_pte, ptl); - return err; -} -#endif /* CONFIG_FINEGRAINED_THP */ -#ifdef CONFIG_TRANSPARENT_HUGEPAGE static int remap_try_huge_pmd(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot) @@ -2291,7 +2261,36 @@ static int remap_try_huge_pmd(struct mm_struct *mm, pmd_t *pmd, unsigned long ad return ret; } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#else /* CONFIG_FINEGRAINED_THP */ +static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd, + unsigned long addr, unsigned long end, + unsigned long pfn, pgprot_t prot) +{ + pte_t *pte, *mapped_pte; + spinlock_t *ptl; + int err = 0; + + mapped_pte = pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); + if (!pte) + return -ENOMEM; + arch_enter_lazy_mmu_mode(); + do { + BUG_ON(!pte_none(*pte)); + if (!pfn_modify_allowed(pfn, prot)) { + err = -EACCES; + break; + } + + set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot))); + pfn++; + pte++; + addr += PAGE_SIZE; + } while (addr != end); + arch_leave_lazy_mmu_mode(); + pte_unmap_unlock(mapped_pte, ptl); + return err; +} +#endif /* CONFIG_FINEGRAINED_THP */ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud, unsigned long addr, unsigned long end, @@ -2308,12 +2307,11 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud, VM_BUG_ON(pmd_trans_huge(*pmd)); do { next = pmd_addr_end(addr, end); - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#ifdef CONFIG_FINEGRAINED_THP if (remap_try_huge_pmd(mm, pmd, addr, next, pfn + (addr >> PAGE_SHIFT), prot)) continue; -#endif /* 
CONFIG_TRANSPARENT_HUGEPAGE */ +#endif /* CONFIG_FINEGRAINED_THP */ err = remap_pte_range(mm, pmd, addr, next, pfn + (addr >> PAGE_SHIFT), prot); if (err) -- 2.7.4