+#ifdef CONFIG_FINEGRAINED_THP
+/*
+ * Determine which hugepage type the given VMA range can hold.
+ * On success, a value > 0 (the hugepage type) is returned and the
+ * calculated hugepage start and end are stored in *hstart and *hend;
+ * otherwise the VMA cannot hold a hugepage and THP_TYPE_FAIL is returned.
+ */
+static inline int hugepage_determine_htype(unsigned long vm_start,
+		unsigned long vm_end, unsigned long *hstart, unsigned long *hend)
+{
+ unsigned long start, end;
+
+ /* determine 2MB hugepage */
+ start = (vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
+ end = vm_end & HPAGE_PMD_MASK;
+ if (start >= end) {
+ /* determine 64KB hugepage */
+ start = (vm_start + ~HPAGE_CONT_PTE_MASK) & HPAGE_CONT_PTE_MASK;
+ end = vm_end & HPAGE_CONT_PTE_MASK;
+ if (start >= end)
+ return THP_TYPE_FAIL;
+ *hstart = start;
+ *hend = end;
+ return THP_TYPE_64KB;
+ }
+ *hstart = start;
+ *hend = end;
+ return THP_TYPE_2MB;
+}
+
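+/*
+ * Return codes of khugepaged_scan_vma(): keep going with the next VMA,
+ * stop this scan round, or stop because mmap_lock was released.
+ */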
+enum {
+ KHUGEPAGE_SCAN_CONTINUE,
+ KHUGEPAGE_SCAN_BREAK,
+ KHUGEPAGE_SCAN_BREAK_MMAP_LOCK,
+};
+
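+/*
+ * Scan one VMA for collapse candidates, advancing khugepaged_scan.address
+ * in 2MB or 64KB steps depending on the hugepage type the range can hold.
+ */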
+static unsigned int khugepaged_scan_vma(struct mm_struct *mm,
+ struct vm_area_struct *vma, struct page **hpage,
+ unsigned int pages, int *progress)
+{
+ unsigned long hstart, hend;
+ int hpage_type, ret;
+ int hpage_size, hpage_nr;
+
+ if (!hugepage_vma_check(vma, vma->vm_flags))
+ return KHUGEPAGE_SCAN_CONTINUE;
+
+ hpage_type = hugepage_determine_htype(
+ (vma->vm_start > khugepaged_scan.address) ?
+ vma->vm_start : khugepaged_scan.address,
+ vma->vm_end, &hstart, &hend);
+
+ if (hpage_type == THP_TYPE_FAIL)
+ return KHUGEPAGE_SCAN_CONTINUE;
+ if (khugepaged_scan.address > hend)
+ return KHUGEPAGE_SCAN_CONTINUE;
+ if (khugepaged_scan.address < hstart)
+ khugepaged_scan.address = hstart;
+
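+	/* Pick the scan step size and base page count for the hugepage type */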
+ if (hpage_type == THP_TYPE_64KB) {
+ VM_BUG_ON(khugepaged_scan.address & ~HPAGE_CONT_PTE_MASK);
+ hpage_size = HPAGE_CONT_PTE_SIZE; /* 64KB */
+ hpage_nr = HPAGE_CONT_PTE_NR;
+ } else if (hpage_type == THP_TYPE_2MB) {
+ VM_BUG_ON(khugepaged_scan.address & ~HPAGE_PMD_MASK);
+ hpage_size = HPAGE_PMD_SIZE; /* 2MB */
+ hpage_nr = HPAGE_PMD_NR;
+ if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && vma->vm_file &&
+ !IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
+ HPAGE_PMD_NR)) {
+ /* fallback, vma or file not aligned to 2MB */
+ hpage_size = HPAGE_CONT_PTE_SIZE; /* 64KB */
+ hpage_nr = HPAGE_CONT_PTE_NR;
+ hpage_type = THP_TYPE_64KB;
+ }
+	} else {
+		BUG();
+	}
+
+	while (khugepaged_scan.address < hend) {
+		/*
+		 * If the remaining range is too small for the current
+		 * hugepage size, fall back to a 64KB hugepage when one
+		 * still fits below hend.
+		 */
+		if (khugepaged_scan.address + hpage_size > hend &&
+		    khugepaged_scan.address + HPAGE_CONT_PTE_SIZE < hend) {
+			hpage_size = HPAGE_CONT_PTE_SIZE;
+			hpage_nr = HPAGE_CONT_PTE_NR;
+			hpage_type = THP_TYPE_64KB;
+		}
+ ret = 0;
+ cond_resched();
+ if (unlikely(khugepaged_test_exit(mm)))
+ return KHUGEPAGE_SCAN_BREAK;
+
+		VM_BUG_ON(khugepaged_scan.address < hstart ||
+			  khugepaged_scan.address + hpage_size > hend);
+ if (IS_ENABLED(CONFIG_SHMEM) && vma->vm_file) {
+ struct file *file = get_file(vma->vm_file);
+ pgoff_t pgoff = linear_page_index(vma,
+ khugepaged_scan.address);
+
+ mmap_read_unlock(mm);
+ ret = 1;
+ khugepaged_scan_file(mm, file, pgoff, hpage, hpage_type);
+ fput(file);
+ } else {
+ ret = khugepaged_scan_pmd(mm, vma,
+ khugepaged_scan.address,
+ hpage, hpage_type);
+ }
+ /* move to next address */
+ khugepaged_scan.address += hpage_size;
+ *progress += hpage_nr;
+		if (ret)
+			/* we released mmap_lock so break loop */
+			return KHUGEPAGE_SCAN_BREAK_MMAP_LOCK;
+ if (*progress >= pages)
+ return KHUGEPAGE_SCAN_BREAK;
+ }
+ return KHUGEPAGE_SCAN_CONTINUE;
+}
+
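+/*
+ * Look up the scan hint registered for @slot, if any.  Note that @addr is
+ * currently unused: hints are matched per mm_slot only.
+ */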
+static struct thp_scan_hint *find_scan_hint(struct mm_slot *slot,
+ unsigned long addr)
+{
+ struct thp_scan_hint *hint;
+
+ list_for_each_entry(hint, &khugepaged_scan.hint_list, hint_list) {
+ if (hint->slot == slot)
+ return hint;
+ }
+ return NULL;
+}
+
+#ifdef CONFIG_THP_CONSERVATIVE
+/*
+ * Record a mapping change of @diff bytes at @addr; once enough change has
+ * accumulated for a 64KB hugepage, wake khugepaged to start a scan.
+ * The caller must hold a proper mmap_lock.
+ */
+void khugepaged_mem_hook(struct mm_struct *mm, unsigned long addr,
+ long diff, const char *debug)
+{
+ struct mm_slot *slot;
+ struct vm_area_struct *vma;
+ struct thp_scan_hint *hint;
+ bool wakeup = false;
+ bool retry = false;
+
+	vma = find_vma(mm, addr);
+	if (!vma || !hugepage_vma_check(vma, vma->vm_flags))
+		return;
+
+again:
+ spin_lock(&khugepaged_mm_lock);
+ slot = get_mm_slot(mm);
+ if (!slot) {
+ /* make a new slot or go out */
+ spin_unlock(&khugepaged_mm_lock);
+ if (retry)
+ return;
+ if (__khugepaged_enter(mm))
+ return;
+ retry = true;
+ goto again;
+ }
+
+ hint = find_scan_hint(slot, addr);
+ if (!hint) {
+ spin_unlock(&khugepaged_mm_lock);
+		hint = kzalloc(sizeof(struct thp_scan_hint), GFP_KERNEL);
+		if (!hint)
+			return;
+		hint->vma = vma;
+ hint->slot = slot;
+ hint->diff = 0;
+ hint->jiffies = jiffies;
+ spin_lock(&khugepaged_mm_lock);
+ list_add(&hint->hint_list, &khugepaged_scan.hint_list);
+ khugepaged_scan.nr_hint++;
+ }
+	hint->diff += diff;
+	if (hint->diff >= HPAGE_CONT_PTE_SIZE)
+		wakeup = true;
+ spin_unlock(&khugepaged_mm_lock);
+
+	/* if possible, wake khugepaged up to start a scan */
+	if (wakeup)
+		wake_up_interruptible(&khugepaged_wait);
+}
+#else /* CONFIG_THP_CONSERVATIVE */
+void khugepaged_mem_hook(struct mm_struct *mm,
+ unsigned long addr, long diff, const char *debug)
+{}
+#endif /* CONFIG_THP_CONSERVATIVE */
+
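+/* Drop the scan hint registered for @slot, if one exists. */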
+static void clear_hint_list(struct mm_slot *slot)
+{
+	struct thp_scan_hint *hint;
+
+	hint = find_scan_hint(slot, 0);
+ if (hint) {
+ list_del(&hint->hint_list);
+ kfree(hint);
+ khugepaged_scan.nr_hint--;
+ }
+}
+
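+/*
+ * Detach and return the next hint from the hint list, or NULL when the
+ * list is empty.
+ */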
+static struct thp_scan_hint *get_next_hint(void)
+{
+ if (!list_empty(&khugepaged_scan.hint_list)) {
+ struct thp_scan_hint *hint = list_first_entry(
+ &khugepaged_scan.hint_list,
+ struct thp_scan_hint, hint_list);
+ list_del(&hint->hint_list);
+ khugepaged_scan.nr_hint--;
+ return hint;
+ }
+ return NULL;
+}
+#endif /* CONFIG_FINEGRAINED_THP */
+