mm/page_owner.c (platform/kernel/linux-starfive.git)
// SPDX-License-Identifier: GPL-2.0
#include <linux/debugfs.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/memblock.h>
#include <linux/stacktrace.h>
#include <linux/page_owner.h>
#include <linux/jump_label.h>
#include <linux/migrate.h>
#include <linux/stackdepot.h>
#include <linux/seq_file.h>

#include "internal.h"

/*
 * TODO: teach PAGE_OWNER_STACK_DEPTH (__dump_page_owner and save_stack)
 * to use off-stack temporary storage
 */
#define PAGE_OWNER_STACK_DEPTH (16)

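/*
 * Per-page allocation metadata, stored in the page_ext space that
 * page_owner_ops reserves for every page.
 */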
struct page_owner {
	unsigned short order;
	short last_migrate_reason;
	gfp_t gfp_mask;
	depot_stack_handle_t handle;
#ifdef CONFIG_DEBUG_PAGEALLOC
	depot_stack_handle_t free_handle;
#endif
};

static bool page_owner_disabled = true;
DEFINE_STATIC_KEY_FALSE(page_owner_inited);

static depot_stack_handle_t dummy_handle;
static depot_stack_handle_t failure_handle;
static depot_stack_handle_t early_handle;

static void init_early_allocated_pages(void);

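/* Parse the "page_owner=on" boot parameter that enables the tracker. */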
static int __init early_page_owner_param(char *buf)
{
	if (!buf)
		return -EINVAL;

	if (strcmp(buf, "on") == 0)
		page_owner_disabled = false;

	return 0;
}
early_param("page_owner", early_page_owner_param);

static bool need_page_owner(void)
{
	if (page_owner_disabled)
		return false;

	return true;
}

static __always_inline depot_stack_handle_t create_dummy_stack(void)
{
	unsigned long entries[4];
	unsigned int nr_entries;

	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
	return stack_depot_save(entries, nr_entries, GFP_KERNEL);
}

static noinline void register_dummy_stack(void)
{
	dummy_handle = create_dummy_stack();
}

static noinline void register_failure_stack(void)
{
	failure_handle = create_dummy_stack();
}

static noinline void register_early_stack(void)
{
	early_handle = create_dummy_stack();
}

static void init_page_owner(void)
{
	if (page_owner_disabled)
		return;

	register_dummy_stack();
	register_failure_stack();
	register_early_stack();
	static_branch_enable(&page_owner_inited);
	init_early_allocated_pages();
}

struct page_ext_operations page_owner_ops = {
	.size = sizeof(struct page_owner),
	.need = need_page_owner,
	.init = init_page_owner,
};

static inline struct page_owner *get_page_owner(struct page_ext *page_ext)
{
	return (void *)page_ext + page_owner_ops.offset;
}

static inline bool check_recursive_alloc(unsigned long *entries,
					 unsigned int nr_entries,
					 unsigned long ip)
{
	unsigned int i;

	for (i = 0; i < nr_entries; i++) {
		if (entries[i] == ip)
			return true;
	}
	return false;
}

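/*
 * Save the current stack trace into stackdepot, falling back to the
 * preallocated dummy/failure handles when the save would recurse or when
 * stackdepot cannot allocate memory.
 */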
static noinline depot_stack_handle_t save_stack(gfp_t flags)
{
	unsigned long entries[PAGE_OWNER_STACK_DEPTH];
	depot_stack_handle_t handle;
	unsigned int nr_entries;

	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);

	/*
	 * We need to check for recursion here because our request to
	 * stackdepot could trigger a memory allocation to save the new
	 * entry. That allocation would reach this point and call
	 * stack_depot_save() again if we don't catch it. Since stackdepot
	 * would still be short of memory, it would try to allocate again
	 * and loop forever.
	 */
	if (check_recursive_alloc(entries, nr_entries, _RET_IP_))
		return dummy_handle;

	handle = stack_depot_save(entries, nr_entries, flags);
	if (!handle)
		handle = failure_handle;

	return handle;
}

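/*
 * Called when pages are freed: mark the owner info inactive and, when
 * CONFIG_DEBUG_PAGEALLOC is enabled, record the freeing stack trace.
 */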
void __reset_page_owner(struct page *page, unsigned int order)
{
	int i;
	struct page_ext *page_ext;
#ifdef CONFIG_DEBUG_PAGEALLOC
	depot_stack_handle_t handle = 0;
	struct page_owner *page_owner;

	if (debug_pagealloc_enabled())
		handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
#endif

	for (i = 0; i < (1 << order); i++) {
		page_ext = lookup_page_ext(page + i);
		if (unlikely(!page_ext))
			continue;
		__clear_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags);
#ifdef CONFIG_DEBUG_PAGEALLOC
		if (debug_pagealloc_enabled()) {
			page_owner = get_page_owner(page_ext);
			page_owner->free_handle = handle;
		}
#endif
	}
}

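/*
 * Record the owner info (stack handle, order, gfp mask) in the page_ext
 * of every subpage of a newly allocated block.
 */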
static inline void __set_page_owner_handle(struct page *page,
	struct page_ext *page_ext, depot_stack_handle_t handle,
	unsigned int order, gfp_t gfp_mask)
{
	struct page_owner *page_owner;
	int i;

	for (i = 0; i < (1 << order); i++) {
		page_owner = get_page_owner(page_ext);
		page_owner->handle = handle;
		page_owner->order = order;
		page_owner->gfp_mask = gfp_mask;
		page_owner->last_migrate_reason = -1;
		__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
		__set_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags);

		/* Advance to the page_ext of the next subpage in the block */
		if (i + 1 < (1 << order))
			page_ext = lookup_page_ext(page + i + 1);
	}
}

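/* Allocator hook: record who allocated this page and with which flags. */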
noinline void __set_page_owner(struct page *page, unsigned int order,
					gfp_t gfp_mask)
{
	struct page_ext *page_ext = lookup_page_ext(page);
	depot_stack_handle_t handle;

	if (unlikely(!page_ext))
		return;

	handle = save_stack(gfp_mask);
	__set_page_owner_handle(page, page_ext, handle, order, gfp_mask);
}

void __set_page_owner_migrate_reason(struct page *page, int reason)
{
	struct page_ext *page_ext = lookup_page_ext(page);
	struct page_owner *page_owner;

	if (unlikely(!page_ext))
		return;

	page_owner = get_page_owner(page_ext);
	page_owner->last_migrate_reason = reason;
}

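/* After splitting a high-order page, mark every subpage as order 0. */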
void __split_page_owner(struct page *page, unsigned int order)
{
	int i;
	struct page_ext *page_ext = lookup_page_ext(page);
	struct page_owner *page_owner;

	if (unlikely(!page_ext))
		return;

	page_owner = get_page_owner(page_ext);
	page_owner->order = 0;
	for (i = 1; i < (1 << order); i++) {
		page_ext = lookup_page_ext(page + i);
		page_owner = get_page_owner(page_ext);
		page_owner->order = 0;
	}
}

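/* Copy the owner info from a page being migrated to its replacement page. */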
void __copy_page_owner(struct page *oldpage, struct page *newpage)
{
	struct page_ext *old_ext = lookup_page_ext(oldpage);
	struct page_ext *new_ext = lookup_page_ext(newpage);
	struct page_owner *old_page_owner, *new_page_owner;

	if (unlikely(!old_ext || !new_ext))
		return;

	old_page_owner = get_page_owner(old_ext);
	new_page_owner = get_page_owner(new_ext);
	new_page_owner->order = old_page_owner->order;
	new_page_owner->gfp_mask = old_page_owner->gfp_mask;
	new_page_owner->last_migrate_reason =
		old_page_owner->last_migrate_reason;
	new_page_owner->handle = old_page_owner->handle;

	/*
	 * We don't clear the bit on the oldpage as it's going to be freed
	 * after migration. Until then, the info can be useful in case of
	 * a bug, and the overall stats will be off a bit only temporarily.
	 * Also, migrate_misplaced_transhuge_page() can still fail the
	 * migration and then we want the oldpage to retain the info. But
	 * in that case we also don't need to explicitly clear the info from
	 * the new page, which will be freed.
	 */
	__set_bit(PAGE_EXT_OWNER, &new_ext->flags);
	__set_bit(PAGE_EXT_OWNER_ACTIVE, &new_ext->flags);
}

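/*
 * Walk the zone pageblock by pageblock and count pages whose migratetype
 * differs from the migratetype of the pageblock they sit in; used for the
 * mixed-block counts in /proc/pagetypeinfo.
 */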
void pagetypeinfo_showmixedcount_print(struct seq_file *m,
				       pg_data_t *pgdat, struct zone *zone)
{
	struct page *page;
	struct page_ext *page_ext;
	struct page_owner *page_owner;
	unsigned long pfn = zone->zone_start_pfn, block_end_pfn;
	unsigned long end_pfn = pfn + zone->spanned_pages;
	unsigned long count[MIGRATE_TYPES] = { 0, };
	int pageblock_mt, page_mt;
	int i;

	/* Scan block by block. First and last block may be incomplete */
	pfn = zone->zone_start_pfn;

	/*
	 * Walk the zone in pageblock_nr_pages steps. If a page block spans
	 * a zone boundary, it will be double counted between zones. This does
	 * not matter as the mixed block count will still be correct
	 */
	for (; pfn < end_pfn; ) {
		if (!pfn_valid(pfn)) {
			pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
			continue;
		}

		block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
		block_end_pfn = min(block_end_pfn, end_pfn);

		page = pfn_to_page(pfn);
		pageblock_mt = get_pageblock_migratetype(page);

		for (; pfn < block_end_pfn; pfn++) {
			if (!pfn_valid_within(pfn))
				continue;

			page = pfn_to_page(pfn);

			if (page_zone(page) != zone)
				continue;

			if (PageBuddy(page)) {
				unsigned long freepage_order;

				freepage_order = page_order_unsafe(page);
				if (freepage_order < MAX_ORDER)
					pfn += (1UL << freepage_order) - 1;
				continue;
			}

			if (PageReserved(page))
				continue;

			page_ext = lookup_page_ext(page);
			if (unlikely(!page_ext))
				continue;

			if (!test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
				continue;

			page_owner = get_page_owner(page_ext);
			page_mt = gfpflags_to_migratetype(
					page_owner->gfp_mask);
			if (pageblock_mt != page_mt) {
				if (is_migrate_cma(pageblock_mt))
					count[MIGRATE_MOVABLE]++;
				else
					count[pageblock_mt]++;

				pfn = block_end_pfn;
				break;
			}
			pfn += (1UL << page_owner->order) - 1;
		}
	}

	/* Print counts */
	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (i = 0; i < MIGRATE_TYPES; i++)
		seq_printf(m, "%12lu ", count[i]);
	seq_putc(m, '\n');
}

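/*
 * Format one page_owner record into a kernel buffer and copy it to the
 * user buffer passed in from read_page_owner().
 */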
static ssize_t
print_page_owner(char __user *buf, size_t count, unsigned long pfn,
		struct page *page, struct page_owner *page_owner,
		depot_stack_handle_t handle)
{
	int ret, pageblock_mt, page_mt;
	unsigned long *entries;
	unsigned int nr_entries;
	char *kbuf;

	count = min_t(size_t, count, PAGE_SIZE);
	kbuf = kmalloc(count, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	ret = snprintf(kbuf, count,
			"Page allocated via order %u, mask %#x(%pGg)\n",
			page_owner->order, page_owner->gfp_mask,
			&page_owner->gfp_mask);

	if (ret >= count)
		goto err;

	/* Print information relevant to grouping pages by mobility */
	pageblock_mt = get_pageblock_migratetype(page);
	page_mt = gfpflags_to_migratetype(page_owner->gfp_mask);
	ret += snprintf(kbuf + ret, count - ret,
			"PFN %lu type %s Block %lu type %s Flags %#lx(%pGp)\n",
			pfn,
			migratetype_names[page_mt],
			pfn >> pageblock_order,
			migratetype_names[pageblock_mt],
			page->flags, &page->flags);

	if (ret >= count)
		goto err;

	nr_entries = stack_depot_fetch(handle, &entries);
	ret += stack_trace_snprint(kbuf + ret, count - ret, entries, nr_entries, 0);
	if (ret >= count)
		goto err;

	if (page_owner->last_migrate_reason != -1) {
		ret += snprintf(kbuf + ret, count - ret,
			"Page has been migrated, last migrate reason: %s\n",
			migrate_reason_names[page_owner->last_migrate_reason]);
		if (ret >= count)
			goto err;
	}

	ret += snprintf(kbuf + ret, count - ret, "\n");
	if (ret >= count)
		goto err;

	if (copy_to_user(buf, kbuf, ret))
		ret = -EFAULT;

	kfree(kbuf);
	return ret;

err:
	kfree(kbuf);
	return -ENOMEM;
}

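/* Dump the owner info for one page to the kernel log (e.g. from dump_page()). */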
void __dump_page_owner(struct page *page)
{
	struct page_ext *page_ext = lookup_page_ext(page);
	struct page_owner *page_owner;
	depot_stack_handle_t handle;
	unsigned long *entries;
	unsigned int nr_entries;
	gfp_t gfp_mask;
	int mt;

	if (unlikely(!page_ext)) {
		pr_alert("There is no page extension available.\n");
		return;
	}

	page_owner = get_page_owner(page_ext);
	gfp_mask = page_owner->gfp_mask;
	mt = gfpflags_to_migratetype(gfp_mask);

	if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) {
		pr_alert("page_owner info is not present (never set?)\n");
		return;
	}

	if (test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
		pr_alert("page_owner tracks the page as allocated\n");
	else
		pr_alert("page_owner tracks the page as freed\n");

	pr_alert("page last allocated via order %u, migratetype %s, gfp_mask %#x(%pGg)\n",
		 page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask);

	handle = READ_ONCE(page_owner->handle);
	if (!handle) {
		pr_alert("page_owner allocation stack trace missing\n");
	} else {
		nr_entries = stack_depot_fetch(handle, &entries);
		stack_trace_print(entries, nr_entries, 0);
	}

#ifdef CONFIG_DEBUG_PAGEALLOC
	handle = READ_ONCE(page_owner->free_handle);
	if (!handle) {
		pr_alert("page_owner free stack trace missing\n");
	} else {
		nr_entries = stack_depot_fetch(handle, &entries);
		pr_alert("page last free stack trace:\n");
		stack_trace_print(entries, nr_entries, 0);
	}
#endif

	if (page_owner->last_migrate_reason != -1)
		pr_alert("page has been migrated, last migrate reason: %s\n",
			migrate_reason_names[page_owner->last_migrate_reason]);
}

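/*
 * Read handler for /sys/kernel/debug/page_owner. Each read returns the
 * record of one currently allocated page; the file offset encodes the
 * next PFN to scan, relative to min_low_pfn.
 */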
static ssize_t
read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
	unsigned long pfn;
	struct page *page;
	struct page_ext *page_ext;
	struct page_owner *page_owner;
	depot_stack_handle_t handle;

	if (!static_branch_unlikely(&page_owner_inited))
		return -EINVAL;

	page = NULL;
	pfn = min_low_pfn + *ppos;

	/* Find a valid PFN or the start of a MAX_ORDER_NR_PAGES area */
	while (!pfn_valid(pfn) && (pfn & (MAX_ORDER_NR_PAGES - 1)) != 0)
		pfn++;

	drain_all_pages(NULL);

	/* Find an allocated page */
	for (; pfn < max_pfn; pfn++) {
		/*
		 * If the new page is in a new MAX_ORDER_NR_PAGES area,
		 * validate the area as existing, skip it if not
		 */
		if ((pfn & (MAX_ORDER_NR_PAGES - 1)) == 0 && !pfn_valid(pfn)) {
			pfn += MAX_ORDER_NR_PAGES - 1;
			continue;
		}

		/* Check for holes within a MAX_ORDER area */
		if (!pfn_valid_within(pfn))
			continue;

		page = pfn_to_page(pfn);
		if (PageBuddy(page)) {
			unsigned long freepage_order = page_order_unsafe(page);

			if (freepage_order < MAX_ORDER)
				pfn += (1UL << freepage_order) - 1;
			continue;
		}

		page_ext = lookup_page_ext(page);
		if (unlikely(!page_ext))
			continue;

		/*
		 * Some pages could be missed by concurrent allocation or free,
		 * because we don't hold the zone lock.
		 */
		if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
			continue;

		/*
		 * Although we do have the info about past allocation of free
		 * pages, it's not relevant for current memory usage.
		 */
		if (!test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
			continue;

		page_owner = get_page_owner(page_ext);

		/*
		 * Don't print "tail" pages of high-order allocations as that
		 * would inflate the stats.
		 */
		if (!IS_ALIGNED(pfn, 1 << page_owner->order))
			continue;

		/*
		 * Access to page_owner->handle isn't synchronized, so be
		 * careful when accessing it.
		 */
		handle = READ_ONCE(page_owner->handle);
		if (!handle)
			continue;

		/* Record the next PFN to read in the file offset */
		*ppos = (pfn - min_low_pfn) + 1;

		return print_page_owner(buf, count, pfn, page,
				page_owner, handle);
	}

	return 0;
}

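/*
 * Give pages that were allocated before page_owner was ready a valid
 * owner record, using the preallocated early_handle stack.
 */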
static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
{
	unsigned long pfn = zone->zone_start_pfn;
	unsigned long end_pfn = zone_end_pfn(zone);
	unsigned long count = 0;

	/*
	 * Walk the zone in pageblock_nr_pages steps. If a page block spans
	 * a zone boundary, it will be double counted between zones. This does
	 * not matter as the mixed block count will still be correct
	 */
	for (; pfn < end_pfn; ) {
		unsigned long block_end_pfn;

		if (!pfn_valid(pfn)) {
			pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
			continue;
		}

		block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
		block_end_pfn = min(block_end_pfn, end_pfn);

		for (; pfn < block_end_pfn; pfn++) {
			struct page *page;
			struct page_ext *page_ext;

			if (!pfn_valid_within(pfn))
				continue;

			page = pfn_to_page(pfn);

			if (page_zone(page) != zone)
				continue;

			/*
			 * To avoid having to grab zone->lock, be a little
			 * careful when reading buddy page order. The only
			 * danger is that we skip too much and potentially miss
			 * some early allocated pages, which is better than
			 * heavy lock contention.
			 */
			if (PageBuddy(page)) {
				unsigned long order = page_order_unsafe(page);

				if (order > 0 && order < MAX_ORDER)
					pfn += (1UL << order) - 1;
				continue;
			}

			if (PageReserved(page))
				continue;

			page_ext = lookup_page_ext(page);
			if (unlikely(!page_ext))
				continue;

			/* Maybe overlapping zone */
			if (test_bit(PAGE_EXT_OWNER, &page_ext->flags))
				continue;

			/* Found early allocated page */
			__set_page_owner_handle(page, page_ext, early_handle,
						0, 0);
			count++;
		}
		cond_resched();
	}

	pr_info("Node %d, zone %8s: page owner found early allocated %lu pages\n",
		pgdat->node_id, zone->name, count);
}

static void init_zones_in_node(pg_data_t *pgdat)
{
	struct zone *zone;
	struct zone *node_zones = pgdat->node_zones;

	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
		if (!populated_zone(zone))
			continue;

		init_pages_in_zone(pgdat, zone);
	}
}

static void init_early_allocated_pages(void)
{
	pg_data_t *pgdat;

	for_each_online_pgdat(pgdat)
		init_zones_in_node(pgdat);
}

static const struct file_operations proc_page_owner_operations = {
	.read		= read_page_owner,
};

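/* Expose /sys/kernel/debug/page_owner once tracking has been enabled. */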
static int __init pageowner_init(void)
{
	if (!static_branch_unlikely(&page_owner_inited)) {
		pr_info("page_owner is disabled\n");
		return 0;
	}

	debugfs_create_file("page_owner", 0400, NULL, NULL,
			    &proc_page_owner_operations);

	return 0;
}
late_initcall(pageowner_init)