4 * Copyright (c) 2015 Samsung Electronics Co., Ltd. All rights reserved.
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
28 #include <sys/types.h>
30 #include <sys/syscall.h>
32 #include "config-parser.h"
35 #include "memory-cgroup.h"
38 #include "resourced.h"
41 #include "compact-common.h"
44 * State bit for zone's fragmentation warning
45 * ZONE_FRAG_WARN_RAISE bit is set for each zone
46 * when the fragmentation level reaches specified
47 * value for at least one of supported page orders.
49 #define ZONE_FRAG_WARN_RAISE (0x1 << 0)
51 /* Internal compaction module states */
52 #define COMPACT_IDLE (0)
53 /* State NOTIFIED is to eliminate spurious thread wakeups */
54 #define COMPACT_NOTIFIED (1 << 0)
55 /* Failed to write to procfs entry */
56 #define COMPACT_FAILURE (1 << 1)
58 * Failed to perform one of the basic operations:
59 * like reading /proc/zoneinfo or /proc/pagetypeinfo
60 * Set to indicate that there is no point of return
61 * and that the compaction thread should/can safely
62 * clean things up and call it a day.
64 #define COMPACT_WITHDRAW (1 << 2)
66 * Set when compaction module has been explicitly
69 #define COMPACT_CANCEL (1 << 3)
70 #define COMPACT_SKIP (COMPACT_WITHDRAW | COMPACT_CANCEL)
/* Highest buddy allocator page order handled by this module (orders 0..10). */
72 #define MAX_PAGE_ORDER 0xa
75 #define PROC_COMPACT_ENTRY "/proc/sys/vm/compact_memory"
76 #define COMPACT_CONF_FILE RD_CONFIG_FILE(optimizer)
/* Section/key names read from the optimizer config file. */
78 #define COMPACT_CONFIG_SECTION "Compaction"
79 #define COMPACT_CONFIG_ENABLE "CompactEnable"
80 #define COMPACT_CONFIG_FRAG "Fraglevel"
83 * Default frag level (percentage, order-based) which determines
84 * when to trigger compaction.
86 #define COMPACT_DEF_FRAG_LEVEL 800 /* 80% */
89 * Note: Tightly coupled with corresponding ids.
90 * Mind while modifying.
/* Zone name strings exactly as matched in the /proc/zoneinfo and
 * /proc/buddyinfo parser regexes below. */
92 static const char *zone_names[] = {"Normal", "DMA", "HighMem", "DMA32"};
/*
 * NOTE(review): the lines below are fields of (at least) three struct
 * definitions whose opening/closing lines are not visible in this
 * excerpt: a parser cursor (mem_info + current zone), a per-zone
 * record (zone_info), and the top-level memory_info.
 */
95 struct memory_info *mem_info;
96 struct zone_info *zone;
/* Free-block counts per allocation order, parsed from /proc/buddyinfo. */
101 unsigned long pages_per_order[MAX_PAGE_ORDER +1];
102 unsigned long free_pages;
/* Zone watermarks and managed-page count, parsed from /proc/zoneinfo. */
103 unsigned long wm_min;
104 unsigned long wm_low;
105 unsigned long wm_high;
106 unsigned long managed;
/* One bit per page order whose fragmentation exceeds the frag level. */
107 unsigned int frag_map:MAX_PAGE_ORDER+1;
/* Carries ZONE_FRAG_WARN_RAISE when this zone asks for compaction. */
108 unsigned int frag_warn:2;
112 unsigned int zone_count;
113 struct zone_info zones[ZONE_MAX_NR];
/*
 * Per-module control block: owns the worker thread, its lock and the
 * status bits used throughout this file.
 * NOTE(review): fields such as 'status' and 'frag_level' are referenced
 * by the functions below but their declarations are not visible in this
 * excerpt.
 */
116 struct compact_control {
117 struct memory_info *mem_info;
118 pthread_t compact_thread;
119 pthread_mutex_t lock;
122 unsigned int compact_type;
/*
 * Helpers building parse_arg table entries (regex + handler +
 * PARSE_TAG_* id) for the /proc parsers below.  Do not insert anything
 * between the backslash-continued lines of PARSE_TAG.
 */
125 #define PARSE_TAG(exp, fn, id) \
129 .tag = PARSE_TAG_##id, \
132 #define PARSE_TAG_EMPTY() {0,}
135 * @TODO: This should be attached to module ops
/*
 * Module-wide singleton: the active compact_control plus the condvars
 * used to wake the worker thread (notify) and to wait for it to finish
 * cleanup on exit (drained).
 */
137 struct compact_data {
138 struct compact_control *compact;
139 pthread_mutex_t notify_lock;
140 pthread_cond_t notify;
141 pthread_mutex_t drained_lock;
142 pthread_cond_t drained;
/* Static initialization; 'compact' stays NULL until compact_init(). */
145 static struct compact_data compact_data = {
146 .notify_lock = PTHREAD_MUTEX_INITIALIZER,
147 .notify = PTHREAD_COND_INITIALIZER,
148 .drained_lock = PTHREAD_MUTEX_INITIALIZER,
149 .drained = PTHREAD_COND_INITIALIZER
/*
 * Map a zone name (not necessarily NUL-terminated, hence the explicit
 * length) to its slot in zone_names[].
 * NOTE(review): the declaration of 'i' and the return statement(s) are
 * not visible in this excerpt; presumably the id returned is derived
 * from the matching index.
 */
152 static inline unsigned int get_zone_id(const char *zone_name, size_t len)
156 for (i = 0; i < ARRAY_SIZE(zone_names); ++i) {
157 if (!strncmp(zone_name, zone_names[i], len))
163 static inline const char *get_zone_name(unsigned int zone_id)
165 unsigned int i = ffs(zone_id) - 1;
167 return (i < ARRAY_SIZE(zone_names)) ? zone_names[i] : NULL;
171 * External fragmentation is an issue but one that mostly kernel
172 * should be concerned about, not the user space.
173 * Still though, fragmented *physical* memory may (but does not
174 * have to) lead to system getting less responsive - as the kernel
175 * might get trapped waiting for allocation of high-order
176 * physically-contiguous pages. The fragmentation issue gets more
177 * significant in case of huge pages - though this is left aside
178 * due to not being relevant, in this particular case.
180 * Triggering the compaction from the user-space is a rather
181 * nasty hack. But if this is to be done, then...
182 * 1. There is not much point in triggering compaction if
183 * the system is already at a heavy memory pressure
184 * and it is struggling to satisfy 0-order allocations
185 * 2. Specifying the overall fragmentation level is quite tricky
186 * and without some decent background of what is/has been going
187 * on as far as memory allocations are concerned (both from
188 * user-space and from the kernel) is not really reliable, to say
189 * at least. All in all, what's the acceptable (whatever it means)
190 * level for external fragmentation? Having most of the available
191 * memory within the low-order page blocks should raise an alert,
192 * but that's only in theory. Things get more complicated when taking
193 * into consideration the migration types of available pages.
194 * This might go wrong in so many ways .....
195 * Shall this be continued ?
/*
 * Perform one compaction pass: write to PROC_COMPACT_ENTRY and, unless
 * the write failed, clear the per-zone fragmentation warnings.  Status
 * bits accumulated locally are merged into compact->status under the
 * lock at the end.
 */
197 static void compaction_start(struct compact_control *compact)
199 struct memory_info *mem_info;
200 int current_status = COMPACT_IDLE;
/* Closed automatically when it goes out of scope (_cleanup_close_). */
201 _cleanup_close_ int fd = -1;
204 pthread_mutex_lock(&compact->lock);
/* Bail out early if the module is withdrawing or was cancelled. */
206 if (compact->status & COMPACT_SKIP)
207 current_status |= COMPACT_WITHDRAW;
209 pthread_mutex_unlock(&compact->lock);
211 if (current_status & COMPACT_WITHDRAW)
214 mem_info = compact->mem_info;
217 fd = open(PROC_COMPACT_ENTRY, O_WRONLY);
/* These errno values are treated as permanent: withdraw for good. */
219 if (errno == EACCES || errno == EFAULT || errno == ENOENT)
220 current_status |= COMPACT_WITHDRAW;
221 _E("Compaction: failed to open procfs entry [%d]\n", errno);
225 * It doesn't really matter what gets written,
226 * as long as smth gets....
/* NOTE(review): the declaration of 'n' is not visible in this excerpt. */
228 if (write(fd, &n, sizeof(n)) <= 0)
229 current_status |= COMPACT_FAILURE;
231 * Reset the external fragmentation warnings.
232 * Locking is not required here as all updates will get suspended
233 * until the compaction status won't indicate all is done here
235 if (current_status & COMPACT_FAILURE)
238 for (n = 0; n < mem_info->zone_count; ++n)
239 mem_info->zones[n].frag_warn &= ~ZONE_FRAG_WARN_RAISE;
242 pthread_mutex_lock(&compact->lock);
/* Publish the outcome for the tracker loop / exit path. */
243 compact->status |= current_status;
244 pthread_mutex_unlock(&compact->lock);
/*
 * Decide whether a zone looks fragmented enough to warrant compaction:
 * for each requested order, compare the share of free memory usable at
 * that order (expressed in per-mille of the zone's free pages) against
 * frag_level, building a bit map of "fragmented" orders.  The warning
 * bit is raised only when fragmentation appears or grows worse.
 */
247 static void compact_validate_zone(struct zone_info *zone,
248 unsigned int frag_level,
249 struct memory_info *mem_info)
251 int order, req_order;
252 unsigned int current_frag_map = 0;
254 * Skip compaction if the system is below the low watermark.
255 * It's gonna be done either way
257 if (zone->free_pages < zone->wm_low) {
258 _I("Skipping validation due to falling below the low watermark\n");
259 _I("Zone %s: number of free pages: %lu low watermark: %lu\n",
260 get_zone_name(zone->id),
261 zone->free_pages, zone->wm_low);
265 for (req_order = 1; req_order <= MAX_PAGE_ORDER; ++req_order) {
266 unsigned long available_pages = 0;
/* Base pages satisfiable by blocks of order >= req_order. */
268 for (order = req_order; order <= MAX_PAGE_ORDER; ++order)
269 available_pages += zone->pages_per_order[order] << order;
/* Fragmentation level for this order, in per-mille of free pages. */
271 if (zone->free_pages > 0 && (1000 - (available_pages * 1000 / zone->free_pages)) >= frag_level)
272 current_frag_map |= 1 << req_order;
275 if (current_frag_map) {
/* Raise only on newly appearing or strictly worsening fragmentation. */
277 if ((!zone->frag_map && current_frag_map) ||
278 ((zone->frag_map ^ current_frag_map) &&
279 !((zone->frag_map ^ current_frag_map) & zone->frag_map)))
281 zone->frag_warn |= ZONE_FRAG_WARN_RAISE;
284 zone->frag_map = current_frag_map;
287 static void compaction_verify_zone(struct compact_control *compact,
288 struct zone_info *zone)
290 struct memory_info *mem_info = compact->mem_info;
293 * Here comes the shady part:
294 * without some decent memory tracing here it is
295 * truly difficult to determine whether the compaction
296 * is required or not.
298 compact_validate_zone(zone, compact->frag_level, mem_info);
/*
 * Walk all known zones, refresh their fragmentation state and kick off
 * an actual compaction run when at least one zone raised a
 * fragmentation warning.
 */
301 static void compaction_verify(struct compact_control *compact)
303 /* Get the overall idea of current external fragmentation */
304 struct memory_info *mem_info = compact->mem_info;
305 unsigned int compact_targets = 0;
/* NOTE(review): the declaration of loop counter 'n' is not visible here. */
309 * Verify each zone although the compaction can be
310 * triggered per node (or globally) only.
312 for (n = 0; n < mem_info->zone_count; ++n) {
313 struct zone_info *zone = &mem_info->zones[n];
316 * Some devices make a zone but don't allocate any pages for it.
317 * We can ignore these empty zones.
319 if (zone->managed <= 0)
322 compaction_verify_zone(compact, zone);
323 if (zone->frag_warn & ZONE_FRAG_WARN_RAISE) {
325 * As the compaction can be triggered either globally
326 * or on per-node it's enough to have at least one
327 * zone for which the external fragmentation got
328 * dangerously high. Still to have a minimum control
329 * over the whole process - validate all zones.
/* NOTE(review): the use of 'compact_targets' and the condition guarding
 * this call are not visible in this excerpt. */
336 compaction_start(compact);
/*
 * Store value 'v' into the zone_info field located at byte offset
 * '_off' (obtained via offsetof); the stored type is inferred from 'v'
 * through typeof.
 */
340 #define compact_zoneinfo_set(zone, _off, v) \
341 (*(typeof(v)*)(((char*)(zone)) + _off) = v)
/*
 * parse_arg handler for a "zone <Name>" header line: look up (or start
 * tracking) the matching zone_info slot and make it the parser's
 * current zone.
 */
344 static int compact_parse_zone(const char *s, regmatch_t *match,
345 unsigned int parse_tag, void *data)
347 struct parser_data *parser_data = (struct parser_data *)data;
348 unsigned int zone_id;
349 struct zone_info *zone;
351 if (parse_tag != PARSE_TAG_ZONE)
/* match[1] delimits the captured zone name within 's'. */
354 zone_id = get_zone_id(s + match[1].rm_so,
355 match[1].rm_eo - match[1].rm_so);
356 zone = parser_data->mem_info->zones;
/* Find an existing slot for this id, or the first unused one. */
361 while (zone->id && zone->id != zone_id)
/* NOTE(review): the lines advancing 'zone' and assigning zone->id are
 * not visible in this excerpt. */
365 ++parser_data->mem_info->zone_count;
369 parser_data->zone = zone;
/*
 * parse_arg handler for zoneinfo watermark/managed lines: convert the
 * matched number with strtoul and store it into the current zone's
 * field selected by the parse tag.
 * NOTE(review): the switch statement header, 'break's between cases and
 * the declarations of 'v'/'e' are not visible in this excerpt.
 */
373 static int compact_parse_zoneinfo(const char *s, regmatch_t *match,
374 unsigned int parse_tag,
377 struct parser_data *parser_data = (struct parser_data *)data;
381 v = strtoul(s + match[1].rm_so, &e, 0);
386 case PARSE_TAG_WM_MIN:
387 compact_zoneinfo_set(parser_data->zone,
388 offsetof(struct zone_info, wm_min), v);
390 case PARSE_TAG_WM_LOW:
391 compact_zoneinfo_set(parser_data->zone,
392 offsetof(struct zone_info, wm_low), v);
395 case PARSE_TAG_WM_HIGH:
396 compact_zoneinfo_set(parser_data->zone,
397 offsetof(struct zone_info, wm_high), v);
399 case PARSE_TAG_MANAGED:
400 compact_zoneinfo_set(parser_data->zone,
401 offsetof(struct zone_info, managed), v);
/*
 * parse_arg handler for a buddyinfo row: read one free-block count per
 * page order, record them in pages_per_order[] and refresh the zone's
 * total free-page count.
 */
407 static int compact_parse_pages(const char *s, regmatch_t *match,
408 unsigned int parse_tag, void *data)
410 struct parser_data *parser_data = (struct parser_data *)data;
412 unsigned long v, page_count = 0;
415 if (parse_tag != PARSE_TAG_PAGE_COUNT)
/* NOTE(review): bound is '< MAX_PAGE_ORDER' while the arrays and the
 * validation loops elsewhere use '<= MAX_PAGE_ORDER' - possible
 * off-by-one; confirm against the complete original file. */
418 for (order = 0; order < MAX_PAGE_ORDER; ++order) {
420 v = strtoul(s, &e, 0);
423 parser_data->zone->pages_per_order[order] = v;
/* Convert order-sized blocks into base pages. */
424 page_count += v << order;
428 if (parser_data->zone->free_pages != page_count) {
430 * The drop of number of available pages is being handled
431 * on a per-order basis, thought this might be a good point
432 * to validate the zone's watermarks
434 parser_data->zone->free_pages = page_count;
/*
 * Parse /proc/buddyinfo into mem_info->zones[] (per-order free block
 * counts), driven by the regex/handler table below.
 */
439 static int compact_get_buddyinfo(struct compact_control *compact)
441 const struct parse_arg args[] = {
442 PARSE_TAG("zone[[:blank:]]+(Normal|DMA|DMA32|HighMem)",
443 compact_parse_zone, ZONE),
444 PARSE_TAG("([[:blank:]]+([0-9]+))+",
445 compact_parse_pages, PAGE_COUNT),
/* Parser cursor starts at the first zone slot. */
449 struct parser_data parser_data = {
450 .mem_info = compact->mem_info,
451 .zone = &compact->mem_info->zones[0],
454 return proc_parse_buddyinfo(args, &parser_data);
/*
 * Parse /proc/zoneinfo into mem_info->zones[]: per-zone min/low/high
 * watermarks and the managed page count, driven by the regex/handler
 * table below.
 */
457 static int compact_get_zoneinfo(struct compact_control *compact)
459 const struct parse_arg args[] = {
460 PARSE_TAG("zone[[:blank:]]+(Normal|DMA|DMA32|HighMem)",
461 compact_parse_zone, ZONE),
462 PARSE_TAG("min[[:blank:]]+([0-9]+)\n",
463 compact_parse_zoneinfo, WM_MIN),
464 PARSE_TAG("low[[:blank:]]+([0-9]+)\n",
465 compact_parse_zoneinfo, WM_LOW),
466 PARSE_TAG("high[[:blank:]]+([0-9]+)\n",
467 compact_parse_zoneinfo, WM_HIGH),
468 PARSE_TAG("managed[[:blank:]]+([0-9]+)\n",
469 compact_parse_zoneinfo, MANAGED),
/* Parser cursor starts at the first zone slot. */
473 struct parser_data parser_data = {
474 .mem_info = compact->mem_info,
475 .zone = &compact->mem_info->zones[0],
477 return proc_parse_zoneinfo(args, &parser_data);
/*
 * Worker-thread main loop: refresh buddyinfo, verify fragmentation,
 * then block on the notify condvar until the memory-state callback
 * signals it; repeat until COMPACT_SKIP is set.
 * NOTE(review): the declaration and use of 'woken' are not fully
 * visible in this excerpt; the loop's final status test reads
 * compact->status outside the lock.
 */
480 static void compact_track_frag_level(struct compact_control *compact)
485 /* Eliminate updates on spurious wake-ups */
487 compact_get_buddyinfo(compact);
488 compaction_verify(compact);
491 pthread_mutex_lock(&compact_data.notify_lock);
492 pthread_cond_wait(&compact_data.notify,
493 &compact_data.notify_lock);
494 pthread_mutex_unlock(&compact_data.notify_lock);
/* Consume the NOTIFIED bit so spurious wake-ups can be told apart. */
496 pthread_mutex_lock(&compact->lock);
497 woken = compact->status & COMPACT_NOTIFIED ? 1 : 0;
498 compact->status &= ~COMPACT_NOTIFIED;
499 pthread_mutex_unlock(&compact->lock);
501 } while (!(compact->status & COMPACT_SKIP));
/*
 * RESOURCED_NOTIFIER_MEM_LEVEL_CHANGED callback: on a valid new memory
 * level, mark the worker as notified and signal its condvar.
 */
505 static int compact_mem_state_changed(void *data)
507 struct compact_control *compact;
508 struct memory_info *mem_info;
509 int result = RESOURCED_ERROR_NONE;
/* drained_lock also guards compact_data.compact against teardown. */
511 pthread_mutex_lock(&compact_data.drained_lock);
512 compact = compact_data.compact;
513 mem_info = compact ? compact->mem_info : NULL;
515 int new_state = *((int *)data);
/* Only levels in [MEM_LEVEL_HIGH, MEM_LEVEL_MAX) are acted upon. */
517 if (new_state < MEM_LEVEL_HIGH || new_state >= MEM_LEVEL_MAX) {
518 result = RESOURCED_ERROR_FAIL;
/* NOTE(review): a NULL check on 'compact' before this dereference is
 * not visible in this excerpt - confirm against the original file. */
522 pthread_mutex_lock(&compact_data.compact->lock);
523 if (!(compact->status & COMPACT_SKIP)) {
524 compact->status |= COMPACT_NOTIFIED;
525 pthread_cond_signal(&compact_data.notify);
527 pthread_mutex_unlock(&compact_data.compact->lock);
530 pthread_mutex_unlock(&compact_data.drained_lock);
/*
 * Tear down compaction state: warn if the thread is not in a SKIP
 * state, unregister the memory-level notifier and destroy the lock.
 * NOTE(review): mem_info is fetched but the lines releasing it (and
 * 'compact' itself) are not visible in this excerpt.
 */
534 static void compact_cleanup(struct compact_control *compact)
536 struct memory_info *mem_info = compact->mem_info;
538 if (!(compact->status & COMPACT_SKIP))
539 _E("Invalid compact thread state [%d]\n", compact->status);
541 unregister_notifier(RESOURCED_NOTIFIER_MEM_LEVEL_CHANGED,
542 compact_mem_state_changed);
544 (void) pthread_mutex_destroy(&compact->lock);
/*
 * Thread entry point: read zone watermarks once, run the tracking loop,
 * then withdraw, clean up under drained_lock and signal 'drained' for a
 * concurrently waiting compact_exit().
 */
550 static void *compact_tracer(void *arg)
552 struct compact_data *cdata = (struct compact_data *)arg;
553 struct compact_control *compact = cdata->compact;
555 if (compact_get_zoneinfo(compact) == RESOURCED_ERROR_NONE)
556 compact_track_frag_level(compact);
558 /* Dropped - so clean-up */
559 pthread_mutex_lock(&compact->lock);
560 compact->status |= COMPACT_WITHDRAW;
561 pthread_mutex_unlock(&compact->lock);
563 pthread_mutex_lock(&cdata->drained_lock);
564 compact_cleanup(compact);
565 cdata->compact = NULL;
566 pthread_mutex_unlock(&cdata->drained_lock);
/* Wake compact_exit() waiting on the drained condvar. */
568 pthread_cond_signal(&cdata->drained);
/*
 * Apply the Compaction section of the optimizer config: mark the module
 * as skipped when not enabled, otherwise override the default
 * fragmentation level with the configured one (when positive).
 */
573 static int compact_parse_config_file(struct compact_control *compact)
575 struct compact_conf *compact_conf = get_compact_conf();
/* NOTE(review): the NULL test guarding this error path is not visible
 * in this excerpt. */
577 _E("Compact configuration should not be NULL");
578 return RESOURCED_ERROR_FAIL;
580 if (!compact_conf->enable) {
581 (void) pthread_mutex_lock(&compact->lock);
582 compact->status |= COMPACT_SKIP;
583 (void) pthread_mutex_unlock(&compact->lock);
586 if (compact_conf->frag_level > 0)
587 compact->frag_level = compact_conf->frag_level;
591 _I("[DEBUG] compact status: %d", compact->status);
592 _I("[DEBUG] compact frag_level: %d", compact->frag_level);
594 return RESOURCED_ERROR_NONE;
/*
 * Module init: allocate the control block and memory_info, init the
 * lock, load the configuration, spawn the detached tracer thread and
 * register the memory-level notifier.
 * NOTE(review): several error-path lines (allocation failure branches,
 * goto labels, the free()s before the final unlock) are not visible in
 * this excerpt.
 */
597 static int compact_init(void *data)
599 struct memory_info *mem_info;
600 struct compact_control *compact;
601 int result = RESOURCED_ERROR_OUT_OF_MEMORY;
603 _I("[DEBUG] compact init");
605 pthread_mutex_lock(&compact_data.drained_lock);
/* A live compact_data.compact means init was called twice. */
606 if (compact_data.compact) {
607 _E("[DEBUG] Unbalanced calls to compact module load/unload\n");
608 result = RESOURCED_ERROR_NONE;
612 compact = calloc(1, sizeof(*compact));
616 mem_info = calloc(1, sizeof(*mem_info));
620 compact->mem_info = mem_info;
621 compact->frag_level = COMPACT_DEF_FRAG_LEVEL;
623 result = pthread_mutex_init(&compact->lock, NULL);
625 _E("[DEBUG] Failed to init compact lock: %m");
629 /* Load configuration */
630 compact_parse_config_file(compact);
/* Config may have disabled the module entirely. */
632 if (compact->status & COMPACT_SKIP) {
633 _I("[DEBUG] Compaction module disabled.");
634 result = RESOURCED_ERROR_FAIL;
638 compact_data.compact = compact;
640 result = pthread_create(&compact->compact_thread, NULL,
641 compact_tracer, (void*)&compact_data);
643 compact_data.compact = NULL;
/* Detached: the tracer cleans itself up; exit syncs via 'drained'. */
647 pthread_detach(compact->compact_thread);
648 pthread_mutex_unlock(&compact_data.drained_lock);
650 register_notifier(RESOURCED_NOTIFIER_MEM_LEVEL_CHANGED,
651 compact_mem_state_changed);
652 return RESOURCED_ERROR_NONE;
659 pthread_mutex_unlock(&compact_data.drained_lock);
/*
 * Module exit: detach the control block from the singleton, request
 * cancellation, wake the tracer and wait on 'drained' until it has
 * finished cleaning up.
 */
663 static int compact_exit(void *data)
665 struct compact_control *compact;
667 pthread_mutex_lock(&compact_data.drained_lock);
668 compact = compact_data.compact;
669 compact_data.compact = NULL;
/* NOTE(review): the NULL check on 'compact' that should precede this
 * dereference is not visible in this excerpt. */
674 pthread_mutex_lock(&compact->lock);
675 compact->status |= COMPACT_CANCEL;
676 pthread_mutex_unlock(&compact->lock);
678 pthread_cond_signal(&compact_data.notify);
679 pthread_cond_wait(&compact_data.drained, &compact_data.drained_lock);
681 pthread_mutex_unlock(&compact_data.drained_lock);
/*
 * Runtime capability probe: the module is supported only when
 * /proc/sys/vm/compact_memory can be opened for writing.
 */
685 static int compact_runtime_support(void *data)
687 _cleanup_close_ int fd = -1;
689 fd = open(PROC_COMPACT_ENTRY, O_WRONLY);
/* NOTE(review): the 'fd < 0' test guarding this error path is not
 * visible in this excerpt. */
691 _E("Unable to open compaction procfs entry\n");
692 return RESOURCED_ERROR_NO_DATA;
694 return RESOURCED_ERROR_NONE;
/*
 * Module registration: late-priority module exposing init/exit and the
 * runtime-support probe to the resourced core.
 */
697 static struct module_ops compact_module_ops = {
698 .priority = MODULE_PRIORITY_LATE,
700 .init = compact_init,
701 .exit = compact_exit,
702 .check_runtime_support = compact_runtime_support,
705 MODULE_REGISTER(&compact_module_ops)