From 557b77876a5bf6e147359c62c07de6209084ad66 Mon Sep 17 00:00:00 2001 From: SangYoun Kwak Date: Wed, 12 Jul 2023 16:12:27 +0900 Subject: [PATCH] lowmem-monitor: Add PSI monitoring method PSI, the "Pressure Stall Information" functionality of the kernel monitors the stall time of CPU, IO and Memory. Since it monitors memory stall, it can be used as a monitoring method of the LMK. As a new monitoring method, PSI monitoring has been added to the LMK. (To use this method, the kernel should support PSI) Change-Id: Ic19fb8a74b4047788b8ce52f18a1f04d028ef136 Signed-off-by: SangYoun Kwak --- src/resource-limiter/memory/lowmem-monitor-psi.c | 389 +++++++++++++++++++++++ 1 file changed, 389 insertions(+) create mode 100644 src/resource-limiter/memory/lowmem-monitor-psi.c diff --git a/src/resource-limiter/memory/lowmem-monitor-psi.c b/src/resource-limiter/memory/lowmem-monitor-psi.c new file mode 100644 index 0000000..b15e628 --- /dev/null +++ b/src/resource-limiter/memory/lowmem-monitor-psi.c @@ -0,0 +1,389 @@ +/** + * resourced + * + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file lowmem-monitor-psi.c + * @desc Provides monitor functionalities to detect lowmem state using PSI + */ + +#include +#include +#include +#include +#include +#include + +#include "lowmem.h" +#include "procfs.h" +#include "trace.h" +#include "module.h" + +#define EPOLL_LISTENER_POLLING_PERIOD_SECONDS 3 +#define EPOLL_LISTENER_MAX_EVENTS 100 +#define BUFF_MAX 255 + +#define PSI_PATH "/proc/pressure" +#define PSI_MEMORY_PATH PSI_PATH "/memory" +#define PSI_TYPE_SOME "some" +#define PSI_TYPE_FULL "full" + +struct psi_memory_monitor_info { + const int mem_level; + const char *mem_level_str; + const char *psi_type; + const int stall_us; + const int window_us; + int fd; +}; + +typedef void *(*epoll_event_handler)(void *data); + +struct epoll_event_data { + epoll_event_handler handler; + void *data; +}; + +static int g_psi_monitor_epoll_fd = -1; +static pthread_t g_psi_monitor_thread; +static int g_psi_monitor_thread_destroy_event_fd = -1; + +static void assert_mem_level(int mem_level) +{ + switch (mem_level) { + case MEM_LEVEL_HIGH: + case MEM_LEVEL_MEDIUM: + case MEM_LEVEL_LOW: + case MEM_LEVEL_CRITICAL: + case MEM_LEVEL_OOM: + return; + default: + assert("Invalid memory level" && 0); + } +} + +static void *psi_memory_monitor_handler(struct psi_memory_monitor_info *info) +{ + assert_mem_level(info->mem_level); + + lowmem_trigger_memory_state_action(info->mem_level); + + return NULL; +} + +static void *psi_monitor_thread_destroy_handler(int *efd) +{ + eventfd_t dummy = 0; + eventfd_read(*efd, &dummy); + + pthread_exit(NULL); +} + +static struct epoll_event_data g_psi_monitor_thread_destroy_event_data = { + .handler = (epoll_event_handler)psi_monitor_thread_destroy_handler, + .data = &g_psi_monitor_thread_destroy_event_fd, +}; + +static int register_psi_event_epoll(struct epoll_event_data *event_data) +{ + char trigger_description[BUFF_MAX] = { 0 }; + int trigger_description_len = -1; + int ret = 0; + int fd = -1; + struct psi_memory_monitor_info *info = NULL; + struct epoll_event event; + + assert(g_psi_monitor_epoll_fd >= 0); + + assert(event_data); + info = event_data->data; + assert(info); + assert_mem_level(info->mem_level); + + fd = open(PSI_MEMORY_PATH, O_RDWR | O_NONBLOCK); + if (fd < 0) { + _E("Failed to open psi node: path=%s, errno=%d", + PSI_MEMORY_PATH, errno); + goto error_1; + } + info->fd = fd; + + event.events = EPOLLPRI; + event.data.ptr = (void *)event_data; + + trigger_description_len = snprintf(trigger_description, BUFF_MAX, + "%s %d %d", + info->psi_type, + info->stall_us, + info->window_us); + if (trigger_description_len < 0) { + _E("Failed to write PSI trigger description: returned value=%d", + trigger_description_len); + goto error_2; + } + if (trigger_description_len >= BUFF_MAX) { + _E("Failed to write PSI trigger description: buffer is not enough"); + goto error_2; + } + + ret = write(fd, trigger_description, trigger_description_len + 1); + if (ret < 0) { + _E("Failed to write PSI trigger description(\"%s\"): errno=%d", + trigger_description, errno); + goto error_2; + } + + ret = epoll_ctl(g_psi_monitor_epoll_fd, EPOLL_CTL_ADD, fd, &event); + if (ret < 0) { + _E("Failed to add epoll: errno=%d", errno); + goto error_2; + } + + _I("PSI event registered(%s %d %d) as %s mem_level", info->psi_type, + info->stall_us, + info->window_us, + info->mem_level_str); + + return 0; + +error_2: + close(fd); + info->fd = -1; +error_1: + _E("Failed to register psi event: mem_level=%s", info->mem_level_str); + + return RESOURCED_ERROR_FAIL; +} + +static void unregister_psi_event_epoll(struct epoll_event_data *event_data) +{ + struct psi_memory_monitor_info *info = NULL; + + assert(event_data); + + info = event_data->data; + assert(info); + assert_mem_level(info->mem_level); + + if (info->fd < 0) + return; + + epoll_ctl(g_psi_monitor_epoll_fd, EPOLL_CTL_DEL, info->fd, NULL); + close(info->fd); + info->fd = -1; +} + +static void *psi_monitor_thread_worker(void *data) +{ + struct epoll_event events[EPOLL_LISTENER_MAX_EVENTS]; + int events_num = 0; + + while (1) { + events_num = epoll_wait(g_psi_monitor_epoll_fd, events, EPOLL_LISTENER_MAX_EVENTS, 0); + + for (int i = 0; i < events_num; ++i) { + if (events[i].events & (EPOLLERR | EPOLLHUP)) + continue; + + struct epoll_event_data *event_data = events[i].data.ptr; + event_data->handler(event_data->data); + } + + sleep(EPOLL_LISTENER_POLLING_PERIOD_SECONDS); + } + + pthread_exit(NULL); +} + +static int create_psi_monitor_thread(void) +{ + int ret = 0; + struct epoll_event event; + + g_psi_monitor_thread_destroy_event_fd = eventfd(0, EFD_CLOEXEC); + if (g_psi_monitor_thread_destroy_event_fd < 0) { + _E("Failed to create eventfd for thread: errno=%d", errno); + return RESOURCED_ERROR_FAIL; + } + g_psi_monitor_thread_destroy_event_data.data = + (void *)(&g_psi_monitor_thread_destroy_event_fd); + + event.events = EPOLLIN; + event.data.ptr = (void *)(&g_psi_monitor_thread_destroy_event_data); + + ret = epoll_ctl(g_psi_monitor_epoll_fd, EPOLL_CTL_ADD, g_psi_monitor_thread_destroy_event_fd, &event); + if (ret != 0) { + close(g_psi_monitor_thread_destroy_event_fd); + g_psi_monitor_thread_destroy_event_fd = -1; + _E("Failed to add eventfd for thread: errno=%d", errno); + return RESOURCED_ERROR_FAIL; + } + + ret = pthread_create(&g_psi_monitor_thread, NULL, psi_monitor_thread_worker, NULL); + if (ret != 0) { + epoll_ctl(g_psi_monitor_epoll_fd, EPOLL_CTL_DEL, g_psi_monitor_thread_destroy_event_fd, NULL); + close(g_psi_monitor_thread_destroy_event_fd); + g_psi_monitor_thread_destroy_event_fd = -1; + _E("Failed to create psi monitor thread: errno=%d", errno); + return RESOURCED_ERROR_FAIL; + } + + return RESOURCED_ERROR_NONE; +} + +static void destroy_psi_monitor_thread(void) +{ + eventfd_write(g_psi_monitor_thread_destroy_event_fd, 1); + pthread_join(g_psi_monitor_thread, NULL); + + epoll_ctl(g_psi_monitor_epoll_fd, EPOLL_CTL_DEL, g_psi_monitor_thread_destroy_event_fd, NULL); + close(g_psi_monitor_thread_destroy_event_fd); + g_psi_monitor_thread_destroy_event_fd = -1; +} + +static struct psi_memory_monitor_info g_psi_memory_monitor_info_list[MEM_LEVEL_MAX] = { + [MEM_LEVEL_HIGH] = { + .mem_level = MEM_LEVEL_HIGH, + .mem_level_str = "high", + .psi_type = PSI_TYPE_SOME, + .stall_us = 70000, + .window_us = 1000000, + .fd = -1, + }, + [MEM_LEVEL_MEDIUM] = { + .mem_level = MEM_LEVEL_MEDIUM, + .mem_level_str = "medium", + .psi_type = PSI_TYPE_SOME, + .stall_us = 70000, + .window_us = 1000000, + .fd = -1, + }, + [MEM_LEVEL_LOW] = { + .mem_level = MEM_LEVEL_LOW, + .mem_level_str = "low", + .psi_type = PSI_TYPE_SOME, + .stall_us = 150000, + .window_us = 1000000, + .fd = -1, + }, + [MEM_LEVEL_CRITICAL] = { + .mem_level = MEM_LEVEL_CRITICAL, + .mem_level_str = "critical", + .psi_type = PSI_TYPE_FULL, + .stall_us = 70000, + .window_us = 1000000, + .fd = -1, + }, + [MEM_LEVEL_OOM] = { + .mem_level = MEM_LEVEL_OOM, + .mem_level_str = "oom", + .psi_type = PSI_TYPE_FULL, + .stall_us = 150000, + .window_us = 1000000, + .fd = -1, + } +}; + +static struct epoll_event_data g_psi_event_datas[MEM_LEVEL_MAX] = { + [MEM_LEVEL_HIGH] = { + .handler = (epoll_event_handler)psi_memory_monitor_handler, + .data = &g_psi_memory_monitor_info_list[MEM_LEVEL_HIGH], + }, + [MEM_LEVEL_MEDIUM] = { + .handler = (epoll_event_handler)psi_memory_monitor_handler, + .data = &g_psi_memory_monitor_info_list[MEM_LEVEL_MEDIUM], + }, + [MEM_LEVEL_LOW] = { + .handler = (epoll_event_handler)psi_memory_monitor_handler, + .data = &g_psi_memory_monitor_info_list[MEM_LEVEL_LOW], + }, + [MEM_LEVEL_CRITICAL] = { + .handler = (epoll_event_handler)psi_memory_monitor_handler, + .data = &g_psi_memory_monitor_info_list[MEM_LEVEL_CRITICAL], + }, + [MEM_LEVEL_OOM] = { + .handler = (epoll_event_handler)psi_memory_monitor_handler, + .data = &g_psi_memory_monitor_info_list[MEM_LEVEL_OOM], + } +}; + +static int register_psi_events(void) +{ + int mem_level = -1; + + g_psi_monitor_epoll_fd = epoll_create(1); + if (g_psi_monitor_epoll_fd < 0) { + _E("Failed to create epoll fd: %d", -errno); + return RESOURCED_ERROR_FAIL; + } + + for (mem_level = MEM_LEVEL_HIGH; mem_level < MEM_LEVEL_MAX; ++mem_level) { + if (register_psi_event_epoll(&g_psi_event_datas[mem_level]) < 0) + break; + } + + if (mem_level < MEM_LEVEL_MAX) { + for (mem_level = mem_level - 1; mem_level >= MEM_LEVEL_HIGH; --mem_level) + unregister_psi_event_epoll(&g_psi_event_datas[mem_level]); + close(g_psi_monitor_epoll_fd); + g_psi_monitor_epoll_fd = -1; + return RESOURCED_ERROR_FAIL; + } + + return RESOURCED_ERROR_NONE; +} + +static void unregister_psi_events(void) +{ + for (int mem_level = MEM_LEVEL_HIGH; mem_level < MEM_LEVEL_MAX; ++mem_level) + unregister_psi_event_epoll(&g_psi_event_datas[mem_level]); + close(g_psi_monitor_epoll_fd); + g_psi_monitor_epoll_fd = -1; +} + +static int lowmem_monitor_psi_initialize(void *data) +{ + if (register_psi_events() != RESOURCED_ERROR_NONE) { + _E("Failed to register psi fds to epoll fd"); + return RESOURCED_ERROR_FAIL; + } + + if (create_psi_monitor_thread() != RESOURCED_ERROR_NONE) { + _E("Failed to create psi monitor thread"); + unregister_psi_events(); + return RESOURCED_ERROR_FAIL; + } + + return RESOURCED_ERROR_NONE; +} + +static int lowmem_monitor_psi_finalize(void *data) +{ + destroy_psi_monitor_thread(); + unregister_psi_events(); + + return RESOURCED_ERROR_NONE; +} + +static struct module_ops g_lowmem_monitor_psi_modules_ops = { + .priority = MODULE_PRIORITY_INITIAL, + .name = "lowmem-monitor-psi", + .init = lowmem_monitor_psi_initialize, + .exit = lowmem_monitor_psi_finalize, +}; + +MODULE_REGISTER(&g_lowmem_monitor_psi_modules_ops) -- 2.7.4