lowmem-monitor: Add PSI monitoring method 54/295654/5
author SangYoun Kwak <sy.kwak@samsung.com>
Wed, 12 Jul 2023 07:12:27 +0000 (16:12 +0900)
committer SangYoun Kwak <sy.kwak@samsung.com>
Thu, 13 Jul 2023 08:31:29 +0000 (17:31 +0900)
PSI ("Pressure Stall Information") is a kernel feature that monitors
the stall time of CPU, IO and memory.
Because it reports memory stalls, it can be used as a monitoring method
for the LMK.

PSI monitoring has been added to the LMK as a new monitoring method.
(To use this method, the kernel must support PSI.)

Change-Id: Ic19fb8a74b4047788b8ce52f18a1f04d028ef136
Signed-off-by: SangYoun Kwak <sy.kwak@samsung.com>
src/resource-limiter/memory/lowmem-monitor-psi.c [new file with mode: 0644]

diff --git a/src/resource-limiter/memory/lowmem-monitor-psi.c b/src/resource-limiter/memory/lowmem-monitor-psi.c
new file mode 100644 (file)
index 0000000..b15e628
--- /dev/null
@@ -0,0 +1,389 @@
+/**
+ * resourced
+ *
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file lowmem-monitor-psi.c
+ * @desc Provides monitor functionalities to detect lowmem state using PSI
+ */
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/epoll.h>
+#include <sys/eventfd.h>
+#include <pthread.h>
+
+#include "lowmem.h"
+#include "procfs.h"
+#include "trace.h"
+#include "module.h"
+
+/* Seconds the monitor thread sleeps between two non-blocking epoll_wait() calls */
+#define EPOLL_LISTENER_POLLING_PERIOD_SECONDS  3
+/* Capacity of the event array handed to a single epoll_wait() call */
+#define EPOLL_LISTENER_MAX_EVENTS              100
+/* Size of the buffer in which a PSI trigger description is formatted */
+#define BUFF_MAX                               255
+
+/* PSI_MEMORY_PATH expands to "/proc/pressure" "/memory" (adjacent literals are concatenated) */
+#define PSI_PATH                               "/proc/pressure"
+#define PSI_MEMORY_PATH PSI_PATH               "/memory"
+/* Values used for psi_memory_monitor_info.psi_type, the first field of a trigger description */
+#define PSI_TYPE_SOME                          "some"
+#define PSI_TYPE_FULL                          "full"
+
+/**
+ * One PSI trigger and the memory level it is mapped to.
+ * psi_type, stall_us and window_us are formatted as "%s %d %d" and written
+ * to PSI_MEMORY_PATH when the trigger is registered.
+ */
+struct psi_memory_monitor_info {
+       /* Level passed to lowmem_trigger_memory_state_action() when the trigger fires */
+       const int mem_level;
+       /* Name of the level, used only in log messages */
+       const char *mem_level_str;
+       /* PSI_TYPE_SOME or PSI_TYPE_FULL */
+       const char *psi_type;
+       /* Second trigger field (stall threshold; microseconds going by the name) */
+       const int stall_us;
+       /* Third trigger field (tracking window; microseconds going by the name) */
+       const int window_us;
+       /* Descriptor of the opened PSI node, -1 while the trigger is not registered */
+       int fd;
+};
+
+/* Callback invoked by the monitor thread for a ready epoll entry */
+typedef void *(*epoll_event_handler)(void *data);
+
+/**
+ * Payload stored in epoll_event.data.ptr: the monitor thread calls
+ * handler(data) for every ready entry that reports neither EPOLLERR nor EPOLLHUP.
+ */
+struct epoll_event_data {
+       epoll_event_handler handler;
+       void *data;
+};
+
+/* epoll instance holding the PSI trigger fds and the thread-destroy eventfd; -1 when closed */
+static int g_psi_monitor_epoll_fd = -1;
+/* Thread running psi_monitor_thread_worker() */
+static pthread_t g_psi_monitor_thread;
+/* eventfd written to ask the monitor thread to exit; -1 when not created */
+static int g_psi_monitor_thread_destroy_event_fd = -1;
+
+/**
+ * Abort (through assert) when mem_level is not one of the known
+ * MEM_LEVEL_* values; does nothing for a valid level.
+ */
+static void assert_mem_level(int mem_level)
+{
+       int is_known_level = 0;
+
+       switch (mem_level) {
+       case MEM_LEVEL_HIGH:
+       case MEM_LEVEL_MEDIUM:
+       case MEM_LEVEL_LOW:
+       case MEM_LEVEL_CRITICAL:
+       case MEM_LEVEL_OOM:
+               is_known_level = 1;
+               break;
+       default:
+               break;
+       }
+
+       if (!is_known_level)
+               assert("Invalid memory level" && 0);
+}
+
+/**
+ * epoll handler of a PSI memory trigger: forwards the memory level of the
+ * trigger that fired to lowmem_trigger_memory_state_action().
+ *
+ * The function is declared with the exact epoll_event_handler signature
+ * because the monitor thread calls it through that pointer type; calling a
+ * function through a pointer to an incompatible function type is undefined
+ * behavior in C.
+ *
+ * @param data  struct psi_memory_monitor_info of the trigger that fired
+ * @return always NULL
+ */
+static void *psi_memory_monitor_handler(void *data)
+{
+       struct psi_memory_monitor_info *info = data;
+
+       assert(info);
+       assert_mem_level(info->mem_level);
+
+       lowmem_trigger_memory_state_action(info->mem_level);
+
+       return NULL;
+}
+
+/**
+ * epoll handler of the thread-destroy eventfd: drains the eventfd and
+ * terminates the calling thread.
+ *
+ * The function is declared with the exact epoll_event_handler signature
+ * because the monitor thread calls it through that pointer type; calling a
+ * function through a pointer to an incompatible function type is undefined
+ * behavior in C.
+ *
+ * @param data  pointer to the int holding the eventfd descriptor
+ * @return does not return; the thread exits with a NULL result
+ */
+static void *psi_monitor_thread_destroy_handler(void *data)
+{
+       int *efd = data;
+       eventfd_t dummy = 0;
+
+       /* The counter value carries no information, it is only consumed */
+       eventfd_read(*efd, &dummy);
+
+       pthread_exit(NULL);
+}
+
+/*
+ * epoll payload of the thread-destroy eventfd: when the eventfd becomes
+ * readable the monitor thread runs psi_monitor_thread_destroy_handler()
+ * with a pointer to the descriptor variable.
+ */
+static struct epoll_event_data g_psi_monitor_thread_destroy_event_data = {
+       .handler = (epoll_event_handler)psi_monitor_thread_destroy_handler,
+       .data = &g_psi_monitor_thread_destroy_event_fd,
+};
+
+/**
+ * Open the PSI memory node, arm it with the trigger described by
+ * event_data->data and add the descriptor to the monitor epoll instance.
+ *
+ * @param event_data  entry whose data points to a struct psi_memory_monitor_info;
+ *                    the same entry is stored as the epoll user data
+ * @return 0 on success (info->fd holds the opened descriptor),
+ *         RESOURCED_ERROR_FAIL on failure (an opened descriptor is closed again)
+ */
+static int register_psi_event_epoll(struct epoll_event_data *event_data)
+{
+       char trigger_description[BUFF_MAX] = { 0 };
+       int trigger_description_len = -1;
+       ssize_t written = -1;
+       int ret = 0;
+       int fd = -1;
+       struct psi_memory_monitor_info *info = NULL;
+       struct epoll_event event = { 0 };
+
+       assert(g_psi_monitor_epoll_fd >= 0);
+
+       assert(event_data);
+       info = event_data->data;
+       assert(info);
+       assert_mem_level(info->mem_level);
+
+       /* O_CLOEXEC keeps the trigger fd out of exec'ed children, like the EFD_CLOEXEC eventfd */
+       fd = open(PSI_MEMORY_PATH, O_RDWR | O_NONBLOCK | O_CLOEXEC);
+       if (fd < 0) {
+               _E("Failed to open psi node: path=%s, errno=%d",
+                                       PSI_MEMORY_PATH, errno);
+               goto error_1;
+       }
+       info->fd = fd;
+
+       event.events = EPOLLPRI;
+       event.data.ptr = (void *)event_data;
+
+       trigger_description_len = snprintf(trigger_description,
+                                       sizeof(trigger_description),
+                                       "%s %d %d",
+                                       info->psi_type,
+                                       info->stall_us,
+                                       info->window_us);
+       if (trigger_description_len < 0) {
+               _E("Failed to write PSI trigger description: returned value=%d",
+                                               trigger_description_len);
+               goto error_2;
+       }
+       if ((size_t)trigger_description_len >= sizeof(trigger_description)) {
+               _E("Failed to write PSI trigger description: buffer is not enough");
+               goto error_2;
+       }
+
+       /* The terminating NUL is written together with the text */
+       written = write(fd, trigger_description, trigger_description_len + 1);
+       if (written < 0) {
+               _E("Failed to write PSI trigger description(\"%s\"): errno=%d",
+                                               trigger_description, errno);
+               goto error_2;
+       }
+       if (written != trigger_description_len + 1) {
+               /* A partially written description must not be treated as an armed trigger */
+               _E("Failed to write PSI trigger description(\"%s\"): short write",
+                                               trigger_description);
+               goto error_2;
+       }
+
+       ret = epoll_ctl(g_psi_monitor_epoll_fd, EPOLL_CTL_ADD, fd, &event);
+       if (ret < 0) {
+               _E("Failed to add epoll: errno=%d", errno);
+               goto error_2;
+       }
+
+       _I("PSI event registered(%s %d %d) as %s mem_level", info->psi_type,
+                                                       info->stall_us,
+                                                       info->window_us,
+                                                       info->mem_level_str);
+
+       return 0;
+
+error_2:
+       close(fd);
+       info->fd = -1;
+error_1:
+       _E("Failed to register psi event: mem_level=%s", info->mem_level_str);
+
+       return RESOURCED_ERROR_FAIL;
+}
+
+/**
+ * Remove a PSI trigger from the monitor epoll instance and close its
+ * descriptor. Does nothing when the trigger is not registered (fd < 0).
+ *
+ * @param event_data  entry whose data points to a struct psi_memory_monitor_info
+ */
+static void unregister_psi_event_epoll(struct epoll_event_data *event_data)
+{
+       struct psi_memory_monitor_info *info = NULL;
+
+       assert(event_data);
+
+       info = event_data->data;
+       assert(info);
+       assert_mem_level(info->mem_level);
+
+       if (info->fd >= 0) {
+               epoll_ctl(g_psi_monitor_epoll_fd, EPOLL_CTL_DEL, info->fd, NULL);
+               close(info->fd);
+               info->fd = -1;
+       }
+}
+
+/**
+ * Body of the PSI monitor thread: collects the ready epoll entries without
+ * blocking, runs the handler of every entry that reports neither EPOLLERR
+ * nor EPOLLHUP, then sleeps EPOLL_LISTENER_POLLING_PERIOD_SECONDS and repeats.
+ * The loop has no exit of its own.
+ *
+ * @param data  unused
+ */
+static void *psi_monitor_thread_worker(void *data)
+{
+       struct epoll_event ready[EPOLL_LISTENER_MAX_EVENTS];
+
+       for (;;) {
+               /* Timeout 0: only collect what is already pending */
+               int ready_cnt = epoll_wait(g_psi_monitor_epoll_fd, ready, EPOLL_LISTENER_MAX_EVENTS, 0);
+               int idx = 0;
+
+               /* A negative ready_cnt (epoll_wait failure) skips the dispatch */
+               while (idx < ready_cnt) {
+                       struct epoll_event *current = &ready[idx++];
+
+                       if (!(current->events & (EPOLLERR | EPOLLHUP))) {
+                               struct epoll_event_data *event_data = current->data.ptr;
+                               event_data->handler(event_data->data);
+                       }
+               }
+
+               sleep(EPOLL_LISTENER_POLLING_PERIOD_SECONDS);
+       }
+
+       pthread_exit(NULL);
+}
+
+/**
+ * Create the thread-destroy eventfd, add it to the monitor epoll instance
+ * and start the monitor thread.
+ *
+ * On failure everything set up by this function is undone and
+ * g_psi_monitor_thread_destroy_event_fd is -1 again.
+ *
+ * @return RESOURCED_ERROR_NONE on success, RESOURCED_ERROR_FAIL otherwise
+ */
+static int create_psi_monitor_thread(void)
+{
+       int ret = 0;
+       struct epoll_event event = { 0 };
+
+       g_psi_monitor_thread_destroy_event_fd = eventfd(0, EFD_CLOEXEC);
+       if (g_psi_monitor_thread_destroy_event_fd < 0) {
+               _E("Failed to create eventfd for thread: errno=%d", errno);
+               return RESOURCED_ERROR_FAIL;
+       }
+       g_psi_monitor_thread_destroy_event_data.data =
+                       (void *)(&g_psi_monitor_thread_destroy_event_fd);
+
+       event.events = EPOLLIN;
+       event.data.ptr = (void *)(&g_psi_monitor_thread_destroy_event_data);
+
+       ret = epoll_ctl(g_psi_monitor_epoll_fd, EPOLL_CTL_ADD, g_psi_monitor_thread_destroy_event_fd, &event);
+       if (ret != 0) {
+               /* Log before the cleanup: close() may overwrite errno */
+               _E("Failed to add eventfd for thread: errno=%d", errno);
+               goto close_event_fd;
+       }
+
+       ret = pthread_create(&g_psi_monitor_thread, NULL, psi_monitor_thread_worker, NULL);
+       if (ret != 0) {
+               /* pthread_create() returns the error number and leaves errno alone */
+               _E("Failed to create psi monitor thread: errno=%d", ret);
+               goto remove_event_fd;
+       }
+
+       return RESOURCED_ERROR_NONE;
+
+remove_event_fd:
+       epoll_ctl(g_psi_monitor_epoll_fd, EPOLL_CTL_DEL, g_psi_monitor_thread_destroy_event_fd, NULL);
+close_event_fd:
+       close(g_psi_monitor_thread_destroy_event_fd);
+       g_psi_monitor_thread_destroy_event_fd = -1;
+
+       return RESOURCED_ERROR_FAIL;
+}
+
+/**
+ * Ask the monitor thread to exit through the thread-destroy eventfd, wait
+ * for it and release the eventfd.
+ *
+ * Does nothing when the eventfd descriptor is -1: that is its value before
+ * create_psi_monitor_thread() succeeded and after this function has run, so
+ * no thread exists that could be joined.
+ */
+static void destroy_psi_monitor_thread(void)
+{
+       if (g_psi_monitor_thread_destroy_event_fd < 0)
+               return;
+
+       if (eventfd_write(g_psi_monitor_thread_destroy_event_fd, 1) < 0) {
+               /*
+                * The thread will never see the destroy event, so joining it
+                * would block forever; cancel it instead.
+                * NOTE(review): cancellation may hit a handler at a
+                * cancellation point - confirm this is acceptable.
+                */
+               _E("Failed to notify psi monitor thread: errno=%d", errno);
+               pthread_cancel(g_psi_monitor_thread);
+       }
+       pthread_join(g_psi_monitor_thread, NULL);
+
+       epoll_ctl(g_psi_monitor_epoll_fd, EPOLL_CTL_DEL, g_psi_monitor_thread_destroy_event_fd, NULL);
+       close(g_psi_monitor_thread_destroy_event_fd);
+       g_psi_monitor_thread_destroy_event_fd = -1;
+}
+
+/*
+ * PSI trigger of every memory level, indexed by MEM_LEVEL_*.
+ * HIGH, MEDIUM and LOW use "some" triggers, CRITICAL and OOM use "full"
+ * triggers; all entries start unregistered (fd = -1).
+ * NOTE(review): HIGH and MEDIUM carry identical trigger parameters
+ * (some 70000 1000000) - confirm this is intended.
+ */
+static struct psi_memory_monitor_info g_psi_memory_monitor_info_list[MEM_LEVEL_MAX] = {
+       [MEM_LEVEL_HIGH] = {
+               .mem_level = MEM_LEVEL_HIGH,
+               .mem_level_str = "high",
+               .psi_type = PSI_TYPE_SOME,
+               .stall_us = 70000,
+               .window_us = 1000000,
+               .fd = -1,
+       },
+       [MEM_LEVEL_MEDIUM] = {
+               .mem_level = MEM_LEVEL_MEDIUM,
+               .mem_level_str = "medium",
+               .psi_type = PSI_TYPE_SOME,
+               .stall_us = 70000,
+               .window_us = 1000000,
+               .fd = -1,
+       },
+       [MEM_LEVEL_LOW] = {
+               .mem_level = MEM_LEVEL_LOW,
+               .mem_level_str = "low",
+               .psi_type = PSI_TYPE_SOME,
+               .stall_us = 150000,
+               .window_us = 1000000,
+               .fd = -1,
+       },
+       [MEM_LEVEL_CRITICAL] = {
+               .mem_level = MEM_LEVEL_CRITICAL,
+               .mem_level_str = "critical",
+               .psi_type = PSI_TYPE_FULL,
+               .stall_us = 70000,
+               .window_us = 1000000,
+               .fd = -1,
+       },
+       [MEM_LEVEL_OOM] = {
+               .mem_level = MEM_LEVEL_OOM,
+               .mem_level_str = "oom",
+               .psi_type = PSI_TYPE_FULL,
+               .stall_us = 150000,
+               .window_us = 1000000,
+               .fd = -1,
+       }
+};
+
+/*
+ * epoll payload of every memory level, indexed by MEM_LEVEL_*: each entry
+ * pairs psi_memory_monitor_handler() with the trigger description of the
+ * same level in g_psi_memory_monitor_info_list.
+ */
+static struct epoll_event_data g_psi_event_datas[MEM_LEVEL_MAX] = {
+       [MEM_LEVEL_HIGH] = {
+               .handler = (epoll_event_handler)psi_memory_monitor_handler,
+               .data = &g_psi_memory_monitor_info_list[MEM_LEVEL_HIGH],
+       },
+       [MEM_LEVEL_MEDIUM] = {
+               .handler = (epoll_event_handler)psi_memory_monitor_handler,
+               .data = &g_psi_memory_monitor_info_list[MEM_LEVEL_MEDIUM],
+       },
+       [MEM_LEVEL_LOW] = {
+               .handler = (epoll_event_handler)psi_memory_monitor_handler,
+               .data = &g_psi_memory_monitor_info_list[MEM_LEVEL_LOW],
+       },
+       [MEM_LEVEL_CRITICAL] = {
+               .handler = (epoll_event_handler)psi_memory_monitor_handler,
+               .data = &g_psi_memory_monitor_info_list[MEM_LEVEL_CRITICAL],
+       },
+       [MEM_LEVEL_OOM] = {
+               .handler = (epoll_event_handler)psi_memory_monitor_handler,
+               .data = &g_psi_memory_monitor_info_list[MEM_LEVEL_OOM],
+       }
+};
+
+/**
+ * Create the monitor epoll instance and register the PSI trigger of every
+ * memory level from MEM_LEVEL_HIGH up to (excluding) MEM_LEVEL_MAX.
+ *
+ * When one registration fails, the triggers registered before it are
+ * removed again and the epoll instance is closed.
+ *
+ * @return RESOURCED_ERROR_NONE on success, RESOURCED_ERROR_FAIL otherwise
+ */
+static int register_psi_events(void)
+{
+       int mem_level = -1;
+
+       /* Close-on-exec, like the EFD_CLOEXEC eventfd used by the monitor thread */
+       g_psi_monitor_epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+       if (g_psi_monitor_epoll_fd < 0) {
+               _E("Failed to create epoll fd: %d", -errno);
+               return RESOURCED_ERROR_FAIL;
+       }
+
+       for (mem_level = MEM_LEVEL_HIGH; mem_level < MEM_LEVEL_MAX; ++mem_level) {
+               /* register_psi_event_epoll() returns 0 on success only */
+               if (register_psi_event_epoll(&g_psi_event_datas[mem_level]) != 0)
+                       break;
+       }
+
+       if (mem_level < MEM_LEVEL_MAX) {
+               /* Roll back the levels registered before the failing one */
+               for (mem_level = mem_level - 1; mem_level >= MEM_LEVEL_HIGH; --mem_level)
+                       unregister_psi_event_epoll(&g_psi_event_datas[mem_level]);
+               close(g_psi_monitor_epoll_fd);
+               g_psi_monitor_epoll_fd = -1;
+               return RESOURCED_ERROR_FAIL;
+       }
+
+       return RESOURCED_ERROR_NONE;
+}
+
+/**
+ * Unregister the PSI trigger of every memory level and close the monitor
+ * epoll instance.
+ */
+static void unregister_psi_events(void)
+{
+       int level = MEM_LEVEL_HIGH;
+
+       while (level < MEM_LEVEL_MAX) {
+               unregister_psi_event_epoll(&g_psi_event_datas[level]);
+               ++level;
+       }
+
+       close(g_psi_monitor_epoll_fd);
+       g_psi_monitor_epoll_fd = -1;
+}
+
+/**
+ * Module init hook: registers the PSI triggers, then starts the monitor
+ * thread. The triggers are unregistered again when the thread cannot be
+ * created.
+ *
+ * @param data  unused
+ * @return RESOURCED_ERROR_NONE on success, RESOURCED_ERROR_FAIL otherwise
+ */
+static int lowmem_monitor_psi_initialize(void *data)
+{
+       int status = register_psi_events();
+
+       if (status != RESOURCED_ERROR_NONE) {
+               _E("Failed to register psi fds to epoll fd");
+               return RESOURCED_ERROR_FAIL;
+       }
+
+       status = create_psi_monitor_thread();
+       if (status == RESOURCED_ERROR_NONE)
+               return RESOURCED_ERROR_NONE;
+
+       _E("Failed to create psi monitor thread");
+       unregister_psi_events();
+
+       return RESOURCED_ERROR_FAIL;
+}
+
+/**
+ * Module exit hook: stops the monitor thread, then unregisters the PSI
+ * triggers and closes the epoll instance.
+ *
+ * @param data  unused
+ * @return always RESOURCED_ERROR_NONE
+ */
+static int lowmem_monitor_psi_finalize(void *data)
+{
+       /* The thread is stopped first: it polls the epoll fd closed below */
+       destroy_psi_monitor_thread();
+       unregister_psi_events();
+
+       return RESOURCED_ERROR_NONE;
+}
+
+/* Module descriptor: init/exit hooks of the PSI based lowmem monitor */
+static struct module_ops g_lowmem_monitor_psi_modules_ops = {
+       .priority       = MODULE_PRIORITY_INITIAL,
+       .name           = "lowmem-monitor-psi",
+       .init           = lowmem_monitor_psi_initialize,
+       .exit           = lowmem_monitor_psi_finalize,
+};
+
+/* Hands the descriptor to the project's MODULE_REGISTER macro */
+MODULE_REGISTER(&g_lowmem_monitor_psi_modules_ops)