cpu-boosting: Modify cpu boosting level to handle cpu contention 38/297138/7
authorUnsung Lee <unsung.lee@samsung.com>
Thu, 10 Aug 2023 23:55:39 +0000 (08:55 +0900)
committerUnsung Lee <unsung.lee@samsung.com>
Wed, 30 Aug 2023 03:17:00 +0000 (12:17 +0900)
Modify cpu boosting level to handle cpu contention.
The cpu boosting governor makes a list of commands to handle cpu contention,
and it decreases cpu boosting level of some threads (victims).

The cpu boosting module waits for timer events to check whether cpu contention
is alleviated or not. If so, it restores the cpu boosting level.
If not, it makes a new timer to check cpu contention after some interval.

Currently, the cpu boosting module just checks whether no PSI events are
triggered for some interval. It is not the best solution, but it is a simple
and intuitive one.

Change-Id: I897c89c054ab11ed08ad16ef171610d7fa2944bd
Signed-off-by: Unsung Lee <unsung.lee@samsung.com>
src/resource-optimizer/cpu/cpu-boosting.c

index fcb4cc9..cfd3225 100644 (file)
@@ -25,6 +25,8 @@
 /* key = tid, value = cpu_boosting info per thread */
 static GHashTable *g_cpu_boosting_info_table[CPU_BOOSTING_LEVEL_END] = { NULL, };
 static GHashTable *dest_table;  /* key = destination process name, value = cpu_boosting_input */
+/* Key = tid, value = cpu_contention_handle_data */
+static GHashTable *g_cpu_contention_handle_data_table;
 
 pthread_t cpu_boosting_thread;
 static struct sched_attr cpu_boosting_attr[CPU_BOOSTING_LEVEL_END];
@@ -33,16 +35,35 @@ static bool cpu_boosting_success[CPU_BOOSTING_LEVEL_END] = {false, };
 GMainLoop *cpu_boosting_loop;
 GMainContext *cpu_boosting_context;
 
+/* GSource id of the most recently attached cpu contention check timer */
+static guint g_latest_timer_id;
+
+/* Delay (msec) before checking whether cpu contention is alleviated */
+#define CPU_CONTENTION_HANDLE_TIMEOUT_MSEC 2000
 #define SOCK_PATH "/run/.resourced.socket"
 
-#define CPU_BOOSTING_SET_REQUEST(_input, _command, _level, _timeout_msec, _pid) \
-{                                                                               \
-       (_input)->client_input.command = _command;                                  \
-       (_input)->client_input.timeout_msec = _timeout_msec;                        \
-       (_input)->client_input.level = _level;                                      \
-       (_input)->client_input.pid = _pid;                                          \
+/**
+ * Information to restore CPU boosting level
+ * before dealing with CPU contention
+ *
+ * One instance is stored per thread in g_cpu_contention_handle_data_table;
+ * the table key points at the tid member of the stored instance.
+ */
+struct cpu_contention_handle_data {
+       pid_t tid; /* thread id of the victim thread */
+       guint timer_id; /* timer id given by the latest registration */
+       cpu_boosting_level_e origin_cpu_boosting_level; /* level recorded at the first registration */
+       cpu_boosting_level_e current_cpu_boosting_level; /* level requested by the latest governor command */
+};
+
+/**
+ * Fill the client_input members (command, timeout_msec, level, pid, flags)
+ * of the cpu boosting input pointed to by _input.
+ *
+ * The macro expands to a bare brace block, and only _input is parenthesized
+ * in the expansion, so pass simple expressions for the other arguments.
+ */
+#define CPU_BOOSTING_SET_REQUEST(_input, _command, _level,                     \
+               _timeout_msec, _pid, _flags)                                    \
+{                                                                              \
+       (_input)->client_input.command = _command;                              \
+       (_input)->client_input.timeout_msec = _timeout_msec;                    \
+       (_input)->client_input.level = _level;                                  \
+       (_input)->client_input.pid = _pid;                                      \
+       (_input)->client_input.flags = _flags;                                  \
 }
 
+static void cpu_boosting_handle_command(
+               struct syscommon_resourced_cpu_boosting_input *input);
+
 static void free_cpu_boosting_info(gpointer data)
 {
        struct syscommon_resourced_cpu_boosting_info *cpu_boosting_info =
@@ -55,6 +76,17 @@ static void free_cpu_boosting_info(gpointer data)
                free(cpu_boosting_info);
 }
 
+/**
+ * Release one struct cpu_contention_handle_data with g_slice_free().
+ *
+ * @param data  pointer to the struct cpu_contention_handle_data to free;
+ *              must not be NULL (checked with assert).
+ */
+static void free_cpu_contention_handle_data(gpointer data)
+{
+       struct cpu_contention_handle_data *cpu_contention_handle_data =
+               (struct cpu_contention_handle_data *)data;
+
+       assert(cpu_contention_handle_data);
+
+       g_slice_free(struct cpu_contention_handle_data,
+                       cpu_contention_handle_data);
+}
+
 static void remove_cpu_boosting_info_in_tables(int *tid)
 {
        gboolean ret_removed;
@@ -81,23 +113,260 @@ static void find_cpu_boosting_info_in_tables(
        }
 }
 
+/**
+ * Tell whether cpu contention is regarded as alleviated for a timer.
+ *
+ * @param timer_id  id of the timer that is asking.
+ * @return true only if timer_id equals g_latest_timer_id, false otherwise.
+ */
+static bool is_cpu_contention_alleviated(guint timer_id)
+{
+       /**
+        * If additional monitor events are not triggered
+        * for CPU_CONTENTION_HANDLE_TIMEOUT_MSEC, then restore CPU boosting level.
+        *
+        * This is detected by comparing ids: a timer whose id is still
+        * the latest one has not been superseded by a newer timer.
+        */
+       if (g_latest_timer_id == timer_id)
+               return true;
+
+       return false;
+}
+
+/**
+ * Restore the cpu boosting level of every thread registered in
+ * g_cpu_contention_handle_data_table, then empty that table.
+ *
+ * For each entry that still has cpu boosting info at its current level,
+ * a CPU_BOOSTING_COMMAND_SET request for the original level is built
+ * and passed to cpu_boosting_handle_command().
+ *
+ * @return RESOURCED_ERROR_NONE after the table has been processed,
+ *         RESOURCED_ERROR_FAIL if the table is empty,
+ *         RESOURCED_ERROR_OUT_OF_MEMORY if a calloc() fails; in that case
+ *         no request is issued and the table is left untouched.
+ */
+static int restore_cpu_boosting_level(void)
+{
+       GHashTableIter cpu_contention_handle_data_iter;
+       guint **gsource_list = NULL;
+       pid_t **tid_list = NULL;
+       int hash_size;
+       gpointer tid;
+       gpointer value;
+       int hash_index = 0;
+       int ret;
+
+       hash_size = g_hash_table_size(g_cpu_contention_handle_data_table);
+       if (hash_size == 0) {
+               _E("[CPU-BOOSTING] No thread to restore cpu boosting level");
+               return RESOURCED_ERROR_FAIL;
+       }
+
+       /**
+        * Secure memory first before running main loop.
+        * If the current memory is not enough, then do not restore now
+        * and wait for interval time.
+        */
+       tid_list = (pid_t **)calloc(hash_size, sizeof(pid_t *));
+       if (tid_list == NULL) {
+               _E("[CPU-BOOSTING] Failed to allocate memory");
+               return RESOURCED_ERROR_OUT_OF_MEMORY;
+       }
+
+       gsource_list = (guint **)calloc(hash_size, sizeof(guint *));
+       if (gsource_list == NULL) {
+               _E("[CPU-BOOSTING] Failed to allocate memory");
+               goto free_mem;
+       }
+
+       /* Preallocate one tid slot and one gsource id slot per table entry */
+       for (hash_index = 0; hash_index < hash_size; hash_index++) {
+               tid_list[hash_index] = (pid_t *)calloc(1, sizeof(pid_t));
+               if (tid_list[hash_index] == NULL)
+                       goto free_mem;
+
+               gsource_list[hash_index] = g_new(guint, 1);
+       }
+
+       /* From here on hash_index counts the slots handed over to a request */
+       hash_index = 0;
+       g_hash_table_iter_init(&cpu_contention_handle_data_iter,
+                       g_cpu_contention_handle_data_table);
+       while (1) {
+               struct syscommon_resourced_cpu_boosting_input *cpu_boosting_input;
+               struct syscommon_resourced_cpu_boosting_info *cpu_boosting_info;
+               struct cpu_contention_handle_data *cpu_contention_handle_data;
+               cpu_boosting_level_e current_cpu_boosting_level;
+               cpu_boosting_level_e origin_cpu_boosting_level;
+               resource_pid_t resource_pid;
+
+               ret = g_hash_table_iter_next(&cpu_contention_handle_data_iter,
+                               &tid, &value);
+               if (ret == 0)
+                       break;
+
+               cpu_contention_handle_data =
+                       (struct cpu_contention_handle_data *)value;
+               current_cpu_boosting_level =
+                       cpu_contention_handle_data->current_cpu_boosting_level;
+               origin_cpu_boosting_level =
+                       cpu_contention_handle_data->origin_cpu_boosting_level;
+
+               /**
+                * Cpu boosting can be already finished
+                * by clearing cpu boosting requests from the source or timeout.
+                * Therefore, first check whether the corresponding thread
+                * cpu boosting is still valid.
+                */
+               cpu_boosting_info = g_hash_table_lookup(
+                               g_cpu_boosting_info_table[current_cpu_boosting_level], tid);
+               if (cpu_boosting_info == NULL)
+                       continue;
+
+               cpu_boosting_input =
+                       g_slice_new0(struct syscommon_resourced_cpu_boosting_input);
+
+               /*
+                * NOTE(review): g_int_hash() is used here to read the integer
+                * that the key points to; presumably this relies on g_int_hash()
+                * returning the pointed-to value unchanged - confirm.
+                */
+               resource_pid.pid = 0;
+               resource_pid.tid_count = 1;
+               resource_pid.tid = tid_list[hash_index];
+               resource_pid.tid[0] = g_int_hash(tid);
+
+               cpu_boosting_input->remove_input = true;
+               cpu_boosting_input->gsource_id = gsource_list[hash_index];
+               cpu_boosting_input->gsource_id[0] =
+                       cpu_boosting_info->gsource_id;
+
+               hash_index++;
+
+               /* timeout_msec = -1, flags are taken from the existing info */
+               CPU_BOOSTING_SET_REQUEST(cpu_boosting_input,
+                               CPU_BOOSTING_COMMAND_SET, origin_cpu_boosting_level,
+                               -1, resource_pid, cpu_boosting_info->cpu_boosting_flags);
+
+               /**
+                * tid_list[index] and gsource_list[index] will be freed
+                * after calling this function.
+                *
+                * NOTE(review): presumably this happens because remove_input
+                * is set to true above - confirm in cpu_boosting_handle_command().
+                */
+               cpu_boosting_handle_command(cpu_boosting_input);
+       }
+
+       /* Free the preallocated slots that were not handed over to a request */
+       for (int index = hash_index; index < hash_size; index++) {
+               free(tid_list[index]);
+               g_free(gsource_list[index]);
+       }
+
+       free(tid_list);
+       g_free(gsource_list);
+       g_hash_table_remove_all(g_cpu_contention_handle_data_table);
+
+       return RESOURCED_ERROR_NONE;
+
+free_mem:
+       /* Only slots [0, hash_index) were allocated before the failure */
+       for (int index = 0; index < hash_index; index++) {
+               free(tid_list[index]);
+               g_free(gsource_list[index]);
+       }
+
+       free(tid_list);
+       free(gsource_list);
+
+       return RESOURCED_ERROR_OUT_OF_MEMORY;
+}
+
+/**
+ * Timer callback that restores the cpu boosting level
+ * when cpu contention is regarded as alleviated.
+ *
+ * @param data  pointer to a guint holding the id of this timer.
+ *              It is released with g_free() here unless the timer is kept.
+ * @return G_SOURCE_CONTINUE if restoring failed with
+ *         RESOURCED_ERROR_OUT_OF_MEMORY (the timer and data are kept),
+ *         G_SOURCE_REMOVE in every other case.
+ */
+static gboolean cpu_contention_handle_timeout(gpointer data)
+{
+       guint *timer_id =(guint *)data;
+       int ret = RESOURCED_ERROR_NONE;
+
+       if (timer_id == NULL) {
+               _E("[CPU-BOOSTING] Timer id cannot be NULL");
+               return G_SOURCE_REMOVE;
+       }
+
+       /* If this timer is not the latest one, ret stays RESOURCED_ERROR_NONE */
+       if (is_cpu_contention_alleviated(*timer_id))
+               ret = restore_cpu_boosting_level();
+
+       /* Keep the timer (and timer_id) so that restoring is tried again */
+       if (ret == RESOURCED_ERROR_OUT_OF_MEMORY)
+               return G_SOURCE_CONTINUE;
+
+       g_free(timer_id);
+
+       return G_SOURCE_REMOVE;
+}
+
+/**
+ * Record, for every tid of a governor command, the data needed to
+ * restore its cpu boosting level later.
+ *
+ * @param input  governor command; must not be NULL (checked with assert).
+ *               Its client_input.pid.tid list and client_input.level are read.
+ * @param origin_cpu_boosting_level  level stored as the original level
+ *               when a tid is registered for the first time.
+ * @param timer_id  timer id stored in each new or updated entry.
+ */
+static void register_cpu_contention_handle_data(
+       struct syscommon_resourced_cpu_boosting_input *input,
+       cpu_boosting_level_e origin_cpu_boosting_level,
+       guint timer_id)
+{
+       struct cpu_contention_handle_data *cpu_contention_handle_data = NULL;
+
+       assert(input);
+
+       /**
+        * Register information to restore cpu boosting level
+        * before dealing with CPU contention.
+        */
+       for (int i = 0; i < input->client_input.pid.tid_count; i++) {
+               cpu_contention_handle_data = (struct cpu_contention_handle_data *)
+                       g_hash_table_lookup(g_cpu_contention_handle_data_table,
+                                       &input->client_input.pid.tid[i]);
+               /*
+                * Already registered: update the current level and timer id only,
+                * and keep the original level recorded at the first registration.
+                */
+               if (cpu_contention_handle_data) {
+                       cpu_contention_handle_data->current_cpu_boosting_level =
+                               input->client_input.level;
+                       cpu_contention_handle_data->timer_id = timer_id;
+                       continue;
+               }
+
+               cpu_contention_handle_data = (struct cpu_contention_handle_data *)
+                       g_slice_new0(struct cpu_contention_handle_data);
+               cpu_contention_handle_data->tid = input->client_input.pid.tid[i];
+               cpu_contention_handle_data->origin_cpu_boosting_level =
+                       origin_cpu_boosting_level;
+               cpu_contention_handle_data->current_cpu_boosting_level =
+                       input->client_input.level;
+               cpu_contention_handle_data->timer_id = timer_id;
+
+               /* The key points into the value, so it lives as long as the value */
+               g_hash_table_insert(g_cpu_contention_handle_data_table,
+                               &cpu_contention_handle_data->tid, cpu_contention_handle_data);
+       }
+}
+
+/**
+ * Ask the governor plugin for victims, arm a timer to check cpu contention
+ * later, and apply the commands returned by the governor.
+ *
+ * @param user_data  not used in this function.
+ * @return G_SOURCE_REMOVE on every path.
+ */
 static gboolean cpu_boosting_governor_govern_request (gpointer user_data)
 {
+       struct syscommon_resourced_cpu_boosting_input *input = NULL;
+       cpu_boosting_level_e cpu_boosting_level;
+       guint *timer_id;
        GSList *action_list = NULL;
+       GSource *source;
+       GSList *iter;
+       GSList *next;
        int ret;
 
-       for (cpu_boosting_level_e cpu_boosting_level = CPU_BOOSTING_LEVEL_STRONG;
+       /**
+        * Search cpu boosted thread list from CPU_BOOSTING_LEVEL_STRONG to
+        * CPU_BOOSTING_LEVEL_WEAK. If the governor cannot find out victims,
+        * then just drop the event and wait for the next event from the monitor.
+        */
+       for (cpu_boosting_level = CPU_BOOSTING_LEVEL_STRONG;
                        cpu_boosting_level < CPU_BOOSTING_LEVEL_END; cpu_boosting_level++) {
                ret = syscommon_plugin_resourced_cpu_boosting_governor_govern_request(
                                g_cpu_boosting_info_table[cpu_boosting_level],
                                cpu_boosting_level, &action_list);
                if (ret < 0)
                        return G_SOURCE_REMOVE;
+
+               if (action_list != NULL)
+                       break;
        }
 
+       /**
+        * Set timer to check whether cpu contention is alleviated or not
+        * after CPU_CONTENTION_HANDLE_TIMEOUT_MSEC.
+        *
+        * The timer is attached even if no victim was found, so
+        * g_latest_timer_id is updated whenever this point is reached.
+        * timer_id is passed to the timer callback as its data.
+        */
+       timer_id = g_new(guint, 1);
+       source = g_timeout_source_new(CPU_CONTENTION_HANDLE_TIMEOUT_MSEC);
+       g_source_set_callback(source, cpu_contention_handle_timeout,
+                       timer_id, NULL);
+       g_latest_timer_id = *timer_id =
+               g_source_attach(source, cpu_boosting_context);
+       g_source_unref(source);
+
        if (action_list == NULL)
                return G_SOURCE_REMOVE;
 
+       /* cpu_boosting_level is the level at which the loop above stopped */
+       gslist_for_each_safe(action_list, iter, next, input) {
+               assert(input);
+
+               register_cpu_contention_handle_data(input, cpu_boosting_level,
+                               *timer_id);
+
+               action_list = g_slist_remove(action_list, input);
+
+               /**
+                * Control cpu boosting level of victims according to
+                * the governor policy.
+                */
+               cpu_boosting_handle_command(input);
+       }
+
        return G_SOURCE_REMOVE;
 }
 
@@ -199,11 +468,13 @@ static void cpu_boosting_destroy_request(gpointer data)
        if (input->client_input.pid.tid)
                free(input->client_input.pid.tid);
 
-       if (input->client_input.dest)
+       if (input->client_input.dest) {
                free((void *)input->client_input.dest);
+       }
 
-       if (input->gsource_id)
+       if (input->gsource_id) {
                g_free(input->gsource_id);
+       }
 
        g_slice_free(struct syscommon_resourced_cpu_boosting_input, input);
 }
@@ -538,7 +809,8 @@ static int cpu_boosting_enqueue_by_conf(void *data,
        if (*input == NULL)
                return RESOURCED_ERROR_FAIL;
 
-       CPU_BOOSTING_SET_REQUEST(*input, CPU_BOOSTING_COMMAND_SET, cpu_boosting_level, -1, pid);
+       CPU_BOOSTING_SET_REQUEST(*input, CPU_BOOSTING_COMMAND_SET,
+                       cpu_boosting_level, -1, pid, 0);
 
        return RESOURCED_ERROR_NONE;
 }
@@ -572,7 +844,7 @@ static gboolean cpu_boosting_timeout(gpointer data)
                 * cpu_boosting_info->gsource_id != *(input->gsource_id)
                 * when setting cpu boosting again before timeout.
                 */
-               if (cpu_boosting_info == NULL
+               if (cpu_boosting_info == NULL || input->gsource_id == NULL
                                || cpu_boosting_info->gsource_id != *(input->gsource_id))
                        continue;
 
@@ -603,7 +875,8 @@ timer_out:
 }
 
 static void cpu_boosting_find_and_insert_info(pid_t tid,
-               cpu_boosting_level_e cpu_boosting_level, guint id)
+               cpu_boosting_flag_e cpu_boosting_flags, guint *timer_id,
+               cpu_boosting_level_e cpu_boosting_level)
 {
        struct syscommon_resourced_cpu_boosting_info *cpu_boosting_info = NULL;
 
@@ -649,7 +922,21 @@ static void cpu_boosting_find_and_insert_info(pid_t tid,
        }
 
        cpu_boosting_info->level = cpu_boosting_level;
-       cpu_boosting_info->gsource_id = id;
+
+       /**
+        * Register cpu_boosting_flags to handle cpu contention and
+        * restore cpu boosting level after solving cpu contention.
+        * For example, if resourced received CPU_BOOSTING_COMMAND_SET command
+        * with CPU_BOOSTING_RESET_ON_FORK, then turn on
+        * CPU_BOOSTING_RESET_ON_FORK flag
+        * when decreasing and restoring cpu boosting level.
+        */
+       cpu_boosting_info->cpu_boosting_flags = cpu_boosting_flags;
+
+       if (timer_id)
+               cpu_boosting_info->gsource_id = *timer_id;
+       else
+               cpu_boosting_info->gsource_id = 0;
 }
 
 static void
@@ -663,6 +950,7 @@ cpu_boosting_set(struct syscommon_resourced_cpu_boosting_input *input)
        int *tid_list = input->client_input.pid.tid;
        int timeout_msec = input->client_input.timeout_msec;
        cpu_boosting_level_e cpu_boosting_level = input->client_input.level;
+       cpu_boosting_flag_e cpu_boosting_flags = input->client_input.flags;
 
        switch (cpu_boosting_level) {
        case CPU_BOOSTING_LEVEL_STRONG:
@@ -677,35 +965,43 @@ cpu_boosting_set(struct syscommon_resourced_cpu_boosting_input *input)
        }
 
        attr = cpu_boosting_attr[cpu_boosting_level];
-       if (input->client_input.flags & CPU_BOOSTING_RESET_ON_FORK) {
+       if (cpu_boosting_flags & CPU_BOOSTING_RESET_ON_FORK) {
                _I("[CPU-BOOSTING] Turn on SCHED_RESET_ON_FORK flag");
                attr.sched_policy |= SCHED_RESET_ON_FORK;
        }
 
        for (int i = 0; i < tid_count; i++) {
-               if (tid_list[i] > 0) {
-                       if (sched_setattr(tid_list[i], &attr, 0) < 0) {
-                               _E("[CPU-BOOSTING] Failed to boost cpu of (tid = %d) with (level = %d)",
-                                               tid_list[i], cpu_boosting_level);
-                               fail_cnt++;
-                               continue;
-                       }
-                       success_cnt++;
+               if (tid_list[i] <= 0) {
+                       _W("[CPU-BOOSTING] Thread (id = %d) should be larger than 0", tid_list[i]);
+                       fail_cnt++;
+                       continue;
                }
-               else
-                       _E("[CPU-BOOSTING] Thread (id = %d) should be larger than 0", tid_list[i]);
+
+               if (sched_setattr(tid_list[i], &attr, 0) < 0) {
+                       _W("[CPU-BOOSTING] Failed to boost cpu of (tid = %d) with (level = %d)",
+                                       tid_list[i], cpu_boosting_level);
+                       tid_list[i] = 0;
+                       fail_cnt++;
+                       continue;
+               }
+               success_cnt++;
        }
 
        if (fail_cnt > 0)
-               _E("[CPU-BOOSTING] Boosting success ratio = %d/%d", success_cnt, fail_cnt + success_cnt);
+               _W("[CPU-BOOSTING] Boosting success ratio = %d/%d", success_cnt, fail_cnt + success_cnt);
 
        if (success_cnt > 0) {
-               /*
-                * If timeout value is larger than 0 and at least one boosting succeed,
-                * then set the timer to restore
+               /**
+                * If timeout_msec is negative, then do not set timer.
+                * This is because, cpu boosting should be continued until
+                * clearing cpu boosting.
+                * When the governor requests CPU_BOOSTING_COMMAND_SET command,
+                * input->gsource_id is not NULL. Therefore, new timer is always
+                * skipped during handling cpu contention.
                 */
-               if (timeout_msec > 0) {
+               if (timeout_msec > 0 && input->gsource_id == NULL) {
                        GSource *source;
+
                        input->gsource_id = g_new(guint, 1);
 
                        source = g_timeout_source_new(timeout_msec);
@@ -720,7 +1016,8 @@ cpu_boosting_set(struct syscommon_resourced_cpu_boosting_input *input)
                        if (tid_list[i] <= 0)
                                continue;
 
-                       cpu_boosting_find_and_insert_info(tid_list[i], cpu_boosting_level, id);
+                       cpu_boosting_find_and_insert_info(tid_list[i], cpu_boosting_flags,
+                                       input->gsource_id, cpu_boosting_level);
                }
        }
 
@@ -756,7 +1053,7 @@ static void cpu_boosting_clear(struct syscommon_resourced_cpu_boosting_input *in
        }
 
        if (fail_cnt > 0)
-               _E("[CPU-BOOSTING] Boosting success ratio = %d/%d", success_cnt, fail_cnt + success_cnt);
+               _W("[CPU-BOOSTING] Boosting success ratio = %d/%d", success_cnt, fail_cnt + success_cnt);
 
        if (input->remove_input)
                cpu_boosting_destroy_request(input);
@@ -806,7 +1103,7 @@ static void cpu_boosting_get(struct syscommon_resourced_cpu_boosting_input *inpu
 
 output_update:
        if (fail_cnt > 0) {
-               _E("[CPU-BOOSTING] Get boosting success ratio = %d/%d", success_cnt, fail_cnt + success_cnt);
+               _W("[CPU-BOOSTING] Get boosting success ratio = %d/%d", success_cnt, fail_cnt + success_cnt);
                output.success = false;
        }
        else
@@ -866,6 +1163,8 @@ cpu_boosting_set_inheritance(struct syscommon_resourced_cpu_boosting_input *inpu
        }
        _D("[CPU-BOOSTING] %s is boost set", input_dest->client_input.dest);
 #endif
+       if (input_dest->gsource_id)
+               g_free(g_steal_pointer(&input_dest->gsource_id));
 
        input_dest->client_input.timeout_msec = input->client_input.timeout_msec;
        input_dest->client_input.level = boosting_level;
@@ -1230,6 +1529,11 @@ static int cpu_boosting_init(void *data)
                        cpu_boosting_destroy_request);
        g_assert(dest_table);
 
+       g_cpu_contention_handle_data_table =
+               g_hash_table_new_full(g_int_hash, g_int_equal, NULL,
+                               free_cpu_contention_handle_data);
+       g_assert(g_cpu_contention_handle_data_table);
+
        /* For the conf-based client */
        register_notifier(RESOURCED_NOTIFIER_BOOSTING_RESOURCE, cpu_boosting_recv_from_conf);
 
@@ -1243,6 +1547,7 @@ static int cpu_boosting_finalize(void *data)
        for (int level = CPU_BOOSTING_LEVEL_STRONG; level < CPU_BOOSTING_LEVEL_END; level++)
                g_hash_table_destroy(g_cpu_boosting_info_table[level]);
        g_hash_table_destroy(dest_table);
+       g_hash_table_destroy(g_cpu_contention_handle_data_table);
 
        unregister_notifier(RESOURCED_NOTIFIER_BOOSTING_RESOURCE, cpu_boosting_recv_from_conf);
        return RESOURCED_ERROR_NONE;