Add unit(in variable) & fix bugs
[platform/core/system/resourced.git] / src / common / cgroup / cgroup.c
1 /*
2  * resourced
3  *
4  * Copyright (c) 2000 - 2013 Samsung Electronics Co., Ltd. All rights reserved.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  */
19
20 /*
21  * Cgroup creation implementation
22  */
23
24 #include "cgroup.h"
25 #include "memory-cgroup.h"
26 #include "watchdog-cgroup.h"
27 #include "const.h"
28 #include "macro.h"
29 #include "util.h"
30 #include "resourced.h"
31 #include "trace.h"
32 #include "file-helper.h"
33 #include "procfs.h"
34
35 #include <dirent.h>
36 #include <errno.h>
37 #include <fcntl.h>              /*mkdirat */
38 #include <glib.h>
39 #include <limits.h>
40 #include <sys/stat.h>           /*mkdirat */
41 #include <sys/types.h>
42 #include <stdlib.h>
43 #include <stdio.h>
44 #include <string.h>
45 #include <time.h>               /*time function */
46 #include <unistd.h>
47 #include <sys/mount.h>
48
/* Expands to the CGROUP_<name>_NAME identifier for the given level token. */
#define MAKE_NAME(name)    CGROUP_##name##_NAME

/*
 * Copy the string src into dst.
 *
 * dst MUST be a char array (not a pointer): its capacity is taken with
 * sizeof(dst). length is the number of bytes the caller needs, including
 * the terminating NUL (callers pass strlen(src) + 1).
 *
 * NOTE: on insufficient capacity this macro executes
 * `return RESOURCED_ERROR_OUT_OF_MEMORY;` in the ENCLOSING function.
 *
 * The last byte of dst is always set to NUL, so the result is terminated
 * even when dst was not zero-initialized and src exactly fills it.
 */
#define cgroup_name_cpy(dst, src, length)	\
		do { \
			if (sizeof(dst) < (size_t)(length))	\
				return RESOURCED_ERROR_OUT_OF_MEMORY;	\
			strncpy((dst), (src), sizeof(dst) - 1);	\
			(dst)[sizeof(dst) - 1] = '\0';	\
		} while (0)

/*
 * Append the string src to the NUL-terminated string already held in dst.
 *
 * dst MUST be a char array (capacity is sizeof(dst)). length is the number
 * of bytes to be appended including the terminating NUL.
 *
 * NOTE: on insufficient remaining capacity this macro executes
 * `return RESOURCED_ERROR_OUT_OF_MEMORY;` in the ENCLOSING function.
 */
#define cgroup_name_cat(dst, src, length)	\
		do { \
			size_t cg_used_ = strlen(dst);	\
			if (sizeof(dst) < cg_used_ + (size_t)(length))	\
				return RESOURCED_ERROR_OUT_OF_MEMORY;	\
			strncat((dst), (src), sizeof(dst) - cg_used_ - 1);	\
		} while (0)
71
72 /*
73  * This structure has full hierarchy of cgroups on running system.
74  * It is exported through lowmem-handler.h file.
75  **/
76 static struct cgroup cgroup_tree[CGROUP_END] = {
77         {"/",                CGROUP_TOP,    CGROUP_DEFAULT_USE_HIERARCHY, NULL, NULL},
78         {CGROUP_VIP_NAME,    CGROUP_ROOT,   CGROUP_DEFAULT_USE_HIERARCHY, NULL, NULL},
79         {CGROUP_HIGH_NAME,   CGROUP_VIP,    CGROUP_DEFAULT_USE_HIERARCHY, NULL, NULL},
80         {CGROUP_MEDIUM_NAME, CGROUP_HIGH,   CGROUP_DEFAULT_USE_HIERARCHY,  NULL, NULL},
81         {CGROUP_LOW_NAME,    CGROUP_MEDIUM, CGROUP_DEFAULT_USE_HIERARCHY, NULL, NULL},
82 };
83
84 int cgroup_get_type(int oom_score_adj)
85 {
86         if (oom_score_adj == OOMADJ_SERVICE_MIN)
87                 return CGROUP_VIP;
88         else if (oom_score_adj >= OOMADJ_SU &&
89                     oom_score_adj < OOMADJ_BACKGRD_PERCEPTIBLE)
90                 return CGROUP_HIGH;
91         else if (oom_score_adj >= OOMADJ_BACKGRD_PERCEPTIBLE &&
92                 oom_score_adj < OOMADJ_BACKGRD_UNLOCKED + OOMADJ_APP_INCREASE)
93                 return CGROUP_MEDIUM;
94         else if (oom_score_adj >= OOMADJ_BACKGRD_UNLOCKED + OOMADJ_APP_INCREASE &&
95                     oom_score_adj <= OOMADJ_APP_MAX)
96                 return CGROUP_LOW;
97         else
98                 return CGROUP_ROOT;
99 }
100
101 int cgroup_get_lowest_oom_score_adj(int type)
102 {
103         if (type < CGROUP_ROOT || type > CGROUP_LOW) {
104                 _E("cgroup type should be located between CGROUP_ROOT and CGROUP_LOW");
105         }
106
107         if (type == CGROUP_VIP)
108                 return OOMADJ_SERVICE_MIN;
109         else if (type == CGROUP_HIGH)
110                 return OOMADJ_SU;
111         else if (type == CGROUP_MEDIUM)
112                 return OOMADJ_BACKGRD_PERCEPTIBLE;
113         else if (type == CGROUP_LOW)
114                 return OOMADJ_BACKGRD_UNLOCKED + OOMADJ_APP_INCREASE;
115         else
116                 return OOMADJ_SU;
117 }
118
119 int cgroup_get_highest_oom_score_adj(int type)
120 {
121         if (type == CGROUP_VIP)
122                 return OOMADJ_SERVICE_MIN;
123         else if (type == CGROUP_HIGH)
124                 return OOMADJ_FOREGRD_UNLOCKED;
125         else if (type == CGROUP_MEDIUM)
126                 return OOMADJ_BACKGRD_UNLOCKED;
127         else
128                 return OOMADJ_APP_MAX;
129 }
130
131 struct cgroup *get_cgroup_tree(int idx)
132 {
133         if(idx < CGROUP_ROOT || idx >= CGROUP_END) {
134                 _E("[CGROUP] (%d) cgroup tree is NULL", idx);
135                 return NULL;
136         }
137         else
138                 return &cgroup_tree[idx];
139 }
140
141 void set_memcg_info(int idx, struct memcg_info *mi)
142 {
143         if(idx < CGROUP_ROOT || idx >= CGROUP_END) {
144                 _E("[CGROUP] (%d) index is out of scope", idx);
145         }
146         else
147                 cgroup_tree[idx].memcg_info = mi;
148 }
149
150 struct memcg_info *get_memcg_info(int idx)
151 {
152         if(idx < CGROUP_ROOT || idx >= CGROUP_END) {
153                 _E("[CGROUP] (%d) cgroup tree's memcg info is NULL", idx);
154                 return NULL;
155         }
156         else
157                 return cgroup_tree[idx].memcg_info;
158 }
159
160 GSList *get_child_cgroups(int idx)
161 {
162         if(idx < CGROUP_ROOT || idx >= CGROUP_END) {
163                 _E("[CGROUP] (%d) cgroup tree's child is NULL", idx);
164                 return NULL;
165         }
166         else
167                 return cgroup_tree[idx].child_cgroups;
168 }
169
170 int get_parent_cgroup(int idx)
171 {
172         if(idx < CGROUP_ROOT || idx >= CGROUP_END) {
173                 _E("[CGROUP] (%d) cgroup range is out of scope", idx);
174                 return CGROUP_TOP;
175         }
176         else {
177                 return cgroup_tree[idx].parent_cgroup;
178         }
179 }
180
181 void set_use_hierarchy(int idx, bool use_hierarchy)
182 {
183         if(idx < CGROUP_ROOT || idx >= CGROUP_END) {
184                 _E("[CGROUP] (%d) cgroup range is out of scope", idx);
185         }
186         else {
187                 cgroup_tree[idx].use_hierarchy = use_hierarchy;
188         }
189 }
190
191 bool get_use_hierarchy(int idx)
192 {
193         if(idx < CGROUP_ROOT || idx >= CGROUP_END) {
194                 _E("[CGROUP] (%d) cgroup range is out of scope", idx);
195                 return CGROUP_DEFAULT_USE_HIERARCHY;
196         }
197         else {
198                 return cgroup_tree[idx].use_hierarchy;
199         }
200 }
201
/*
 * Report whether stat() succeeds on cgroup_full_path.
 *
 * Any stat() failure (not only "no such file") is reported as false.
 */
static bool cgroup_is_exists(const char *cgroup_full_path)
{
	struct stat st;

	if (stat(cgroup_full_path, &st) != 0)
		return false;

	return true;
}
207
/*
 * Create the directory cgroup_full_path with mode
 * S_IRUSR | S_IWUSR | S_IRGRP.
 *
 * Returns 0 on success, or the negated errno left by mkdir() on failure.
 */
static int cgroup_create(const char *cgroup_full_path)
{
	const mode_t dir_mode = S_IRUSR | S_IWUSR | S_IRGRP;
	int rc = mkdir(cgroup_full_path, dir_mode);

	return rc < 0 ? -errno : 0;
}
216
217 /*
218  * @desc place tid to tasks file
219  * @return 0 in case of success, errno value in case of failure
220  */
221 resourced_ret_c cgroup_write_tid_fullpath(const char *cgroup_full_path,
222         const int tid)
223 {
224         int ret;
225
226         if (tid <= 0) {
227                 _E("try to write empty tid to %s", cgroup_full_path);
228                 return RESOURCED_ERROR_NO_DATA;
229         }
230
231         ret = cgroup_write_node_uint32(cgroup_full_path, TASK_FILE_NAME,
232                 (u_int32_t)tid);
233
234         ret_value_msg_if(ret < 0, RESOURCED_ERROR_FAIL,
235                 "Failed to place a tid (%d) to cgroup %s : %m", tid,
236                          cgroup_full_path);
237         return RESOURCED_ERROR_NONE;
238 }
239
240 /*
241  * @desc place pid to cgroup.procs file
242  * @return 0 in case of success, errno value in case of failure
243  */
244 resourced_ret_c cgroup_write_pid_fullpath(const char *cgroup_full_path,
245         const int pid)
246 {
247         int ret;
248
249         if (pid <= 0) {
250                 _E("try to write empty pid to %s", cgroup_full_path);
251                 return RESOURCED_ERROR_NO_DATA;
252         }
253
254         ret = cgroup_write_node_uint32(cgroup_full_path, CGROUP_FILE_NAME,
255                 (u_int32_t)pid);
256
257         ret_value_msg_if(ret < 0, RESOURCED_ERROR_FAIL,
258                 "Failed to place a pid (%d) to cgroup %s : %m", pid,
259                          cgroup_full_path);
260         return RESOURCED_ERROR_NONE;
261 }
262
263 resourced_ret_c cgroup_write_pid(const char *cgroup_subsystem,
264         const char *cgroup_name, const int pid)
265 {
266         char buf[MAX_PATH_LENGTH];
267         int size = snprintf(buf, sizeof(buf), "%s/%s", cgroup_subsystem, cgroup_name);
268         if(size < 0) {
269                 _E("failed to setup path (%s/%s)", cgroup_subsystem, cgroup_name);
270                 return size;
271         }
272         return cgroup_write_pid_fullpath(buf, pid);
273 }
274
275 int cgroup_write_node_uint32(const char *cgroup_name,
276                 const char *file_name, uint32_t value)
277 {
278         char buf[MAX_PATH_LENGTH];
279         snprintf(buf, sizeof(buf), "%s/%s", cgroup_name, file_name);
280         _SD("cgroup_buf %s, value %u\n", buf, value);
281         return fwrite_uint(buf, value);
282 }
283
284 int cgroup_write_node_int32(const char *cgroup_name,
285                 const char *file_name, int32_t value)
286 {
287         char buf[MAX_PATH_LENGTH];
288         snprintf(buf, sizeof(buf), "%s/%s", cgroup_name, file_name);
289         _SD("cgroup_buf %s, value %d\n", buf, value);
290         return fwrite_int(buf, value);
291 }
292
293 int cgroup_write_node_ulonglong(const char *cgroup_name,
294                 const char *file_name, unsigned long long value)
295 {
296         char buf[MAX_PATH_LENGTH];
297         snprintf(buf, sizeof(buf), "%s/%s", cgroup_name, file_name);
298         _SD("cgroup_buf %s, value %llu\n", buf, value);
299         return fwrite_ulonglong(buf, value);
300 }
301
302 int cgroup_write_node_str(const char *cgroup_name,
303                         const char *file_name, const char *string)
304 {
305         char buf[MAX_PATH_LENGTH];
306         snprintf(buf, sizeof(buf), "%s/%s", cgroup_name, file_name);
307         _SD("cgroup_buf %s, string %s\n", buf, string);
308         return fwrite_str(buf, string);
309 }
310
311 int cgroup_read_node_uint32(const char *cgroup_name,
312                 const char *file_name, uint32_t *value)
313 {
314         char buf[MAX_PATH_LENGTH];
315         int ret;
316         snprintf(buf, sizeof(buf), "%s/%s", cgroup_name, file_name);
317         ret = fread_uint(buf, value);
318         _SD("cgroup_buf %s, value %d\n", buf, *value);
319         return ret;
320 }
321
322 int cgroup_read_node_int32(const char *cgroup_name,
323                 const char *file_name, int32_t *value)
324 {
325         char buf[MAX_PATH_LENGTH];
326         int ret;
327         snprintf(buf, sizeof(buf), "%s/%s", cgroup_name, file_name);
328         ret = fread_int(buf, value);
329         _SD("cgroup_buf %s, value %d\n", buf, *value);
330         return ret;
331 }
332
333 int cgroup_read_node_ulonglong(const char *cgroup_name,
334                 const char *file_name, unsigned long long *value)
335 {
336         char buf[MAX_PATH_LENGTH];
337         int ret;
338         snprintf(buf, sizeof(buf), "%s/%s", cgroup_name, file_name);
339         ret = fread_ulonglong(buf, value);
340         _SD("cgroup_buf %s, value %llu\n", buf, *value);
341         return ret;
342 }
343
344 int cgroup_make_full_subdir(const char* parentdir)
345 {
346         int result;
347         char path[MAX_PATH_LENGTH] = {0, };
348
349         cgroup_name_cpy(path, parentdir, strlen(parentdir) + 1);
350
351         for(int i = CGROUP_VIP; i < CGROUP_END; i++) {
352                 char name[MAX_NAME_LENGTH] = {0, };
353
354                 if(i == CGROUP_VIP) {
355                         cgroup_name_cpy(name, MAKE_NAME(VIP), strlen(MAKE_NAME(VIP))+ 1);
356                 }
357                 else if(i == CGROUP_HIGH) {
358                         cgroup_name_cpy(name, MAKE_NAME(HIGH), strlen(MAKE_NAME(HIGH))+ 1);
359                 }
360                 else if(i == CGROUP_MEDIUM) {
361                         cgroup_name_cpy(name, MAKE_NAME(MEDIUM), strlen(MAKE_NAME(MEDIUM))+ 1);
362                 }
363                 else if(i == CGROUP_LOW) {
364                         cgroup_name_cpy(name, MAKE_NAME(LOW), strlen(MAKE_NAME(LOW))+ 1);
365                 }
366
367                 result = cgroup_make_subdir(path, name, NULL);
368                 ret_value_msg_if(result < 0, result, "%s/%s init failed\n", path, name);
369
370                 cgroup_name_cat(path, "/", 2);
371                 cgroup_name_cat(path, name, strlen(name) + 1);
372
373                 // ../../perprocess
374                 result = cgroup_make_subdir(path, MAKE_NAME(PER_PROCESS), NULL);
375                 ret_value_msg_if(result < 0, result, "%s/%s init failed\n",
376                                 path, MAKE_NAME(PER_PROCESS));
377                 // ../../group
378                 result = cgroup_make_subdir(path, MAKE_NAME(GROUP), NULL);
379                 ret_value_msg_if(result < 0, result, "%s/%s init failed\n",
380                                 path, MAKE_NAME(GROUP));
381         }
382
383         return RESOURCED_ERROR_NONE;
384 }
385
386 int cgroup_make_subdir(const char* parentdir, const char* cgroup_name, bool *already)
387 {
388         char buf[MAX_PATH_LENGTH];
389         bool cgroup_exists;
390         bool cgroup_remount = false;
391         int ret = 0;
392
393         if (!parentdir || !cgroup_name) {
394                 _E("You should input parent and cgroup name");
395                 return RESOURCED_ERROR_INVALID_PARAMETER;
396         }
397
398         ret = snprintf(buf, sizeof(buf), "%s/%s", parentdir, cgroup_name);
399
400         ret_value_msg_if(ret > sizeof(buf), RESOURCED_ERROR_FAIL,
401                 "Not enought buffer size for %s%s", parentdir, cgroup_name);
402
403         cgroup_exists = cgroup_is_exists(buf);
404         if (!cgroup_exists) {
405                 /* FIXME: do something about this mounting, then replace
406                  * `is_exists` with checking for -EEXIST in `create`,
407                  * as currently this is both duplication AND a race condition */
408                 if (!strncmp(parentdir, CGROUP_PATH, sizeof(CGROUP_PATH))) {
409                         ret = mount("tmpfs", CGROUP_PATH, "tmpfs",
410                                         MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, "mode=755");
411                         if (ret < 0) {
412                                 _E("Fail to RW mount cgroup directory. Can't make %s cgroup", cgroup_name);
413                                 return RESOURCED_ERROR_FAIL;
414                         }
415                         cgroup_remount = true;
416                 }
417
418                 ret = cgroup_create(buf);
419                 ret_value_msg_if(ret < 0, RESOURCED_ERROR_FAIL,
420                         "Fail to create cgroup %s : err %d", cgroup_name, errno);
421
422                 if (cgroup_remount) {
423                         ret = mount("tmpfs", CGROUP_PATH, "tmpfs",
424                                         MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755");
425                         if (ret < 0)
426                                 _D("Fail to RO mount");
427                 }
428         }
429
430         if (already)
431                 *already = cgroup_exists;
432
433         return RESOURCED_ERROR_NONE;
434 }
435
/*
 * Mount a filesystem of type "cgroup" from source on mount_point with the
 * flags MS_NODEV | MS_NOSUID | MS_NOEXEC; opts is passed through as the
 * mount data argument.
 *
 * @return the return value of mount()
 */
int cgroup_mount_subsystem(char* source, char* mount_point, char* opts)
{
	const unsigned long mount_flags = MS_NODEV | MS_NOSUID | MS_NOEXEC;

	return mount(source, mount_point, "cgroup", mount_flags, opts);
}
441
442 int cgroup_set_release_agent(const char *cgroup_subsys, const char *release_agent)
443 {
444         _cleanup_free_ char *buf = NULL;
445         int r;
446
447         r = asprintf(&buf, "%s/%s", CGROUP_PATH, cgroup_subsys);
448         if (r < 0)
449                 return -ENOMEM;
450
451         r = cgroup_write_node_str(buf, RELEASE_AGENT, release_agent);
452         if (r < 0)
453                 return r;
454
455         return cgroup_write_node_str(buf, NOTIFY_ON_RELEASE, "1");
456 }
457
458 int cgroup_pid_get_path(const char *controller, pid_t pid, char **path)
459 {
460         _cleanup_free_ char *pid_cgroup = NULL;
461         _cleanup_fclose_ FILE *f = NULL;
462         char buf[LINE_MAX];
463         int r;
464
465         assert(controller);
466         assert(pid >= 0);
467         assert(path);
468
469         if (pid)
470                 r = asprintf(&pid_cgroup, "/proc/%d/cgroup", pid);
471         else
472                 r = asprintf(&pid_cgroup, "/proc/self/cgroup");
473         if (r < 0)
474                 return -ENOMEM;
475
476         f = fopen(pid_cgroup, "re");
477         if (!f)
478                 return -errno;
479
480         for (;;) {
481                 _cleanup_free_ char *subsys = NULL, *p = NULL;
482                 int n, id;
483
484                 if (!fgets(buf, sizeof(buf), f)) {
485                         if (ferror(f))
486                                 return -errno;
487
488                         return -ENXIO;
489                 }
490
491                 n = sscanf(buf, "%d:%m[^:]:%ms", &id, &subsys, &p);
492                 if (n != 3)
493                         continue;
494
495                 if ((streq(subsys, controller)) ||
496                     (strneq(subsys, "name=", strlen("name=")) &&
497                      streq(subsys + strlen("name="), controller))) {
498                         *path = strdup(p);
499
500                         if (!*path)
501                                 return -ENOMEM;
502
503                         break;
504                 }
505         }
506
507         return 0;
508 }
509
510 /*
511  * Usage example:
512  *      int i;
513  *      pid_t pid;
514  *      GArray *pids_array = NULL;
515  *
516  *      cgroup_get_pids(name, &pids_array);
517  *
518  *      for (i=0; i < pids_array->len; i++)
519  *              _D("pid_t: %d", g_array_index(pids_array, pid_t, i));
520  *      g_array_free(pids_array, TRUE);
521  *
522  */
523 int cgroup_get_pids(const char *name, GArray **pids)
524 {
525         _cleanup_free_ char *cg_procs = NULL;
526         _cleanup_fclose_ FILE *f = NULL;
527         char buf[MAX_PATH_LENGTH] = "";
528         GArray *array;
529         pid_t pid;
530
531         assert(name);
532         assert(pids);
533         assert(!*pids);
534
535         if (asprintf(&cg_procs, "%s/%s", name, CGROUP_FILE_NAME) < 0)
536                 return -ENOMEM;
537
538         f = fopen(cg_procs, "r");
539         if (!f) {
540                 _E("Failed to open '%s': %m", name);
541                 return -errno;
542         }
543
544         array = g_array_new(false, false, sizeof(pid_t));
545
546         while (fgets(buf, 32, f)) {
547                 pid = atoi(buf);
548                 g_array_append_val(array, pid);
549         }
550
551         *pids = array;
552
553         return 0;
554 }