06cb48de0abbd0259480f83b3717573a8948ee5b
[platform/core/system/resourced.git] / src / process / watchdog / proc-watchdog.c
1 /*
2  * resourced
3  *
4  * Copyright (c) 2000 - 2013 Samsung Electronics Co., Ltd. All rights reserved.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18 */
19
20 #include <ctype.h>
21 #include <stdio.h>
22 #include <stdbool.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <unistd.h>
26 #include <stdarg.h>
27 #include <dirent.h>
28 #include <sys/types.h>
29 #include <sys/stat.h>
30 #include <fcntl.h>
31 #include <libsyscommon/list.h>
32
33 #include "const.h"
34 #include "resourced.h"
35 #include "trace.h"
36 #include "cgroup.h"
37 #include "watchdog-cgroup.h"
38 #include "procfs.h"
39 #include "macro.h"
40 #include "util.h"
41 #include "config-parser.h"
42 #include "file-helper.h"
43 #include "storage-helper.h"
44 #include "notifier.h"
45 #include "module.h"
46 #include "module-data.h"
47 #include "dbus-handler.h"
48
/* Location and file-name suffix of the watchdog configuration files. */
#define PROC_CONF_DIR "/etc/resourced/process.conf.d"
#define PROC_CONF_SUFFIX ".conf"

/* systemd D-Bus destination, unit object-path prefix and interfaces. */
#define SYSTEMD_DBUS_DEST "org.freedesktop.systemd1"
#define SYSTEMD_DBUS_UNIT_PATH "/org/freedesktop/systemd1/unit/"
#define SYSTEMD_DBUS_SERVICE_IFACE SYSTEMD_DBUS_DEST".Service"
#define DBUS_IFACE_DBUS_PROPERTIES "org.freedesktop.DBus.Properties"

/* Characters of a unit name that are escaped inside a D-Bus object path. */
#define SYSTEMD_UNIT_ESCAPE_CHAR ".-"

/* Boot parameter that switches the watchdog off when present. */
#define BOOT_PARAM_PROC_WATCHDOG_REBOOT_DISABLED "tizen.watchdog_reboot_disable"
60
61 struct proc_watchdog_group {
62         char name[64];
63         GList *service;
64         GList *process;
65
66         /* options */
67         int reboot_on_failure;
68 };
69
70 static GList *proc_watchdog_group_list;
71
/*
 * Format a message (at most 255 characters are kept) and append it,
 * followed by a newline, to /dev/console. The message is mirrored to the
 * debug log; when the console cannot be opened it is written to the error
 * log instead and nothing is printed.
 */
static void proc_watchdog_print_console(const char *format, ...)
{
        char msg[256] = {0, };
        va_list args;
        FILE *console;

        va_start(args, format);
        vsnprintf(msg, sizeof(msg), format, args);
        va_end(args);

        console = fopen("/dev/console", "a");
        if (console == NULL) {
                _E("[WATCHDOG] %s(failed to print log on console, %m)", msg);
                return;
        }

        _D("[WATCHDOG] %s", msg);
        fprintf(console, "%s\n", msg);

        fclose(console);
}
94
95 static bool proc_watchdog_boot_param_reboot_disabled(void)
96 {
97         char cmdline[1024];
98         int retval;
99
100         retval = proc_get_cmdline(0, cmdline, sizeof(cmdline));
101         if (retval != RESOURCED_ERROR_NONE)
102                 return false;
103
104         return (strstr(cmdline, BOOT_PARAM_PROC_WATCHDOG_REBOOT_DISABLED) != NULL);
105 }
106
107 /* copy of libsyscommon/libsystemd.c: systemd_get_unit_dbus_path() */
108 static int proc_watchdog_get_escaped_name(const char *unit, char **escaped)
109 {
110         char *path = NULL;
111         int i;
112         size_t p, k, prefix_len, unit_len;
113         size_t path_len, len, escape;
114
115         if (!unit) {
116                 _E("[WATCHDOG] Invalid parameter.");
117                 return -EINVAL;
118         }
119         unit_len = strlen(unit);
120
121         for (escape = 0, p = 0; p < unit_len; escape++) {
122                 k = strcspn(unit + p, SYSTEMD_UNIT_ESCAPE_CHAR);
123                 if (p + k >= unit_len)
124                         break;
125                 p += k+1;
126         }
127
128         prefix_len = strlen(SYSTEMD_DBUS_UNIT_PATH);
129         /* assume we try to get object path of foo-bar.service then
130         * the object path will be
131         * "/org/freedesktop/systemd1/unit/foo_2dbar_2eservice\n". In
132         * this case we can find two escape characters, one of escape
133         * char('-') is changed to three of char("_2d"). So the total
134         * length will be: */
135         /* (PREFIX) + (unit - escape + 3*escape) + NULL */
136         path_len = prefix_len + (unit_len - escape)
137                 + (escape * 3 * sizeof(char)) + 1;
138         path = (char *)calloc(path_len, sizeof(char));
139         if (!path) {
140                 _E("[WATCHDOG] Not enough memory.");
141                 return -ENOMEM;
142         }
143
144         *escaped = path;
145
146         strncpy(path, SYSTEMD_DBUS_UNIT_PATH, prefix_len + 1);
147         for (i = 0, p = 0; i <= escape; i++) {
148                 k = strcspn(unit + p, SYSTEMD_UNIT_ESCAPE_CHAR);
149                 strncpy(path + prefix_len, unit + p, k);
150                 if (k < strlen(unit + p)) {
151                         len = path_len - (prefix_len + k);
152                         snprintf(path + prefix_len + k, len,
153                                         "_%x", *(unit + p + k) & 0xff);
154                         prefix_len += k + 3;
155                         p += k+1;
156                 }
157         }
158
159         return 0;
160 }
161
162 /* get MainPID of a service. */
163 static int proc_watchdog_get_service_mainpid(const char *service, int *pid)
164 {
165         char *escaped;
166         int ret = 0;
167         GVariant *reply = NULL;
168         GVariant *inner_reply = NULL;
169
170         if (!service)
171                 return -EINVAL;
172
173         ret = proc_watchdog_get_escaped_name(service, &escaped);
174         if (ret < 0) {
175                 _E("[WATCHDOG] Failed to makeup escaped service name.");
176                 return ret;
177         }
178
179         ret = d_bus_call_method_sync_gvariant_with_reply(SYSTEMD_DBUS_DEST,
180                         escaped,
181                         DBUS_IFACE_DBUS_PROPERTIES,
182                         "Get",
183                         g_variant_new("(ss)", SYSTEMD_DBUS_SERVICE_IFACE, "ExecMainPID"),
184                         &reply);
185
186         free(escaped);
187
188         if (!reply || ret < 0) {
189                 _E("[WATCHDOG] Failed to get pid of service=%s", service);
190                 if (reply)
191                         g_variant_unref(reply);
192                 return -ECOMM;
193         }
194
195         do_expr_unless_g_variant_consume_typechecked(return -EINVAL, reply, "(v)", &inner_reply);
196         do_expr_unless_g_variant_consume_typechecked(return -EINVAL, inner_reply, "u", pid);
197
198         return 0;
199 }
200
201 static bool proc_watchdog_check_all(void)
202 {
203         int pid;
204         int ret_val;
205         bool ret = true;
206         char *process;
207         char *service;
208         GList *elem1, *elem2;
209         struct proc_watchdog_group *vg;
210
211         SYS_G_LIST_FOREACH(proc_watchdog_group_list, elem1, vg) {
212                 if (!vg->reboot_on_failure)
213                         continue;
214
215                 SYS_G_LIST_FOREACH(vg->process, elem2, process) {
216                         pid = find_pid_from_cmdline(process);
217                         if (pid < 0) {
218                                 proc_watchdog_print_console("%s is not running", process);
219                                 ret = false;
220                         }
221                 }
222
223                 SYS_G_LIST_FOREACH(vg->service, elem2, service) {
224                         ret_val = proc_watchdog_get_service_mainpid(service, &pid);
225                         if (ret_val < 0) {
226                                 _E("[WATCHDOG] Failed to get MainPID of service=%s", service);
227                                 continue;
228                         }
229
230                         /* The property MainPID can return a positive MainPID
231                          * as a pid of inactive(dead) service if the service had once
232                          * been active before. Therefore, it is necessary to check
233                          * whether it is still alive. */
234                         if (pid == 0 || kill(pid, 0) != 0) {
235                                 proc_watchdog_print_console("%s is not running", service);
236                                 ret = false;
237                         }
238                 }
239         }
240
241         return ret;
242 }
243
244 static void proc_watchdog_force_reboot(void)
245 {
246         proc_watchdog_print_console("Force reboot");
247         execl("PROC_WATCHDOG_HANDLER_PATH", "PROC_WATCHDOG_HANDLER_PATH", NULL);
248
249         _E("[WATCHDOG] Failed to execute %s", PROC_WATCHDOG_HANDLER_PATH);
250 }
251
252 static int proc_watchdog_load_config(struct parse_result *result, void *user_data)
253 {
254         char vgname[64] = {0, };
255         struct proc_watchdog_group *vg;
256         GList *elem;
257
258         if (!user_data)
259                 return -1;
260
261         if (!strstr(result->section, PER_PROCESS_CONF))
262                 return -1;
263
264         snprintf(vgname, sizeof(vgname), "%s:%s", (char *)user_data, result->section);
265
266         SYS_G_LIST_FOREACH(proc_watchdog_group_list, elem, vg) {
267                 if (!strncmp(vg->name, vgname, sizeof(vg->name)))
268                         break;
269         }
270
271         if (!vg) {
272                 vg = calloc(1, sizeof(struct proc_watchdog_group));
273                 if (!vg)
274                         return -1;
275                 strncpy(vg->name, vgname, sizeof(vg->name));
276                 SYS_G_LIST_APPEND(proc_watchdog_group_list, vg);
277         }
278
279         if (!strncmp(result->name, "Process", sizeof("Process"))) {
280                 SYS_G_LIST_APPEND(vg->process, strndup(result->value, PATH_MAX));
281         } else if (!strncmp(result->name, "Service", sizeof("Service"))) {
282                 SYS_G_LIST_APPEND(vg->service, strndup(result->value, PATH_MAX));
283         } else if (!strncmp(result->name, "ActionOnFailure", sizeof("ActionOnFailure"))) {
284                 if (!strncmp(result->value, "reboot", sizeof("reboot")))
285                         vg->reboot_on_failure = 1;
286                 else
287                         vg->reboot_on_failure = 0;
288         }
289
290         return 0;
291 }
292
293 static int proc_watchdog_create_sub_cgroup(const char *name, pid_t pid)
294 {
295         _cleanup_free_ char *cgroup_name = NULL;
296         bool already;
297         int r;
298
299         assert(name);
300         assert(pid);
301
302         r = cgroup_make_subdir(PROC_WATCHDOGCG_PATH, name, &already);
303         if (r < 0) {
304                 _E("[WATCHDOG] failed to create %s sub dir", PROC_WATCHDOGCG_PATH);
305                 return r;
306         }
307
308         if (already) {
309                 _D("[WATCHDOG] PID(%d) is already registered as %s sub cgroup(%s)",
310                    pid, PROC_WATCHDOGCG_NAME, name);
311                 return 0;
312         }
313
314         r = asprintf(&cgroup_name, "%s/%s", PROC_WATCHDOGCG_PATH, name);
315         if (r < 0) {
316                 _E("[WATCHDOG] failed to allocate memory");
317                 return -ENOMEM;
318         }
319
320         cgroup_write_tid_fullpath(cgroup_name, pid);
321 /*      r = cgroup_write_node_uint32(cgroup_name, TASK_FILE_NAME, pid);
322         if (r < 0) {
323                 _E("[WATCHDOG] failed to write pid '%d' to '%s': %m",
324                    pid, cgroup_name);
325                 return r;
326         }*/
327
328         _D("[WATCHDOG] PID(%d) is registered as %s sub cgroup(%s)", pid, PROC_WATCHDOGCG_NAME, name);
329
330         return 0;
331 }
332
333 static void proc_watchdog_create_proc_name_groups(void)
334 {
335         GList *elem1, *elem2;
336         struct proc_watchdog_group *vg;
337         char *process;
338         int r;
339
340         SYS_G_LIST_FOREACH(proc_watchdog_group_list, elem1, vg) {
341                 if (!vg->reboot_on_failure)
342                         continue;
343
344                 SYS_G_LIST_FOREACH(vg->process, elem2, process) {
345                         pid_t pid = 0;
346
347                         pid = find_pid_from_cmdline(process);
348                         if (pid > 0) {
349                                 r = proc_watchdog_create_sub_cgroup(process, pid);
350                                 if (r < 0)
351                                         _E("[WATCHDOG] failed to create sub cgroup of '%s', ignoring", process);
352                         } else {
353                                 _D("[WATCHDOG] failed to find pid of name: %s", process);
354                         }
355                 }
356         }
357 }
358
359 static void proc_watchdog_create_systemd_service_groups(void)
360 {
361         GList *elem1, *elem2;
362         struct proc_watchdog_group *vg;
363         char *service;
364
365         SYS_G_LIST_FOREACH(proc_watchdog_group_list, elem1, vg) {
366                 if (!vg->reboot_on_failure)
367                         continue;
368
369                 SYS_G_LIST_FOREACH(vg->service, elem2, service) {
370                         int ret_val;
371                         pid_t pid;
372
373                         ret_val = proc_watchdog_get_service_mainpid(service, &pid);
374                         if (ret_val == -ECOMM)
375                                 continue;
376
377                         if (pid > 0) {
378                                 ret_val = proc_watchdog_create_sub_cgroup(service, pid);
379                                 if (ret_val < 0)
380                                         _E("[WATCHDOG] failed to create sub cgroup of '%s', ignoring", service);
381                         }
382                 }
383         }
384 }
385
/*
 * Notifier callback registered for RESOURCED_NOTIFIER_BOOTING_DONE.
 *
 * Registers the watched processes and services in their sub cgroups once
 * more, then forces a reboot when the check of all of them fails.
 * @data is unused. Always returns 0.
 */
static int proc_watchdog_booting_done(void *data)
{
        bool all_running;

        proc_watchdog_create_proc_name_groups();
        proc_watchdog_create_systemd_service_groups();

        all_running = proc_watchdog_check_all();
        if (!all_running)
                proc_watchdog_force_reboot();

        return 0;
}
396
397 static int proc_watchdog_process_disable(void *data)
398 {
399         _cleanup_close_ int checkfd = -1;
400
401         /* make tmp file */
402         checkfd = creat(CHECK_RELEASE_PROGRESS, 0640);
403         if (checkfd < 0)
404                 _E("[WATCHDOG] Unable to disable cgroup release_agent - can't make file %s (%m)", CHECK_RELEASE_PROGRESS);
405
406         return 0;
407 }
408
409 static void proc_watchdog_load_configs(void)
410 {
411         int count;
412         int idx;
413         struct dirent **namelist;
414
415         if ((count = scandir(PROC_CONF_DIR, &namelist, NULL, alphasort)) == -1) {
416                 _W("[WATCHDOG] failed to opendir (%s)", PROC_CONF_DIR);
417                 return;
418         }
419
420         for (idx = 0; idx < count; idx++) {
421                 char path[PATH_MAX] = {0, };
422
423                 if (!strstr(namelist[idx]->d_name, PROC_CONF_SUFFIX))
424                         continue;
425
426                 snprintf(path, sizeof(path), "%s/%s", PROC_CONF_DIR, namelist[idx]->d_name);
427                 config_parse(path, proc_watchdog_load_config, (void *)namelist[idx]->d_name);
428                 free(namelist[idx]);
429         }
430
431         free(namelist);
432
433         /* print result */
434         GList *elem1, *elem2;
435         struct proc_watchdog_group *vg;
436         char *name;
437
438         SYS_G_LIST_FOREACH(proc_watchdog_group_list, elem1, vg) {
439                 _D("[WATCHDOG] %s", vg->name);
440                 _D("[WATCHDOG] reboot on failure=%d", vg->reboot_on_failure);
441                 SYS_G_LIST_FOREACH(vg->process, elem2, name)
442                         _D("[WATCHDOG] proc watchdog process: %s", name);
443                 SYS_G_LIST_FOREACH(vg->service, elem2, name)
444                         _D("[WATCHDOG] proc watchdog service: %s", name);
445         }
446 }
447
448 static int resourced_proc_watchdog_process_init(void *data)
449 {
450         _cleanup_close_ int checkfd = -1;
451         int r;
452
453         if (proc_watchdog_boot_param_reboot_disabled()) {
454                 proc_watchdog_print_console("Reboot has been disabled, boot param: %s", BOOT_PARAM_PROC_WATCHDOG_REBOOT_DISABLED);
455                 return RESOURCED_ERROR_NONE;
456         }
457
458         r = access(CHECK_RELEASE_PROGRESS, F_OK);
459         if (r == 0) {
460                 r = unlink(CHECK_RELEASE_PROGRESS);
461                 if (r < 0)
462                         _E("[WATCHDOG] failed to remove %s: %m", CHECK_RELEASE_PROGRESS);
463         }
464
465         proc_watchdog_load_configs();
466
467         if (!is_mounted(PROC_WATCHDOGCG_PATH)) {
468                 r = cgroup_make_subdir(CGROUP_PATH, PROC_WATCHDOGCG_NAME, NULL);
469                 if (r < 0) {
470                         _E("[WATCHDOG] failed to make %s cgroup", PROC_WATCHDOGCG_PATH);
471                         return RESOURCED_ERROR_FAIL;
472                 }
473
474                 r = cgroup_mount_subsystem("cgroup", PROC_WATCHDOGCG_PATH,
475                                            "none,name=watchdog");
476                 if (r < 0) {
477                         _E("[WATCHDOG] failed to mount %s cgroup: %m", PROC_WATCHDOGCG_PATH);
478                         return RESOURCED_ERROR_FAIL;
479                 }
480
481                 r = cgroup_set_release_agent(PROC_WATCHDOGCG_NAME, PROC_WATCHDOG_HANDLER_PATH);
482                 if (r < 0) {
483                         _E("[WATCHDOG] failed to set cgroup release_agent: %m");
484                         return RESOURCED_ERROR_FAIL;
485                 }
486         }
487
488         proc_watchdog_create_proc_name_groups();
489         proc_watchdog_create_systemd_service_groups();
490
491         register_notifier(RESOURCED_NOTIFIER_POWER_OFF,
492                 proc_watchdog_process_disable);
493
494         r = register_notifier(RESOURCED_NOTIFIER_BOOTING_DONE,
495                               proc_watchdog_booting_done);
496         if (r < 0) {
497                 _E("[WATCHDOG] failed to register notifier BootingDone");
498                 return RESOURCED_ERROR_FAIL;
499         }
500
501         return RESOURCED_ERROR_NONE;
502 }
503
504 static int resourced_proc_watchdog_process_finalize(void *data)
505 {
506         GList *elem1, *elem1_n, *elem2, *elem2_n;
507         struct proc_watchdog_group *vg;
508         char *process, *service;
509
510         if (proc_watchdog_boot_param_reboot_disabled())
511                 return RESOURCED_ERROR_NONE;
512
513         SYS_G_LIST_FOREACH_SAFE(proc_watchdog_group_list, elem1, elem1_n, vg) {
514                 SYS_G_LIST_FOREACH_SAFE(vg->process, elem2, elem2_n, process) {
515                         SYS_G_LIST_REMOVE(vg->process, process);
516                         free(process);
517                 }
518
519                 SYS_G_LIST_FOREACH_SAFE(vg->service, elem2, elem2_n, service) {
520                         SYS_G_LIST_REMOVE(vg->service, service);
521                         free(service);
522                 }
523
524                 SYS_G_LIST_REMOVE(proc_watchdog_group_list, vg);
525                 free(vg);
526         }
527
528         proc_watchdog_process_disable(NULL);
529         unregister_notifier(RESOURCED_NOTIFIER_BOOTING_DONE, proc_watchdog_booting_done);
530         unregister_notifier(RESOURCED_NOTIFIER_POWER_OFF, proc_watchdog_process_disable);
531
532         return RESOURCED_ERROR_NONE;
533 }
534
535 static struct module_ops proc_watchdog_modules_ops = {
536         .priority = MODULE_PRIORITY_NORMAL,
537         .name = "proc-watchdog",
538         .init = resourced_proc_watchdog_process_init,
539         .exit = resourced_proc_watchdog_process_finalize,
540 };
541
542 MODULE_REGISTER(&proc_watchdog_modules_ops)