Add standard reboot decision maker 25/151725/7
author Konrad Kuchciak <k.kuchciak@samsung.com>
Tue, 19 Sep 2017 11:22:33 +0000 (13:22 +0200)
committer Konrad Kuchciak <k.kuchciak@samsung.com>
Thu, 12 Oct 2017 10:34:04 +0000 (12:34 +0200)
This commit adds another scenario to faultd: if the three previous reboots
were all caused by the same service, reboot the system into recovery.

Change-Id: Ia68ed866bbc86f071d320486cf9b278f95a9ffbc
Signed-off-by: Konrad Kuchciak <k.kuchciak@samsung.com>
Makefile.am
packaging/faultd.spec
src/decision_makers/standard_reboot_dm.c [new file with mode: 0644]

index 29b6c9f6b5f420088b6460bdb5117334b6b411de..81433d58e72682a4f178fae789f78917718411fb 100644 (file)
@@ -106,6 +106,7 @@ EXTRA_faultd_SOURCES = \
     src/decision_makers/vip_fault_dm.c \
     src/decision_makers/rv_dm.c \
     src/decision_makers/standard_fault_dm.c \
+    src/decision_makers/standard_reboot_dm.c \
     src/action/service_restart.c \
     src/action/system_reboot.c \
     src/action/system_reboot_to_recovery.c \
@@ -122,6 +123,7 @@ modules_LTLIBRARIES = audit_listener.la \
                  vip_fault_eh.la \
                  resource_violation_eh.la \
                  standard_fault_eh.la \
+                 standard_reboot_eh.la \
                  service_restart_action.la \
                  system_reboot_action.la \
                  system_reboot_to_recovery_action.la \
@@ -136,6 +138,7 @@ ejdb_dbadapter_la_LIBADD = $(LIBEJDB_LIBS)
 vip_fault_eh_la_SOURCES = src/decision_makers/vip_fault_dm.c
 resource_violation_eh_la_SOURCES = src/decision_makers/rv_dm.c
 standard_fault_eh_la_SOURCES = src/decision_makers/standard_fault_dm.c
+standard_reboot_eh_la_SOURCES = src/decision_makers/standard_reboot_dm.c
 service_restart_action_la_SOURCES = src/action/service_restart.c
 system_reboot_action_la_SOURCES = src/action/system_reboot.c
 system_reboot_to_recovery_action_la_SOURCES = src/action/system_reboot_to_recovery.c
index ff0a8e38b229247b01178745dd055cf826eca2ac..b9ce8254621ad7c92cab40fc33408c7cca22312d 100644 (file)
@@ -68,6 +68,7 @@ for mod in audit_listener \
        service_recover_action \
        service_restart_action \
        standard_fault_eh \
+       standard_reboot_eh \
        startup_listener \
        system_reboot_to_recovery_action
 do
diff --git a/src/decision_makers/standard_reboot_dm.c b/src/decision_makers/standard_reboot_dm.c
new file mode 100644 (file)
index 0000000..a55869c
--- /dev/null
@@ -0,0 +1,205 @@
+/*
+ * This file is part of faultd.
+ *
+ * Copyright © 2017 Samsung Electronics
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <malloc.h>
+#include <errno.h>
+
+#include "system_booted_event.h"
+#include "service_failed_event.h"
+#include "decision_made_event.h"
+#include "action_executed_event.h"
+#include "action.h"
+#include "event_processor.h"
+#include "log.h"
+#include "service.h"
+
+/*
+ * Name this module reports for itself: used as the event handler name and
+ * as the "who_made" field of the decision events it emits.
+ */
+#define MODULE_NAME "standard_reboot_decision_maker"
+/*
+ * Number of previous boots that are inspected; a recovery reboot is only
+ * decided when every one of them was rebooted because of the same service.
+ */
+#define REBOOTS_BEFORE_RECOVERY 3
+
+/*
+ * Event filter of this decision maker.
+ *
+ * @handler: handler the event is offered to (not used here)
+ * @ev:      event to test
+ *
+ * Returns the result of checking whether @ev is a system booted event.
+ */
+static int sr_event_match(struct faultd_event_handler *handler,
+                          struct faultd_event *ev)
+{
+       int is_boot_event;
+
+       is_boot_event = faultd_event_is_of_type(ev, SYSTEM_BOOTED_EVENT_ID);
+
+       return is_boot_event;
+}
+
+/*
+ * Find the service on whose behalf faultd rebooted the system during the
+ * boot identified by @boot_event.
+ *
+ * The database is queried for events of type ACTION_EXECUTED_EVENT_ID whose
+ * action is FAULTD_ACTION_REBOOT_ID and whose boot event is @boot_event.
+ * The service name is read from the action log of the first returned entry.
+ *
+ * @boot_event:   oid of the system booted event to inspect
+ * @service_name: set to a newly allocated copy of the service name on
+ *                success (the caller must free() it); set to NULL otherwise
+ *
+ * Returns 0 on success, -ENOENT when the query matched no event, -ENOMEM
+ * when the name could not be duplicated, or the negative value returned by
+ * the faultd call that failed.
+ */
+static int get_reboot_reason(faultd_oid_t *boot_event, char **service_name)
+{
+       struct faultd_object *query = NULL;
+       struct faultd_object *query_hints = NULL;
+       struct faultd_object *orderby = NULL;
+       struct faultd_object *result = NULL;
+       char *name = NULL;
+       uint32_t nr = 0;
+       int ret;
+
+       /* the caller must never see a stale or uninitialized name */
+       *service_name = NULL;
+
+       ret = faultd_object_new_flags(&query, FAULTD_FLAG_QUERY_MODE);
+       if (ret < 0)
+               return ret;
+
+       faultd_object_append_string(query, EV_TYPE_NAME, ACTION_EXECUTED_EVENT_ID);
+       faultd_object_append_string(query, AE_EV_ACTION, FAULTD_ACTION_REBOOT_ID);
+       faultd_object_append_oid(query, EV_BOOT_EVENT, boot_event);
+
+       ret = faultd_object_new_flags(&query_hints, FAULTD_FLAG_QUERY_MODE);
+       if (ret < 0)
+               goto unref_query;
+
+       ret = faultd_object_new(&orderby);
+       if (ret < 0)
+               goto unref_query_hints;
+
+       /* sort by event timestamp; 1 is presumably ascending - TODO confirm */
+       faultd_object_append_int(orderby, EV_TIMESTAMP ".tv_sec", 1);
+       /*
+        * NOTE(review): orderby is never unreferenced here. That is only
+        * correct if faultd_object_append_object() takes over our reference;
+        * confirm against its implementation.
+        */
+       faultd_object_append_object(query_hints, "$orderby", orderby);
+
+       ret = faultd_object_new(&result);
+       if (ret < 0)
+               goto unref_query_hints;
+
+       ret = database_load(query, query_hints, result, &nr);
+       if (ret < 0)
+               goto unref_result;
+
+       if (nr == 0) {
+               /*
+                * Nothing matched. This must be reported as a failure:
+                * returning a non-negative value without a name would make
+                * the caller treat the lookup as successful.
+                */
+               ret = -ENOENT;
+               goto unref_result;
+       }
+
+       /* "0." selects the first entry of the result set */
+       ret = faultd_object_get_string(result,
+                                      "0." AE_EV_ACTION_LOG "." FAULTD_AD_SERVICE_NAME,
+                                      &name);
+       if (ret < 0)
+               goto unref_result;
+
+       /* copy the name before the result object is released below */
+       *service_name = strdup(name);
+       ret = (*service_name == NULL) ? -ENOMEM : 0;
+
+unref_result:
+       faultd_object_unref(result);
+unref_query_hints:
+       faultd_object_unref(query_hints);
+unref_query:
+       faultd_object_unref(query);
+       return ret;
+}
+
+/*
+ * Handle a system booted event.
+ *
+ * Starting from the current boot event, walks back through
+ * REBOOTS_BEFORE_RECOVERY previous boots and looks up, for each of them,
+ * the service that was the reason for a faultd reboot. Only if every one of
+ * those boots has such a reason and all of them name the same service, a
+ * DECISION_MADE_EVENT_ID event requesting FAULTD_ACTION_RECOVERY_REBOOT_ID
+ * is created and reported.
+ *
+ * @handler: handler whose event queue holds the event to process
+ *
+ * Always returns 0; failures are logged and result in no decision.
+ */
+static int sr_make_decision(struct faultd_event_handler *handler)
+{
+       struct faultd_event *ev = pop_faultd_event(&handler->event_queue);
+       struct faultd_event *new_ev = NULL;
+       struct dm_event_data ev_data = {
+               .reason = ev,
+               .who_made = MODULE_NAME,
+               .action = FAULTD_ACTION_RECOVERY_REBOOT_ID,
+       };
+       struct system_booted_event *sb_ev = NULL;
+       struct faultd_event *tmp_ev = NULL;
+       char *service_name = NULL;
+       char *last_service_name = NULL;
+       int ret;
+
+       sb_ev = get_boot_event();
+       if (sb_ev == NULL) {
+               log_error("Unable to get current boot event");
+               goto out;
+       }
+
+       for (int i = 0; i < REBOOTS_BEFORE_RECOVERY; ++i) {
+               /* step back to the boot preceding the one currently held */
+               ret = database_get_event(&sb_ev->prev_boot_event, &tmp_ev);
+               faultd_event_unref(&sb_ev->event);
+               sb_ev = NULL;
+               if (ret < 0) {
+                       log_error("Unable to get event from database");
+                       goto out;
+               }
+
+               sb_ev = to_system_booted_event(tmp_ev);
+
+               /*
+                * Reset the name first: a lookup that reports success
+                * without storing a name must not leave us comparing (and
+                * later freeing) the pointer from the previous iteration.
+                */
+               service_name = NULL;
+               ret = get_reboot_reason(&sb_ev->event.oid, &service_name);
+               if (ret < 0 || service_name == NULL) {
+                       log_debug("Unable to get reboot reason service name");
+                       goto out;
+               }
+
+               /* a different culprit than last time: nothing to decide */
+               if (last_service_name != NULL &&
+                   strcmp(service_name, last_service_name) != 0) {
+                       free(service_name);
+                       goto out;
+               }
+
+               free(last_service_name);
+               last_service_name = service_name;
+       }
+
+       /* every inspected boot blamed the same service */
+       free(last_service_name);
+       last_service_name = NULL;
+       faultd_event_unref(&sb_ev->event);
+       sb_ev = NULL;
+
+       /* make decision */
+       ret = faultd_object_new(&ev_data.action_data);
+       if (ret < 0) {
+               log_error("Unable to create faultd object");
+               goto out;
+       }
+
+       ret = faultd_fill_for_recovery_reboot(ev_data.action_data);
+       if (ret < 0) {
+               log_error("Unable to create action data");
+               faultd_object_unref(ev_data.action_data);
+               goto out;
+       }
+
+       ret = faultd_event_create(DECISION_MADE_EVENT_ID, &ev_data, &new_ev);
+       faultd_object_unref(ev_data.action_data);
+       if (ret < 0) {
+               log_error("Unable to create event");
+               goto out;
+       }
+
+       ret = event_processor_report_event(new_ev);
+       faultd_event_unref(new_ev);
+       if (ret)
+               log_error("Unable to report event");
+
+out:
+       /* single exit: release whatever is still held at this point */
+       free(last_service_name);
+       if (sb_ev != NULL)
+               faultd_event_unref(&sb_ev->event);
+       faultd_event_unref(ev);
+       return 0;
+}
+
+/*
+ * Handler descriptor of the standard reboot decision maker: it accepts the
+ * events selected by sr_event_match() and processes them in
+ * sr_make_decision().
+ */
+static struct faultd_event_handler standard_reboot_event_handler = {
+       .node = LIST_HEAD_INIT(standard_reboot_event_handler.node),
+
+       .name = MODULE_NAME,
+       .handle_event = sr_make_decision,
+       .event_match = sr_event_match,
+};
+
+/*
+ * Register the handler as a decision maker module. standard_reboot_eh is
+ * the same module name that Makefile.am and packaging/faultd.spec list.
+ */
+FAULTD_EVENT_HANDLER_REGISTER(standard_reboot_event_handler, standard_reboot_eh,
+                              FAULTD_MODULE_TYPE_DECISION_MAKER)