Add decision maker to handle all standard services 66/131366/7
author Krzysztof Opasiak <k.opasiak@samsung.com>
Fri, 26 May 2017 15:04:37 +0000 (17:04 +0200)
committer Krzysztof Opasiak <k.opasiak@samsung.com>
Mon, 5 Jun 2017 17:00:29 +0000 (19:00 +0200)
For now we always clean up and restart the service.
In the future we should check the fault history and reboot the platform
after 3 failures of the same service.

Change-Id: I76562af73d65ec350f8bd34a8b96545a11e5e1ac
Signed-off-by: Krzysztof Opasiak <k.opasiak@samsung.com>
Makefile.am
src/decision_makers/standard_fault_dm.c [new file with mode: 0644]

index d7d43b63f7afcba1854a80e0fee7c8f0ab0f2279..16f23f937798a90bc90b962f3a81bad2e7a955c5 100644 (file)
@@ -48,6 +48,7 @@ faultd_SOURCES = \
     src/core/module.c \
     src/core/service.c \
     src/decision_makers/rv_dm.c \
+    src/decision_makers/standard_fault_dm.c \
     src/decision_makers/vip_fault_dm.c \
     src/event_types/decision_made_event.c \
     src/event_types/resource_violation_event.c \
diff --git a/src/decision_makers/standard_fault_dm.c b/src/decision_makers/standard_fault_dm.c
new file mode 100644 (file)
index 0000000..fbde1f3
--- /dev/null
@@ -0,0 +1,105 @@
+/*
+ * This file is part of faultd.
+ *
+ * Copyright © 2017 Samsung Electronics
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <malloc.h>
+#include <errno.h>
+
+#include "service_failed_event.h"
+#include "decision_made_event.h"
+#include "action.h"
+#include "event_processor.h"
+#include "log.h"
+#include "service.h"
+
+#define MODULE_NAME "standard_fault_decision_maker"
+
+/*
+ * Event filter of the standard-fault decision maker.
+ *
+ * An event is accepted only when it is of type SERVICE_FAILED_EVENT_ID and
+ * the service it refers to is of type FAULTD_SERVICE_TYPE_NORMAL.
+ * The handler argument is not used.
+ *
+ * Returns 1 when the event matches, 0 otherwise.
+ */
+static int sf_event_match(struct faultd_event_handler *handler,
+                          struct faultd_event *ev)
+{
+       struct service_failed_event *failed = to_service_failed_event(ev);
+
+       /* Anything that is not a service-failed event is not ours. */
+       if (!faultd_event_is_of_type(ev, SERVICE_FAILED_EVENT_ID))
+               return 0;
+
+       /* Only services of the normal type are handled by this module. */
+       if (!systemd_service_is_of_type(&failed->service, FAULTD_SERVICE_TYPE_NORMAL))
+               return 0;
+
+       return 1;
+}
+
+/*
+ * Handle one queued service-failed event by reporting a "decision made"
+ * event that requests recovery of the failed service.
+ *
+ * The next event is popped from handler->event_queue, the D-Bus path of the
+ * failed service is duplicated as the action data, and a new
+ * DECISION_MADE_EVENT_ID event carrying FAULTD_ACTION_SERVICE_RECOVER_ID is
+ * created and passed to event_processor_report_event(). The reference to the
+ * popped event is dropped on every path.
+ *
+ * Returns 0 on success, -ENOMEM when the D-Bus path cannot be duplicated, or
+ * the error code returned by faultd_event_create() or
+ * event_processor_report_event().
+ *
+ * NOTE(review): strdup() is declared in <string.h>, which this file does not
+ * include directly - presumably a project header pulls it in; confirm.
+ */
+static int sf_make_decision(struct faultd_event_handler *handler)
+{
+       struct faultd_event *ev = nqueue_pop(&handler->event_queue,
+                                            struct faultd_event,
+                                            nq_node);
+       struct service_failed_event *sf_ev = to_service_failed_event(ev);
+       struct faultd_event *new_ev;
+       struct dm_event_data ev_data = {
+               .reason = ev,
+               .who_made = MODULE_NAME,
+               .action = FAULTD_ACTION_SERVICE_RECOVER_ID,
+               .action_data = NULL,
+               .action_data_release = free,
+       };
+       int ret;
+
+       /*
+        * TODO:
+        * For now we always recover the service and do nothing more.
+        * When event retrieval from DB is ready this function should also
+        * consider the fault history. If the same service failed for the 3rd
+        * time during the same boot we should request to reboot the platform
+        * instead of recovering that service.
+        */
+
+       ev_data.action_data = strdup(sf_ev->service.dbus_path);
+       if (!ev_data.action_data) {
+               log_error("Unable to duplicate service name");
+               faultd_event_unref(ev);
+               return -ENOMEM;
+       }
+
+       ret = faultd_event_create(DECISION_MADE_EVENT_ID, &ev_data, &new_ev);
+       /* Our reference to the popped event is no longer needed past this point. */
+       faultd_event_unref(ev);
+       if (ret) {
+               log_error("Unable to create event");
+               goto free_action_data;
+       }
+
+       ret = event_processor_report_event(new_ev);
+       if (ret) {
+               log_error("Unable to report event");
+               goto put_new_event;
+       }
+
+       return 0;
+
+put_new_event:
+       /*
+        * NOTE(review): presumably dropping this reference also releases
+        * action_data through action_data_release - confirm. That is why this
+        * path must not fall through to free_action_data.
+        */
+       faultd_event_unref(new_ev);
+       return ret;
+
+free_action_data:
+       /* The event was never created, so the duplicated path is still ours. */
+       free(ev_data.action_data);
+       return ret;
+}
+
+/*
+ * Event handler descriptor of the standard-fault decision maker. It names
+ * the module and wires sf_event_match() and sf_make_decision() into the
+ * event_match and handle_event callbacks.
+ */
+static struct faultd_event_handler standard_fault_event_handler = {
+       /* List node initialised to refer to itself via LIST_HEAD_INIT. */
+       .node = LIST_HEAD_INIT(standard_fault_event_handler.node),
+
+       .name = MODULE_NAME,
+       .handle_event = sf_make_decision,
+       .event_match = sf_event_match,
+};
+
+/*
+ * Pass the handler to FAULTD_EVENT_HANDLER_REGISTER under the identifier
+ * standard_fault_eh with module type FAULTD_MODULE_TYPE_DECISION_MAKER.
+ * NOTE(review): the macro is defined elsewhere; presumably it generates the
+ * module registration boilerplate for this handler - confirm.
+ */
+FAULTD_EVENT_HANDLER_REGISTER(standard_fault_event_handler,
+                              standard_fault_eh,
+                              FAULTD_MODULE_TYPE_DECISION_MAKER)