Generate action executed event when needed 79/137179/10
authorKrzysztof Opasiak <k.opasiak@samsung.com>
Tue, 4 Jul 2017 15:10:48 +0000 (17:10 +0200)
committerKonrad Kuchciak <k.kuchciak@samsung.com>
Thu, 12 Oct 2017 10:32:24 +0000 (12:32 +0200)
Every time when action is executed action executed event should
be generated. To avoid code duplication in each action we put this
into action executor itself.

If some action has to be executed asynchronously it should return
-EPROBE_DEFER from execute() function and then generate event when
execution has been finished.

This commit reworks also all action implementations to make them
compatible with a new interface. A lot of rework has been done esp.
in service_recovery action as it now starts the recovery unit and
delay event generation.

This commit also adds real implementation of recovery action.

Change-Id: I4aeec11e5ffd1bca910fa8a3351d1a6cdad241d8
Signed-off-by: Krzysztof Opasiak <k.opasiak@samsung.com>
Signed-off-by: Konrad Kuchciak <k.kuchciak@samsung.com>
Makefile.am
packaging/faultd.spec
src/action/action_executor.c
src/action/action_executor.h
src/action/service_recover.c
src/action/service_restart.c
src/action/system_reboot.c
src/action/system_reboot_to_recovery.c
tests/faultd-failed.service.conf
tests/faultd-success.service.in [new file with mode: 0644]

index 95efa0ad49e36e0aa569405a8a1320ed7f0d7390..29b6c9f6b5f420088b6460bdb5117334b6b411de 100644 (file)
@@ -73,6 +73,7 @@ serviceconfig_DATA = \
 
 sbin_PROGRAMS = faultd
 faultd_SOURCES = \
+    src/action/action_executed_handler.c \
     src/action/action_executor.c \
     src/core/action.c \
     src/core/database.c \
@@ -167,7 +168,8 @@ serviceconfig_DATA += \
 
 TEST_UNIT_FILES = \
     tests/faultd-leaker.service \
-    tests/faultd-failed.service
+    tests/faultd-failed.service \
+    tests/faultd-success.service
 
 nodist_unit_DATA += $(TEST_UNIT_FILES)
 endif
index c988c2df0af109d7cc60bd40e39cb0f71b923f4d..ff0a8e38b229247b01178745dd055cf826eca2ac 100644 (file)
@@ -93,6 +93,7 @@ done
 %{_bindir}/leaker
 %{_unitdir}/faultd-leaker.service
 %{_unitdir}/faultd-failed.service
+%{_unitdir}/faultd-success.service
 %{_sysconfdir}/faultd/conf.d/faultd-leaker.service.conf
 %{_sysconfdir}/faultd/conf.d/faultd-failed.service.conf
 
index ac4d3ea0095204f506ce2234456ee59e34bc92df..9d8112d473e85c5fb521ea209e20160b79a2c965 100644 (file)
@@ -87,8 +87,59 @@ static int action_executor_callback(sd_event_source *s, int fd,
                                                                        uint32_t revents, void *userdata)
 {
        struct faultd_action *act = userdata;
+       struct ae_event_data ae_data = {
+               .reason = NULL,
+               .action = act->action_id,
+               .action_impl = act->impl_name,
+               .result = -ENODEV,
+       };
+       struct faultd_event *ev = NULL;
+       struct action_executed_event *exe_info;
+       int action_ret;
+       int ret;
+
+       ret = faultd_object_new(&ae_data.action_log);
+       if (ret < 0) {
+               log_error("Unable to init faultd object");
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       ret = faultd_event_create(ACTION_EXECUTED_EVENT_ID, &ae_data, &ev);
+       if (ret < 0) {
+               log_error("Unable to create action executed event");
+               faultd_object_destroy(ae_data.action_log);
+               goto out;
+       }
 
-       return act->execute(act);
+       exe_info = to_action_executed_event(ev);
+
+       /*
+        * If action is not fully executed after returning from this function
+        * it takes the ownership of of exe_info and should report it when
+        * action execution is completed.
+        */
+       action_ret = act->execute(act, exe_info);
+       if (exe_info->result != -EPROBE_DEFER) {
+               if (exe_info->result < 0)
+                       log_error_errno(exe_info->result, "Unable to execute action %s",
+                                                       ae_data.action);
+
+               ret = event_processor_report_event(ev);
+               if (ret) {
+                       log_error_errno(ret, "Unable to report AE event");
+                       ret = 0;
+                       goto cleanup;
+               }
+       }
+
+       ret = action_ret;
+
+cleanup:
+       faultd_object_unref(ae_data.action_log);
+       faultd_event_unref(ev);
+out:
+       return ret;
 }
 
 int action_executor_action_register(struct faultd_action *act)
index 6992866d7bf9f824fb8be267376a6845b30ad826..62aa778524e2e3f06e66cd14959c84e7bdfb2b95 100644 (file)
@@ -20,6 +20,7 @@
 #define FAULTD_ACTION_EXECUTOR_H
 
 #include "event_processor.h"
+#include "action_executed_event.h"
 
 struct faultd_action {
        char *action_id;
@@ -29,7 +30,8 @@ struct faultd_action {
        struct sd_event_source *execution_queue_sd_source;
 
        /* execute first action from a queue */
-       int (*execute)(struct faultd_action *act);
+       int (*execute)(struct faultd_action *act,
+                                  struct action_executed_event *exe_info);
 
        /* has to be initialized */
        struct list_head node;
index 72a5391e95e15a2f1c9d0a42f9b996e061e04fb3..d35116b6b690ca61f311793e9416f4704035c76d 100644 (file)
@@ -16,6 +16,9 @@
  * limitations under the License.
  */
 
+#include <systemd/sd-bus.h>
+#include <systemd/sd-event.h>
+
 #include "action.h"
 #include "action_executor.h"
 #include "decision_made_event.h"
 #include "systemd_dbus.h"
 #include "common.h"
 
-static int recover_service(struct faultd_action *action)
+struct priv_data {
+       struct action_executed_event *exe_info;
+       char *job;
+       char *service_path;
+       char *recovery_unit;
+       sd_bus_slot *slot;
+};
+
+static int simple_restart_service(char *service_path,
+                                                                 struct action_executed_event *exe_info)
 {
-       struct faultd_event *ev = pop_faultd_event(&action->execution_queue);
-       struct decision_made_event *dm_ev = to_decision_made_event(ev);
-       char *service_path = NULL;
        int ret;
 
-       ret = faultd_object_get_string(dm_ev->action_data, FAULTD_AD_SERVICE_NAME, &service_path);
-       if (ret < 0) {
-               log_error("Service path has not been provided");
-               goto unref_old_event;
-       }
-
-       /*
-        * TODO:
-        * For now it's just copy paste of service restart
-        * Add some real recovery action here
-        */
        ret = faultd_dbus_call_systemd_simple(service_path,
                                                                                  SYSTEMD_UNIT_INTERFACE,
                                                                                  "Restart",
                                                                                  "s",
                                                                                  "replace");
        if (ret < 0)
-               log_error_errno(ret, "Failed to restart service: %s", service_path);
+               faultd_object_append_string(exe_info->action_log, "error",
+                                                                       "Failed to call systemd");
+       else
+               log_kmsg("Restarted service: %s %d", service_path, ret);
+
+       exe_info->result = ret;
+
+       return 0;
+}
+
+static int on_job_removed(sd_bus_message *m, void *userdata,
+                                                 sd_bus_error *ret_error)
+{
+       struct priv_data *priv_data = userdata;
+       uint32_t uid;
+       char *job;
+       char *unit;
+       char *result;
+       int ret = 0;
+
+       if (strcmp(SYSTEMD_MANAGER_INTERFACE, sd_bus_message_get_interface(m)))
+               goto finish;
+
+       if (strcmp("JobRemoved", sd_bus_message_get_member(m)))
+               goto finish;
+
+       ret = sd_bus_message_read(m, "uoss", &uid, &job, &unit, &result);
+       if (ret < 0) {
+               log_error("Malformed systemd signal");
+               return -EINVAL;
+       }
+
+       if (strcmp(priv_data->job, job) == 0) {
+               ret = simple_restart_service(priv_data->service_path, priv_data->exe_info);
+
+               /* TODO: improve error handling */
+               event_processor_report_event(&priv_data->exe_info->event);
+               faultd_event_unref(&priv_data->exe_info->event);
+               free(priv_data->job);
+               sd_bus_slot_unref(priv_data->slot);
+
+               log_kmsg("Recovered service: %s", priv_data->service_path);
+               free(priv_data);
+       } else {
+               ret = 0;
+       }
+finish:
+       return ret;
+}
+
+static int recover_service(struct faultd_action *action,
+                                                  struct action_executed_event *exe_info)
+{
+       struct faultd_event *ev = pop_faultd_event(&action->execution_queue);
+       struct decision_made_event *dm_ev = to_decision_made_event(ev);
+       struct priv_data *priv_data;
+       _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+       _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+       _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+       const char *addr;
+       const char *job;
+       char *service_path = NULL;
+       char *recovery_unit = NULL;
+       int ret;
+
+       /*
+        * We are passing an event to exe_info, so there is no need to unref it
+        * in this function
+        */
+       exe_info->reason = ev;
+
+       ret = faultd_object_get_string(dm_ev->action_data, FAULTD_AD_SERVICE_NAME,
+                                                                  &service_path);
+       if (ret < 0) {
+               faultd_object_append_string(exe_info->action_log, "error",
+                                                                       "Service path has not been provided");
+               exe_info->result = -EINVAL;
+               return 0;
+       }
+
+       ret = faultd_object_get_string(dm_ev->action_data, FAULTD_AD_CLEANUP_UNIT,
+                                                                  &recovery_unit);
+       if (ret < 0) {
+               faultd_object_append_string(exe_info->action_log, "warning",
+                                                                       "Cleanup service has not been provided");
+               goto simple_reset;
+       }
+
+       priv_data = calloc(1, sizeof(*priv_data));
+       if (!priv_data) {
+               faultd_object_append_string(exe_info->action_log, "error",
+                                                  "Unable to allocate memory. Trying simple reset.");
+               goto simple_reset;
+       }
+
+       priv_data->exe_info = exe_info;
+       priv_data->recovery_unit = recovery_unit;
+       priv_data->service_path = service_path;
+
+       ret = faultd_acquire_systemd_bus(&bus);
+       if (ret < 0) {
+               faultd_object_append_string(exe_info->action_log, "error",
+                                                                       "Unable to acquire systemd bus");
+               exe_info->result = ret;
+               goto free_priv_data;
+       }
+
+       ret = sd_bus_get_address(bus, &addr);
+       if (ret < 0) {
+               faultd_object_append_string(exe_info->action_log, "error",
+                                                                       "Unable to get bus address");
+               exe_info->result = ret;
+               goto free_priv_data;
+       }
+
+       if (strcmp(SYSTEMD_PRIVATE_BUS, addr) == 0)
+               ret = sd_bus_add_filter(bus,
+                                                          &priv_data->slot,
+                                                          on_job_removed,
+                                                          priv_data);
        else
-               log_kmsg("Recovering service: %s", service_path);
+               ret = sd_bus_add_match(bus,
+                                                         &priv_data->slot,
+                                                         "type='signal',sender='org.freedesktop.systemd1',"
+                                                         "interface='org.freedesktop.systemd1.Manager',"
+                                                         "member='JobRemoved',"
+                                                         "path_namespace='/org/freedesktop/systemd1'",
+                                                         on_job_removed,
+                                                         priv_data);
+       if (ret < 0) {
+               faultd_object_append_string(exe_info->action_log, "error",
+                                                                       "Unable to register for JobRemoved signal");
+               exe_info->result = ret;
+               goto free_priv_data;
+       }
+
+       ret = sd_bus_call_method(bus,
+                                                        SYSTEMD_SERVICE,
+                                                        SYSTEMD_OBJ,
+                                                        SYSTEMD_MANAGER_INTERFACE,
+                                                        "StartUnit",
+                                                        &error,
+                                                        &reply,
+                                                        "ss",
+                                                        recovery_unit,
+                                                        "replace");
+       if (ret < 0) {
+               faultd_object_append_string(exe_info->action_log, "error",
+                                                                       "Unable to register for JobRemoved signal");
+               exe_info->result = ret;
+               goto unregister_handler;
+       }
+
+       ret = sd_bus_message_read(reply, "o", &job);
+       if (ret < 0) {
+               faultd_object_append_string(exe_info->action_log, "warning",
+                                                                       "Malformed systemd reply. Unable to identify job.");
+               exe_info->result = ret;
+               goto unregister_handler;
+       }
+
+       priv_data->job = strdup(job);
+       if (!priv_data->job) {
+               faultd_object_append_string(exe_info->action_log, "error",
+                                                                       "Unable to duplicate job id");
+               exe_info->result = ret;
+               goto unregister_handler;
+       }
+
+       exe_info->result = -EPROBE_DEFER;
+       faultd_event_ref(&exe_info->event);
+       return 0;
+
+unregister_handler:
+       sd_bus_slot_unref(priv_data->slot);
+
+free_priv_data:
+       free(priv_data);
 
-unref_old_event:
-       faultd_event_unref(ev);
        return 0;
+
+simple_reset:
+       return simple_restart_service(service_path, exe_info);
 }
 
 static struct faultd_action service_recover_action = {
index 635d5737fceaae0d133354f1b8b41680fc71b5f0..a1351337c3c234647b1e3ca4f0ee0e5a56e0c2a8 100644 (file)
 #include "systemd_dbus.h"
 #include "common.h"
 
-static int restart_service(struct faultd_action *action)
+static int restart_service(struct faultd_action *action,
+                                                  struct action_executed_event *exe_info)
 {
        struct faultd_event *ev = pop_faultd_event(&action->execution_queue);
        struct decision_made_event *dm_ev = to_decision_made_event(ev);
        char *service_path = NULL;
        int ret;
 
-       ret = faultd_object_get_string(dm_ev->action_data, FAULTD_AD_SERVICE_NAME, &service_path);
-       if (ret < 0) {
-               log_error("Service path has not been provided");
-               goto unref_old_event;
+       /*
+        * We are passing an event to exe_info, so there is no need to unref it
+        * in this function
+        */
+       exe_info->reason = ev;
+
+       ret = faultd_object_get_string(dm_ev->action_data,
+                                                                  FAULTD_AD_SERVICE_NAME, &service_path);
+       if (!service_path) {
+               faultd_object_append_string(exe_info->action_log, "error",
+                                                                       "Service path has not been provided");
+               exe_info->result = -EINVAL;
+               return 0;
        }
 
        ret = faultd_dbus_call_systemd_simple(service_path,
@@ -42,12 +52,13 @@ static int restart_service(struct faultd_action *action)
                                                                                  "s",
                                                                                  "replace");
        if (ret < 0)
-               log_error_errno(ret, "Failed to restart service: %s", service_path);
+               faultd_object_append_string(exe_info->action_log, "error",
+                                                                       "Failed to call systemd");
        else
                log_kmsg("Restarting service: %s", service_path);
 
-unref_old_event:
-       faultd_event_unref(ev);
+       exe_info->result = ret;
+
        return 0;
 }
 
index 4f65e5c58a97124560365ff1e08aef4b72a7404c..638483cdc6697b3600e4558a285f0fad0cac4095 100644 (file)
 #include "log.h"
 #include "systemd_dbus.h"
 
-static int reboot_system(struct faultd_action *action)
+static int reboot_system(struct faultd_action *action,
+                                                struct action_executed_event *exe_info)
 {
        struct faultd_event *ev = pop_faultd_event(&action->execution_queue);
        struct decision_made_event *dm_ev = to_decision_made_event(ev);
        struct service_failed_event *sf_ev = NULL;
        struct resource_violation_event *rv_ev = NULL;
+       char *service_name = "unknown";
        int ret;
 
        if (faultd_event_is_of_type(dm_ev->reason,
                                                                SERVICE_FAILED_EVENT_ID)) {
 
                sf_ev = to_service_failed_event(dm_ev->reason);
-               log_kmsg("Trying to reboot the system (reason: %s)",
-                                sf_ev->service.dbus_path);
+               service_name = sf_ev->service.dbus_path;
 
        } else if (faultd_event_is_of_type(dm_ev->reason,
                                                                           RESOURCE_VIOLATION_EVENT_ID)) {
 
                rv_ev = to_resource_violation_event(dm_ev->reason);
-               log_kmsg("Trying to reboot the system (reason: %s)",
-                                rv_ev->service.dbus_path);
+               service_name = rv_ev->service.dbus_path;
 
-       } else {
-               log_kmsg("Trying to reboot the system");
        }
 
+       log_kmsg("Trying to reboot the system (reason: %s)", service_name);
+
+       /*
+        * We are passing an event to exe_info, so there is no need to unref it
+        * in this function
+        */
+       exe_info->reason = ev;
+
        /*
         * We reboot the system without using logind because this action may be
         * executed when for example dbus daemon is dead
@@ -61,9 +67,14 @@ static int reboot_system(struct faultd_action *action)
                                                                                  "reboot.target",
                                                                                  "replace-irreversibly");
        if (ret < 0)
-               log_error_errno(ret, "Failed to reboot the system");
+               faultd_object_append_string(exe_info->action_log, "error",
+                                                                       "Failed to call systemd");
+
+       faultd_object_append_string(exe_info->action_log, FAULTD_AD_SERVICE_NAME,
+                                                               service_name);
+
+       exe_info->result = ret;
 
-       faultd_event_unref(ev);
        return 0;
 }
 
index be7ffde9caa711308d745dddffde98a08191f8af..9b46fcf50a8b385ac0a2a7e52f5361128fcd70ef 100644 (file)
@@ -29,7 +29,8 @@
 #define REBOOT_PARAM_PATH "/run/systemd/reboot-param"
 #define REBOOT_PARAM "recovery\n"
 
-static int reboot_system_to_recovery(struct faultd_action *action)
+static int reboot_system_to_recovery(struct faultd_action *action,
+                                                                        struct action_executed_event *exe_info)
 {
        struct faultd_event *ev = pop_faultd_event(&action->execution_queue);
        mode_t prev_umask;
@@ -37,11 +38,18 @@ static int reboot_system_to_recovery(struct faultd_action *action)
        size_t n_wrte;
        int ret;
 
+       /*
+        * We are passing an event to exe_info, so there is no need to unref it
+        * in this function
+        */
+       exe_info->reason = ev;
+
        prev_umask = umask(0022);
 
        rp_file = fopen(REBOOT_PARAM_PATH, "w");
        if (!rp_file) {
-               log_error("Unable to open param file");
+               faultd_object_append_string(exe_info->action_log, "fopen()",
+                                                                       "Unable to open param file");
                goto reboot;
        }
 
@@ -49,9 +57,10 @@ static int reboot_system_to_recovery(struct faultd_action *action)
        fclose(rp_file);
        umask(prev_umask);
 
-       if (n_wrte < 1)
-               log_error("Unable to set reboot param");
-
+       if (n_wrte < 1) {
+               faultd_object_append_string(exe_info->action_log, "fwrite()",
+                                                                       "Unable to set reboot param");
+       }
 reboot:
        log_kmsg("Trying to reboot to recovery");
        /*
@@ -65,9 +74,11 @@ reboot:
                                                                                  "reboot.target",
                                                                                  "replace-irreversibly");
        if (ret < 0)
-               log_error_errno(ret, "Failed to reboot the system to the recovery");
+               faultd_object_append_string(exe_info->action_log, "error",
+                                                                       "Failed to call systemd");
+
+       exe_info->result = ret;
 
-       faultd_event_unref(ev);
        return 0;
 }
 
index 308cc20795bbe3935c8db87d437c27eea27d31be..2efa69605436f2eaf62db5e7b1e3f6a2baf624c7 100644 (file)
@@ -1,2 +1,4 @@
-{"ServiceType": "org.tizen.faultd.service.VIP"}
+{
+        "RecoveryUnit": "faultd-success.service"
+}
 
diff --git a/tests/faultd-success.service.in b/tests/faultd-success.service.in
new file mode 100644 (file)
index 0000000..7c76282
--- /dev/null
@@ -0,0 +1,5 @@
+[Unit]
+Description="recovery handler test"
+
+[Service]
+ExecStart=/bin/sh -c 'sleep 1; exit 0'