From: Krzysztof Opasiak Date: Tue, 4 Jul 2017 15:10:48 +0000 (+0200) Subject: Generate action executed event when needed X-Git-Tag: submit/tizen/20180222.151354~39 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=bc6ae1c41f8e6ece594e21dbdadfd1a0906965ff;p=platform%2Fcore%2Fsystem%2Ffaultd.git Generate action executed event when needed Every time when action is executed action executed event should be generated. To avoid code duplication in each action we put this into action executor itself. If some action has to be executed asynchronously it should return -EPROBE_DEFER from execute() function and then generate event when execution has been finished. This commit reworks also all action implementations to make them compatible with a new interface. A lot of rework has been done esp. in service_recovery action as it now starts the recovery unit and delay event generation. This commit also adds real implementation of recovery action. Change-Id: I4aeec11e5ffd1bca910fa8a3351d1a6cdad241d8 Signed-off-by: Krzysztof Opasiak Signed-off-by: Konrad Kuchciak --- diff --git a/Makefile.am b/Makefile.am index 95efa0a..29b6c9f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -73,6 +73,7 @@ serviceconfig_DATA = \ sbin_PROGRAMS = faultd faultd_SOURCES = \ + src/action/action_executed_handler.c \ src/action/action_executor.c \ src/core/action.c \ src/core/database.c \ @@ -167,7 +168,8 @@ serviceconfig_DATA += \ TEST_UNIT_FILES = \ tests/faultd-leaker.service \ - tests/faultd-failed.service + tests/faultd-failed.service \ + tests/faultd-success.service nodist_unit_DATA += $(TEST_UNIT_FILES) endif diff --git a/packaging/faultd.spec b/packaging/faultd.spec index c988c2d..ff0a8e3 100644 --- a/packaging/faultd.spec +++ b/packaging/faultd.spec @@ -93,6 +93,7 @@ done %{_bindir}/leaker %{_unitdir}/faultd-leaker.service %{_unitdir}/faultd-failed.service +%{_unitdir}/faultd-success.service %{_sysconfdir}/faultd/conf.d/faultd-leaker.service.conf %{_sysconfdir}/faultd/conf.d/faultd-failed.service.conf diff --git a/src/action/action_executor.c b/src/action/action_executor.c index ac4d3ea..9d8112d 100644 --- a/src/action/action_executor.c +++ b/src/action/action_executor.c @@ -87,8 +87,59 @@ static int action_executor_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) { struct faultd_action *act = userdata; + struct ae_event_data ae_data = { + .reason = NULL, + .action = act->action_id, + .action_impl = act->impl_name, + .result = -ENODEV, + }; + struct faultd_event *ev = NULL; + struct action_executed_event *exe_info; + int action_ret; + int ret; + + ret = faultd_object_new(&ae_data.action_log); + if (ret < 0) { + log_error("Unable to init faultd object"); + ret = -ENOMEM; + goto out; + } + + ret = faultd_event_create(ACTION_EXECUTED_EVENT_ID, &ae_data, &ev); + if (ret < 0) { + log_error("Unable to create action executed event"); + faultd_object_destroy(ae_data.action_log); + goto out; + } - return act->execute(act); + exe_info = to_action_executed_event(ev); + + /* + * If action is not fully executed after returning from this function + * it takes the ownership of of exe_info and should report it when + * action execution is completed. + */ + action_ret = act->execute(act, exe_info); + if (exe_info->result != -EPROBE_DEFER) { + if (exe_info->result < 0) + log_error_errno(exe_info->result, "Unable to execute action %s", + ae_data.action); + + ret = event_processor_report_event(ev); + if (ret) { + log_error_errno(ret, "Unable to report AE event"); + ret = 0; + goto cleanup; + } + } + + ret = action_ret; + +cleanup: + faultd_object_unref(ae_data.action_log); + faultd_event_unref(ev); +out: + return ret; } int action_executor_action_register(struct faultd_action *act) diff --git a/src/action/action_executor.h b/src/action/action_executor.h index 6992866..62aa778 100644 --- a/src/action/action_executor.h +++ b/src/action/action_executor.h @@ -20,6 +20,7 @@ #define FAULTD_ACTION_EXECUTOR_H #include "event_processor.h" +#include "action_executed_event.h" struct faultd_action { char *action_id; @@ -29,7 +30,8 @@ struct faultd_action { struct sd_event_source *execution_queue_sd_source; /* execute first action from a queue */ - int (*execute)(struct faultd_action *act); + int (*execute)(struct faultd_action *act, + struct action_executed_event *exe_info); /* has to be initialized */ struct list_head node; diff --git a/src/action/service_recover.c b/src/action/service_recover.c index 72a5391..d35116b 100644 --- a/src/action/service_recover.c +++ b/src/action/service_recover.c @@ -16,6 +16,9 @@ * limitations under the License. */ +#include +#include + #include "action.h" #include "action_executor.h" #include "decision_made_event.h" @@ -23,37 +26,208 @@ #include "systemd_dbus.h" #include "common.h" -static int recover_service(struct faultd_action *action) +struct priv_data { + struct action_executed_event *exe_info; + char *job; + char *service_path; + char *recovery_unit; + sd_bus_slot *slot; +}; + +static int simple_restart_service(char *service_path, + struct action_executed_event *exe_info) { - struct faultd_event *ev = pop_faultd_event(&action->execution_queue); - struct decision_made_event *dm_ev = to_decision_made_event(ev); - char *service_path = NULL; int ret; - ret = faultd_object_get_string(dm_ev->action_data, FAULTD_AD_SERVICE_NAME, &service_path); - if (ret < 0) { - log_error("Service path has not been provided"); - goto unref_old_event; - } - - /* - * TODO: - * For now it's just copy paste of service restart - * Add some real recovery action here - */ ret = faultd_dbus_call_systemd_simple(service_path, SYSTEMD_UNIT_INTERFACE, "Restart", "s", "replace"); if (ret < 0) - log_error_errno(ret, "Failed to restart service: %s", service_path); + faultd_object_append_string(exe_info->action_log, "error", + "Failed to call systemd"); + else + log_kmsg("Restarted service: %s %d", service_path, ret); + + exe_info->result = ret; + + return 0; +} + +static int on_job_removed(sd_bus_message *m, void *userdata, + sd_bus_error *ret_error) +{ + struct priv_data *priv_data = userdata; + uint32_t uid; + char *job; + char *unit; + char *result; + int ret = 0; + + if (strcmp(SYSTEMD_MANAGER_INTERFACE, sd_bus_message_get_interface(m))) + goto finish; + + if (strcmp("JobRemoved", sd_bus_message_get_member(m))) + goto finish; + + ret = sd_bus_message_read(m, "uoss", &uid, &job, &unit, &result); + if (ret < 0) { + log_error("Malformed systemd signal"); + return -EINVAL; + } + + if (strcmp(priv_data->job, job) == 0) { + ret = simple_restart_service(priv_data->service_path, priv_data->exe_info); + + /* TODO: improve error handling */ + event_processor_report_event(&priv_data->exe_info->event); + faultd_event_unref(&priv_data->exe_info->event); + free(priv_data->job); + sd_bus_slot_unref(priv_data->slot); + + log_kmsg("Recovered service: %s", priv_data->service_path); + free(priv_data); + } else { + ret = 0; + } +finish: + return ret; +} + +static int recover_service(struct faultd_action *action, + struct action_executed_event *exe_info) +{ + struct faultd_event *ev = pop_faultd_event(&action->execution_queue); + struct decision_made_event *dm_ev = to_decision_made_event(ev); + struct priv_data *priv_data; + _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL; + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + const char *addr; + const char *job; + char *service_path = NULL; + char *recovery_unit = NULL; + int ret; + + /* + * We are passing an event to exe_info, so there is no need to unref it + * in this function + */ + exe_info->reason = ev; + + ret = faultd_object_get_string(dm_ev->action_data, FAULTD_AD_SERVICE_NAME, + &service_path); + if (ret < 0) { + faultd_object_append_string(exe_info->action_log, "error", + "Service path has not been provided"); + exe_info->result = -EINVAL; + return 0; + } + + ret = faultd_object_get_string(dm_ev->action_data, FAULTD_AD_CLEANUP_UNIT, + &recovery_unit); + if (ret < 0) { + faultd_object_append_string(exe_info->action_log, "warning", + "Cleanup service has not been provided"); + goto simple_reset; + } + + priv_data = calloc(1, sizeof(*priv_data)); + if (!priv_data) { + faultd_object_append_string(exe_info->action_log, "error", + "Unable to allocate memory. Trying simple reset."); + goto simple_reset; + } + + priv_data->exe_info = exe_info; + priv_data->recovery_unit = recovery_unit; + priv_data->service_path = service_path; + + ret = faultd_acquire_systemd_bus(&bus); + if (ret < 0) { + faultd_object_append_string(exe_info->action_log, "error", + "Unable to acquire systemd bus"); + exe_info->result = ret; + goto free_priv_data; + } + + ret = sd_bus_get_address(bus, &addr); + if (ret < 0) { + faultd_object_append_string(exe_info->action_log, "error", + "Unable to get bus address"); + exe_info->result = ret; + goto free_priv_data; + } + + if (strcmp(SYSTEMD_PRIVATE_BUS, addr) == 0) + ret = sd_bus_add_filter(bus, + &priv_data->slot, + on_job_removed, + priv_data); else - log_kmsg("Recovering service: %s", service_path); + ret = sd_bus_add_match(bus, + &priv_data->slot, + "type='signal',sender='org.freedesktop.systemd1'," + "interface='org.freedesktop.systemd1.Manager'," + "member='JobRemoved'," + "path_namespace='/org/freedesktop/systemd1'", + on_job_removed, + priv_data); + if (ret < 0) { + faultd_object_append_string(exe_info->action_log, "error", + "Unable to register for JobRemoved signal"); + exe_info->result = ret; + goto free_priv_data; + } + + ret = sd_bus_call_method(bus, + SYSTEMD_SERVICE, + SYSTEMD_OBJ, + SYSTEMD_MANAGER_INTERFACE, + "StartUnit", + &error, + &reply, + "ss", + recovery_unit, + "replace"); + if (ret < 0) { + faultd_object_append_string(exe_info->action_log, "error", + "Unable to register for JobRemoved signal"); + exe_info->result = ret; + goto unregister_handler; + } + + ret = sd_bus_message_read(reply, "o", &job); + if (ret < 0) { + faultd_object_append_string(exe_info->action_log, "warning", + "Malformed systemd reply. Unable to identify job."); + exe_info->result = ret; + goto unregister_handler; + } + + priv_data->job = strdup(job); + if (!priv_data->job) { + faultd_object_append_string(exe_info->action_log, "error", + "Unable to duplicate job id"); + exe_info->result = ret; + goto unregister_handler; + } + + exe_info->result = -EPROBE_DEFER; + faultd_event_ref(&exe_info->event); + return 0; + +unregister_handler: + sd_bus_slot_unref(priv_data->slot); + +free_priv_data: + free(priv_data); -unref_old_event: - faultd_event_unref(ev); return 0; + +simple_reset: + return simple_restart_service(service_path, exe_info); } static struct faultd_action service_recover_action = { diff --git a/src/action/service_restart.c b/src/action/service_restart.c index 635d573..a135133 100644 --- a/src/action/service_restart.c +++ b/src/action/service_restart.c @@ -23,17 +23,27 @@ #include "systemd_dbus.h" #include "common.h" -static int restart_service(struct faultd_action *action) +static int restart_service(struct faultd_action *action, + struct action_executed_event *exe_info) { struct faultd_event *ev = pop_faultd_event(&action->execution_queue); struct decision_made_event *dm_ev = to_decision_made_event(ev); char *service_path = NULL; int ret; - ret = faultd_object_get_string(dm_ev->action_data, FAULTD_AD_SERVICE_NAME, &service_path); - if (ret < 0) { - log_error("Service path has not been provided"); - goto unref_old_event; + /* + * We are passing an event to exe_info, so there is no need to unref it + * in this function + */ + exe_info->reason = ev; + + ret = faultd_object_get_string(dm_ev->action_data, + FAULTD_AD_SERVICE_NAME, &service_path); + if (!service_path) { + faultd_object_append_string(exe_info->action_log, "error", + "Service path has not been provided"); + exe_info->result = -EINVAL; + return 0; } ret = faultd_dbus_call_systemd_simple(service_path, @@ -42,12 +52,13 @@ static int restart_service(struct faultd_action *action) "s", "replace"); if (ret < 0) - log_error_errno(ret, "Failed to restart service: %s", service_path); + faultd_object_append_string(exe_info->action_log, "error", + "Failed to call systemd"); else log_kmsg("Restarting service: %s", service_path); -unref_old_event: - faultd_event_unref(ev); + exe_info->result = ret; + return 0; } diff --git a/src/action/system_reboot.c b/src/action/system_reboot.c index 4f65e5c..638483c 100644 --- a/src/action/system_reboot.c +++ b/src/action/system_reboot.c @@ -24,32 +24,38 @@ #include "log.h" #include "systemd_dbus.h" -static int reboot_system(struct faultd_action *action) +static int reboot_system(struct faultd_action *action, + struct action_executed_event *exe_info) { struct faultd_event *ev = pop_faultd_event(&action->execution_queue); struct decision_made_event *dm_ev = to_decision_made_event(ev); struct service_failed_event *sf_ev = NULL; struct resource_violation_event *rv_ev = NULL; + char *service_name = "unknown"; int ret; if (faultd_event_is_of_type(dm_ev->reason, SERVICE_FAILED_EVENT_ID)) { sf_ev = to_service_failed_event(dm_ev->reason); - log_kmsg("Trying to reboot the system (reason: %s)", - sf_ev->service.dbus_path); + service_name = sf_ev->service.dbus_path; } else if (faultd_event_is_of_type(dm_ev->reason, RESOURCE_VIOLATION_EVENT_ID)) { rv_ev = to_resource_violation_event(dm_ev->reason); - log_kmsg("Trying to reboot the system (reason: %s)", - rv_ev->service.dbus_path); + service_name = rv_ev->service.dbus_path; - } else { - log_kmsg("Trying to reboot the system"); } + log_kmsg("Trying to reboot the system (reason: %s)", service_name); + + /* + * We are passing an event to exe_info, so there is no need to unref it + * in this function + */ + exe_info->reason = ev; + /* * We reboot the system without using logind because this action may be * executed when for example dbus daemon is dead @@ -61,9 +67,14 @@ static int reboot_system(struct faultd_action *action) "reboot.target", "replace-irreversibly"); if (ret < 0) - log_error_errno(ret, "Failed to reboot the system"); + faultd_object_append_string(exe_info->action_log, "error", + "Failed to call systemd"); + + faultd_object_append_string(exe_info->action_log, FAULTD_AD_SERVICE_NAME, + service_name); + + exe_info->result = ret; - faultd_event_unref(ev); return 0; } diff --git a/src/action/system_reboot_to_recovery.c b/src/action/system_reboot_to_recovery.c index be7ffde..9b46fcf 100644 --- a/src/action/system_reboot_to_recovery.c +++ b/src/action/system_reboot_to_recovery.c @@ -29,7 +29,8 @@ #define REBOOT_PARAM_PATH "/run/systemd/reboot-param" #define REBOOT_PARAM "recovery\n" -static int reboot_system_to_recovery(struct faultd_action *action) +static int reboot_system_to_recovery(struct faultd_action *action, + struct action_executed_event *exe_info) { struct faultd_event *ev = pop_faultd_event(&action->execution_queue); mode_t prev_umask; @@ -37,11 +38,18 @@ static int reboot_system_to_recovery(struct faultd_action *action) size_t n_wrte; int ret; + /* + * We are passing an event to exe_info, so there is no need to unref it + * in this function + */ + exe_info->reason = ev; + prev_umask = umask(0022); rp_file = fopen(REBOOT_PARAM_PATH, "w"); if (!rp_file) { - log_error("Unable to open param file"); + faultd_object_append_string(exe_info->action_log, "fopen()", + "Unable to open param file"); goto reboot; } @@ -49,9 +57,10 @@ static int reboot_system_to_recovery(struct faultd_action *action) fclose(rp_file); umask(prev_umask); - if (n_wrte < 1) - log_error("Unable to set reboot param"); - + if (n_wrte < 1) { + faultd_object_append_string(exe_info->action_log, "fwrite()", + "Unable to set reboot param"); + } reboot: log_kmsg("Trying to reboot to recovery"); /* @@ -65,9 +74,11 @@ reboot: "reboot.target", "replace-irreversibly"); if (ret < 0) - log_error_errno(ret, "Failed to reboot the system to the recovery"); + faultd_object_append_string(exe_info->action_log, "error", + "Failed to call systemd"); + + exe_info->result = ret; - faultd_event_unref(ev); return 0; } diff --git a/tests/faultd-failed.service.conf b/tests/faultd-failed.service.conf index 308cc20..2efa696 100644 --- a/tests/faultd-failed.service.conf +++ b/tests/faultd-failed.service.conf @@ -1,2 +1,4 @@ -{"ServiceType": "org.tizen.faultd.service.VIP"} +{ + "RecoveryUnit": "faultd-success.service" +} diff --git a/tests/faultd-success.service.in b/tests/faultd-success.service.in new file mode 100644 index 0000000..7c76282 --- /dev/null +++ b/tests/faultd-success.service.in @@ -0,0 +1,5 @@ +[Unit] +Description="recovery handler test" + +[Service] +ExecStart=/bin/sh -c 'sleep 1; exit 0'