sbin_PROGRAMS = faultd
faultd_SOURCES = \
+ src/action/action_executed_handler.c \
src/action/action_executor.c \
src/core/action.c \
src/core/database.c \
TEST_UNIT_FILES = \
tests/faultd-leaker.service \
- tests/faultd-failed.service
+ tests/faultd-failed.service \
+ tests/faultd-success.service
nodist_unit_DATA += $(TEST_UNIT_FILES)
endif
%{_bindir}/leaker
%{_unitdir}/faultd-leaker.service
%{_unitdir}/faultd-failed.service
+%{_unitdir}/faultd-success.service
%{_sysconfdir}/faultd/conf.d/faultd-leaker.service.conf
%{_sysconfdir}/faultd/conf.d/faultd-failed.service.conf
uint32_t revents, void *userdata)
{
struct faultd_action *act = userdata;
+ struct ae_event_data ae_data = {
+ .reason = NULL,
+ .action = act->action_id,
+ .action_impl = act->impl_name,
+ .result = -ENODEV,
+ };
+ struct faultd_event *ev = NULL;
+ struct action_executed_event *exe_info;
+ int action_ret;
+ int ret;
+
+ ret = faultd_object_new(&ae_data.action_log);
+ if (ret < 0) {
+ log_error("Unable to init faultd object");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = faultd_event_create(ACTION_EXECUTED_EVENT_ID, &ae_data, &ev);
+ if (ret < 0) {
+ log_error("Unable to create action executed event");
+ faultd_object_destroy(ae_data.action_log);
+ goto out;
+ }
- return act->execute(act);
+ exe_info = to_action_executed_event(ev);
+
+ /*
+ * If action is not fully executed after returning from this function
+ * it takes the ownership of of exe_info and should report it when
+ * action execution is completed.
+ */
+ action_ret = act->execute(act, exe_info);
+ if (exe_info->result != -EPROBE_DEFER) {
+ if (exe_info->result < 0)
+ log_error_errno(exe_info->result, "Unable to execute action %s",
+ ae_data.action);
+
+ ret = event_processor_report_event(ev);
+ if (ret) {
+ log_error_errno(ret, "Unable to report AE event");
+ ret = 0;
+ goto cleanup;
+ }
+ }
+
+ ret = action_ret;
+
+cleanup:
+ faultd_object_unref(ae_data.action_log);
+ faultd_event_unref(ev);
+out:
+ return ret;
}
int action_executor_action_register(struct faultd_action *act)
#define FAULTD_ACTION_EXECUTOR_H
#include "event_processor.h"
+#include "action_executed_event.h"
struct faultd_action {
char *action_id;
struct sd_event_source *execution_queue_sd_source;
/* execute first action from a queue */
- int (*execute)(struct faultd_action *act);
+ int (*execute)(struct faultd_action *act,
+ struct action_executed_event *exe_info);
/* has to be initialized */
struct list_head node;
* limitations under the License.
*/
+#include <systemd/sd-bus.h>
+#include <systemd/sd-event.h>
+
#include "action.h"
#include "action_executor.h"
#include "decision_made_event.h"
#include "systemd_dbus.h"
#include "common.h"
-static int recover_service(struct faultd_action *action)
+struct priv_data {
+ struct action_executed_event *exe_info;
+ char *job;
+ char *service_path;
+ char *recovery_unit;
+ sd_bus_slot *slot;
+};
+
+static int simple_restart_service(char *service_path,
+ struct action_executed_event *exe_info)
{
- struct faultd_event *ev = pop_faultd_event(&action->execution_queue);
- struct decision_made_event *dm_ev = to_decision_made_event(ev);
- char *service_path = NULL;
int ret;
- ret = faultd_object_get_string(dm_ev->action_data, FAULTD_AD_SERVICE_NAME, &service_path);
- if (ret < 0) {
- log_error("Service path has not been provided");
- goto unref_old_event;
- }
-
- /*
- * TODO:
- * For now it's just copy paste of service restart
- * Add some real recovery action here
- */
ret = faultd_dbus_call_systemd_simple(service_path,
SYSTEMD_UNIT_INTERFACE,
"Restart",
"s",
"replace");
if (ret < 0)
- log_error_errno(ret, "Failed to restart service: %s", service_path);
+ faultd_object_append_string(exe_info->action_log, "error",
+ "Failed to call systemd");
+ else
+ log_kmsg("Restarted service: %s %d", service_path, ret);
+
+ exe_info->result = ret;
+
+ return 0;
+}
+
+static int on_job_removed(sd_bus_message *m, void *userdata,
+ sd_bus_error *ret_error)
+{
+ struct priv_data *priv_data = userdata;
+ uint32_t uid;
+ char *job;
+ char *unit;
+ char *result;
+ int ret = 0;
+
+ if (strcmp(SYSTEMD_MANAGER_INTERFACE, sd_bus_message_get_interface(m)))
+ goto finish;
+
+ if (strcmp("JobRemoved", sd_bus_message_get_member(m)))
+ goto finish;
+
+ ret = sd_bus_message_read(m, "uoss", &uid, &job, &unit, &result);
+ if (ret < 0) {
+ log_error("Malformed systemd signal");
+ return -EINVAL;
+ }
+
+ if (strcmp(priv_data->job, job) == 0) {
+ ret = simple_restart_service(priv_data->service_path, priv_data->exe_info);
+
+ /* TODO: improve error handling */
+ event_processor_report_event(&priv_data->exe_info->event);
+ faultd_event_unref(&priv_data->exe_info->event);
+ free(priv_data->job);
+ sd_bus_slot_unref(priv_data->slot);
+
+ log_kmsg("Recovered service: %s", priv_data->service_path);
+ free(priv_data);
+ } else {
+ ret = 0;
+ }
+finish:
+ return ret;
+}
+
+static int recover_service(struct faultd_action *action,
+ struct action_executed_event *exe_info)
+{
+ struct faultd_event *ev = pop_faultd_event(&action->execution_queue);
+ struct decision_made_event *dm_ev = to_decision_made_event(ev);
+ struct priv_data *priv_data;
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *addr;
+ const char *job;
+ char *service_path = NULL;
+ char *recovery_unit = NULL;
+ int ret;
+
+ /*
+ * We are passing an event to exe_info, so there is no need to unref it
+ * in this function
+ */
+ exe_info->reason = ev;
+
+ ret = faultd_object_get_string(dm_ev->action_data, FAULTD_AD_SERVICE_NAME,
+ &service_path);
+ if (ret < 0) {
+ faultd_object_append_string(exe_info->action_log, "error",
+ "Service path has not been provided");
+ exe_info->result = -EINVAL;
+ return 0;
+ }
+
+ ret = faultd_object_get_string(dm_ev->action_data, FAULTD_AD_CLEANUP_UNIT,
+ &recovery_unit);
+ if (ret < 0) {
+ faultd_object_append_string(exe_info->action_log, "warning",
+ "Cleanup service has not been provided");
+ goto simple_reset;
+ }
+
+ priv_data = calloc(1, sizeof(*priv_data));
+ if (!priv_data) {
+ faultd_object_append_string(exe_info->action_log, "error",
+ "Unable to allocate memory. Trying simple reset.");
+ goto simple_reset;
+ }
+
+ priv_data->exe_info = exe_info;
+ priv_data->recovery_unit = recovery_unit;
+ priv_data->service_path = service_path;
+
+ ret = faultd_acquire_systemd_bus(&bus);
+ if (ret < 0) {
+ faultd_object_append_string(exe_info->action_log, "error",
+ "Unable to acquire systemd bus");
+ exe_info->result = ret;
+ goto free_priv_data;
+ }
+
+ ret = sd_bus_get_address(bus, &addr);
+ if (ret < 0) {
+ faultd_object_append_string(exe_info->action_log, "error",
+ "Unable to get bus address");
+ exe_info->result = ret;
+ goto free_priv_data;
+ }
+
+ if (strcmp(SYSTEMD_PRIVATE_BUS, addr) == 0)
+ ret = sd_bus_add_filter(bus,
+ &priv_data->slot,
+ on_job_removed,
+ priv_data);
else
- log_kmsg("Recovering service: %s", service_path);
+ ret = sd_bus_add_match(bus,
+ &priv_data->slot,
+ "type='signal',sender='org.freedesktop.systemd1',"
+ "interface='org.freedesktop.systemd1.Manager',"
+ "member='JobRemoved',"
+ "path_namespace='/org/freedesktop/systemd1'",
+ on_job_removed,
+ priv_data);
+ if (ret < 0) {
+ faultd_object_append_string(exe_info->action_log, "error",
+ "Unable to register for JobRemoved signal");
+ exe_info->result = ret;
+ goto free_priv_data;
+ }
+
+ ret = sd_bus_call_method(bus,
+ SYSTEMD_SERVICE,
+ SYSTEMD_OBJ,
+ SYSTEMD_MANAGER_INTERFACE,
+ "StartUnit",
+ &error,
+ &reply,
+ "ss",
+ recovery_unit,
+ "replace");
+ if (ret < 0) {
+ faultd_object_append_string(exe_info->action_log, "error",
+ "Unable to register for JobRemoved signal");
+ exe_info->result = ret;
+ goto unregister_handler;
+ }
+
+ ret = sd_bus_message_read(reply, "o", &job);
+ if (ret < 0) {
+ faultd_object_append_string(exe_info->action_log, "warning",
+ "Malformed systemd reply. Unable to identify job.");
+ exe_info->result = ret;
+ goto unregister_handler;
+ }
+
+ priv_data->job = strdup(job);
+ if (!priv_data->job) {
+ faultd_object_append_string(exe_info->action_log, "error",
+ "Unable to duplicate job id");
+ exe_info->result = ret;
+ goto unregister_handler;
+ }
+
+ exe_info->result = -EPROBE_DEFER;
+ faultd_event_ref(&exe_info->event);
+ return 0;
+
+unregister_handler:
+ sd_bus_slot_unref(priv_data->slot);
+
+free_priv_data:
+ free(priv_data);
-unref_old_event:
- faultd_event_unref(ev);
return 0;
+
+simple_reset:
+ return simple_restart_service(service_path, exe_info);
}
static struct faultd_action service_recover_action = {
#include "systemd_dbus.h"
#include "common.h"
-static int restart_service(struct faultd_action *action)
+static int restart_service(struct faultd_action *action,
+ struct action_executed_event *exe_info)
{
struct faultd_event *ev = pop_faultd_event(&action->execution_queue);
struct decision_made_event *dm_ev = to_decision_made_event(ev);
char *service_path = NULL;
int ret;
- ret = faultd_object_get_string(dm_ev->action_data, FAULTD_AD_SERVICE_NAME, &service_path);
- if (ret < 0) {
- log_error("Service path has not been provided");
- goto unref_old_event;
+ /*
+ * We are passing an event to exe_info, so there is no need to unref it
+ * in this function
+ */
+ exe_info->reason = ev;
+
+ ret = faultd_object_get_string(dm_ev->action_data,
+ FAULTD_AD_SERVICE_NAME, &service_path);
+ if (!service_path) {
+ faultd_object_append_string(exe_info->action_log, "error",
+ "Service path has not been provided");
+ exe_info->result = -EINVAL;
+ return 0;
}
ret = faultd_dbus_call_systemd_simple(service_path,
"s",
"replace");
if (ret < 0)
- log_error_errno(ret, "Failed to restart service: %s", service_path);
+ faultd_object_append_string(exe_info->action_log, "error",
+ "Failed to call systemd");
else
log_kmsg("Restarting service: %s", service_path);
-unref_old_event:
- faultd_event_unref(ev);
+ exe_info->result = ret;
+
return 0;
}
#include "log.h"
#include "systemd_dbus.h"
-static int reboot_system(struct faultd_action *action)
+static int reboot_system(struct faultd_action *action,
+ struct action_executed_event *exe_info)
{
struct faultd_event *ev = pop_faultd_event(&action->execution_queue);
struct decision_made_event *dm_ev = to_decision_made_event(ev);
struct service_failed_event *sf_ev = NULL;
struct resource_violation_event *rv_ev = NULL;
+ char *service_name = "unknown";
int ret;
if (faultd_event_is_of_type(dm_ev->reason,
SERVICE_FAILED_EVENT_ID)) {
sf_ev = to_service_failed_event(dm_ev->reason);
- log_kmsg("Trying to reboot the system (reason: %s)",
- sf_ev->service.dbus_path);
+ service_name = sf_ev->service.dbus_path;
} else if (faultd_event_is_of_type(dm_ev->reason,
RESOURCE_VIOLATION_EVENT_ID)) {
rv_ev = to_resource_violation_event(dm_ev->reason);
- log_kmsg("Trying to reboot the system (reason: %s)",
- rv_ev->service.dbus_path);
+ service_name = rv_ev->service.dbus_path;
- } else {
- log_kmsg("Trying to reboot the system");
}
+ log_kmsg("Trying to reboot the system (reason: %s)", service_name);
+
+ /*
+ * We are passing an event to exe_info, so there is no need to unref it
+ * in this function
+ */
+ exe_info->reason = ev;
+
/*
* We reboot the system without using logind because this action may be
* executed when for example dbus daemon is dead
"reboot.target",
"replace-irreversibly");
if (ret < 0)
- log_error_errno(ret, "Failed to reboot the system");
+ faultd_object_append_string(exe_info->action_log, "error",
+ "Failed to call systemd");
+
+ faultd_object_append_string(exe_info->action_log, FAULTD_AD_SERVICE_NAME,
+ service_name);
+
+ exe_info->result = ret;
- faultd_event_unref(ev);
return 0;
}
#define REBOOT_PARAM_PATH "/run/systemd/reboot-param"
#define REBOOT_PARAM "recovery\n"
-static int reboot_system_to_recovery(struct faultd_action *action)
+static int reboot_system_to_recovery(struct faultd_action *action,
+ struct action_executed_event *exe_info)
{
struct faultd_event *ev = pop_faultd_event(&action->execution_queue);
mode_t prev_umask;
size_t n_wrte;
int ret;
+ /*
+ * We are passing an event to exe_info, so there is no need to unref it
+ * in this function
+ */
+ exe_info->reason = ev;
+
prev_umask = umask(0022);
rp_file = fopen(REBOOT_PARAM_PATH, "w");
if (!rp_file) {
- log_error("Unable to open param file");
+ faultd_object_append_string(exe_info->action_log, "fopen()",
+ "Unable to open param file");
goto reboot;
}
fclose(rp_file);
umask(prev_umask);
- if (n_wrte < 1)
- log_error("Unable to set reboot param");
-
+ if (n_wrte < 1) {
+ faultd_object_append_string(exe_info->action_log, "fwrite()",
+ "Unable to set reboot param");
+ }
reboot:
log_kmsg("Trying to reboot to recovery");
/*
"reboot.target",
"replace-irreversibly");
if (ret < 0)
- log_error_errno(ret, "Failed to reboot the system to the recovery");
+ faultd_object_append_string(exe_info->action_log, "error",
+ "Failed to call systemd");
+
+ exe_info->result = ret;
- faultd_event_unref(ev);
return 0;
}
-{"ServiceType": "org.tizen.faultd.service.VIP"}
+{
+ "RecoveryUnit": "faultd-success.service"
+}
--- /dev/null
+[Unit]
+Description="recovery handler test"
+
+[Service]
+ExecStart=/bin/sh -c 'sleep 1; exit 0'