TL;DR: Abort the high priority thread responsible for app startup if
handling a single event takes longer than 90s.
Recent defect DF250609-00600 was reported because an application failed
to start within the given time limit. The main security-manager thread was
handling the client request properly, but there are no logs from the high
priority thread responsible for application startup. This suggests a
deadlock in this thread. We have no stack trace from security-manager
because it was the application that was aborted.
We can't do anything about the robustness tests' log range, so from now
on the high priority thread will detect deadlocks by itself and abort
before the app is killed if the event processing takes too much time.
By crashing security-manager we'll get the stack trace and be able to
analyze the potential deadlock.
Note that the watchdog will not be triggered if an app startup request
is delayed because of many requests in the queue being processed slowly
but below the given timeout. In such a case, it is assumed that the thread
is responsive and its logs should be visible. Still, it may lead to an
application being killed by amd.
Change-Id: Iaefe9bb831940756298751bcd1aee0d196ecce05
${SERVER_PATH}/main/socket-manager.cpp
${SERVER_PATH}/main/service-thread-dispatcher.cpp
${SERVER_PATH}/main/server-main.cpp
+ ${SERVER_PATH}/main/time-bomb.cpp
${SERVER_PATH}/service/service.cpp
)
${CMAKE_THREAD_LIBS_INIT}
${SERVER_DEP_LIBRARIES}
"-pthread"
+ "-lrt"
)
########## CLEANUP SERVICE ######################################
#pragma once
+#include <atomic>
#include <condition_variable>
-
#include <mutex>
+#include <optional>
#include <queue>
#include <thread>
-#include <protocols.h>
-
#include <event.h>
+#include <protocols.h>
+#include <time-bomb.h>
namespace SecurityManager {
~Queue() {}
void putEvent(Event &&event);
- bool empty();
- Event getEvent();
- bool wait();
+ std::optional<Event> getEvent(TimeBomb* timeBomb); // Blocks until an event is queued or quit() is called; std::nullopt means quit. timeBomb may be nullptr.
void quit();
private:
std::mutex m_mutex;
std::queue<Event> m_queue;
std::condition_variable m_waitCondition;
- bool m_quit = false;
+ std::atomic_bool m_quit = false;
};
struct QueueThread {
--- /dev/null
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All rights reserved
+ *
+ * This file is licensed under the terms of MIT License or the Apache License
+ * Version 2.0 of your choice. See the LICENSE.MIT file for MIT license details.
+ * See the LICENSE file or the notice below for Apache License Version 2.0
+ * details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <dpl/exception.h>
+
+#include <ctime>
+#include <sys/types.h>
+
+namespace SecurityManager {
+
+class TimeBomb final { // Watchdog built on a POSIX timer: sends SIGABRT to the watched thread unless defused or rearmed in time
+public:
+ DECLARE_EXCEPTION_TYPE(SecurityManager::Exception, Base); // used to report timer_create()/timer_settime() failures
+
+ explicit TimeBomb(pid_t tid); // tid: id of the thread to be signalled when the timer expires; the bomb starts unarmed
+ ~TimeBomb(); // deletes the timer; a failure is only logged
+
+ TimeBomb(TimeBomb &&other) = delete; // neither movable nor copyable: the object owns the timer id
+ TimeBomb& operator=(TimeBomb &&other) = delete;
+ TimeBomb(const TimeBomb&) = delete;
+ TimeBomb& operator=(const TimeBomb&) = delete;
+
+ void rearm(); // (re)starts the countdown from the full timeout
+ void defuse(); // stops the countdown; does nothing when the bomb is not armed
+
+private:
+ void set(time_t seconds); // programs a one-shot expiry after `seconds`; 0 disarms the timer
+
+ timer_t m_timerId;
+ bool m_armed = false; // true after rearm(), false again after defuse()
+};
+
+} // namespace SecurityManager
m_waitCondition.notify_one();
}
-bool Queue::empty() {
- std::lock_guard<std::mutex> lock(m_mutex);
- return m_queue.empty();
-}
-Event Queue::getEvent() {
- std::lock_guard<std::mutex> lock(m_mutex);
+/*
+ * Blocks until an event is available or quit() has been requested.
+ *
+ * Returns the oldest queued event, or std::nullopt once quit() has been requested (events that
+ * are still queued at that point are not returned).
+ *
+ * timeBomb may be nullptr. Otherwise it is defused while the thread is idle or quitting and
+ * rearmed right before an event is handed out, so it measures the handling of a single event.
+ */
+std::optional<Event> Queue::getEvent(TimeBomb* timeBomb) {
+ auto lock = std::unique_lock{m_mutex};
+
+ if (m_quit.load(std::memory_order_relaxed)) {
+ if (timeBomb) {
+ timeBomb->defuse();
+ }
+ return std::nullopt;
+ }
+
if (m_queue.empty()) {
- throw std::runtime_error("queue empty");
+ if (timeBomb) {
+ timeBomb->defuse();
+ }
+ m_waitCondition.wait(lock, [&] {
+ return !m_queue.empty() || m_quit.load(std::memory_order_relaxed);
+ });
+ if (m_queue.empty() || m_quit.load(std::memory_order_relaxed)) {
+ return std::nullopt; // timeBomb is already defused
+ }
+ }
+
+ /*
+ * This will signal the current thread with SIGABRT if it does not handle this event within the
+ * given time limit.
+ */
+ if (timeBomb) {
+ timeBomb->rearm();
}
auto event = std::move(m_queue.front());
+ m_queue.pop(); // drop the moved-from element; otherwise the queue never drains and it is handed out again
return event;
}
-bool Queue::wait() {
- std::unique_lock<std::mutex> ulock(m_mutex);
- m_waitCondition.wait(ulock, [&] { return !m_queue.empty() || m_quit; });
- return m_quit;
-}
-
void Queue::quit() {
- {
- std::lock_guard<std::mutex> lock(m_mutex);
- m_quit = true;
- }
+ /*
+ * m_quit is atomic, but it still has to be modified while holding m_mutex. Otherwise both the
+ * store and the notification may land between the waiter's predicate check and its going to
+ * sleep in getEvent(), the wake-up is lost and the thread never quits.
+ */
+ {
+ std::lock_guard<std::mutex> lock(m_mutex);
+ m_quit.store(true, std::memory_order_relaxed);
+ }
m_waitCondition.notify_one();
}
}
void ServiceThreadDispatcher::ThreadLoop(unsigned int queueType) {
+ std::optional<TimeBomb> timeBomb;
+ if (queueType == QUEUE_TYPE_APP_LAUNCH)
+ timeBomb.emplace(gettid());
+
auto &queue = m_queueThreads[queueType].queue;
- while (!queue.wait()) {
+ while (auto event = queue.getEvent(timeBomb ? &*timeBomb : nullptr)) {
UNHANDLED_EXCEPTION_HANDLER_BEGIN
{
- processEvent(queue.getEvent());
+ processEvent(std::move(*event));
}
UNHANDLED_EXCEPTION_HANDLER_END
}
--- /dev/null
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All rights reserved
+ *
+ * This file is licensed under the terms of MIT License or the Apache License
+ * Version 2.0 of your choice. See the LICENSE.MIT file for MIT license details.
+ * See the LICENSE file or the notice below for Apache License Version 2.0
+ * details.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "time-bomb.h"
+
+#include <dpl/errno_string.h>
+#include <dpl/log/log.h>
+
+#include <csignal>
+
+namespace SecurityManager {
+
+namespace {
+
+/*
+ * Make it a bit shorter than amd app startup timeout (100s). This way security-manager will crash
+ * and provide stack trace before the app is killed.
+ */
+constexpr time_t TIMEOUT = 90; // seconds
+
+void notification(union sigval data) { // timer expiry callback; data.sival_int carries the id of the watched thread
+ pid_t tid = data.sival_int;
+ LogError("Thread " << tid << " did not handle any event for " << TIMEOUT << "s. Aborting.");
+ tgkill(getpid(), tid, SIGABRT); // signal the watched thread itself rather than the process as a whole
+}
+
+} // namespace
+
+TimeBomb::TimeBomb(pid_t tid) { // creates the timer whose expiry calls notification() for thread `tid`; the timer is not started here
+ struct sigevent sev;
+ sev.sigev_notify = SIGEV_THREAD; // deliver expiry by invoking sigev_notify_function
+ sev.sigev_value.sival_int = tid; // handed to notification() as its argument
+ sev.sigev_notify_function = notification;
+ sev.sigev_notify_attributes = nullptr; // no custom attributes for the notification thread
+ if (timer_create(CLOCK_MONOTONIC, &sev, &m_timerId) != 0)
+ LogAndThrowErrno(Base, "timer_create()");
+}
+
+TimeBomb::~TimeBomb() { // deletes the timer; a failure is only logged
+ if (timer_delete(m_timerId) != 0) {
+ try {
+ LogErrno("timer_delete()");
+ } catch (...) {} // nothing may escape from the destructor, not even a logging failure
+ }
+}
+
+void TimeBomb::rearm() { // (re)starts the countdown: the timer will expire TIMEOUT seconds from now
+ set(TIMEOUT);
+ m_armed = true; // recorded so that defuse() knows there is a running timer to stop
+}
+
+void TimeBomb::defuse() {
+    // Nothing to stop unless rearm() has started the countdown.
+    if (!m_armed)
+        return;
+
+    // A zero expiration time disarms the timer.
+    set(0);
+    m_armed = false;
+}
+
+void TimeBomb::set(time_t seconds) {
+    // One-shot timer: the interval stays zero, only the first expiration is programmed.
+    // A zero expiration time disarms the timer.
+    struct itimerspec spec{};
+    spec.it_value.tv_sec = seconds;
+
+    const int ret = timer_settime(m_timerId, 0, &spec, nullptr);
+    if (ret != 0)
+        LogAndThrowErrno(Base, "timer_settime()");
+}
+
+} // namespace SecurityManager
SocketManager::ConnectionID connectionID;
Credentials creds;
MessageBuffer buffer;
+
+ Event() = default;
+
+ Event(const Event&) = delete;
+ Event(Event&&) = default;
+ Event& operator=(const Event&) = delete;
+ Event& operator=(Event&&) = default;
+
SecurityModuleCall callType() {
try {
int call_type_int;