};
// variables & definitions for thread security attributes
-static const std::string *g_p_app_label;
-static std::atomic<int> g_threads_count;
-static cap_t g_cap;
-
-// needed to keep track which thread did receive signal and which did not
-// additionally keeps information about stage to which each thread got
-#define TID_STATUS_NOT_SYNCED 0
-#define TID_STATUS_RECEIVED_SIGNAL 1
-#define TID_STATUS_SETUP_SMACK 2
-#define TID_STATUS_FULLY_SYNCED 3
-#define TID_STATUS_DEAD 4
-
-struct TidStatus {
- int tid;
- int status;
-};
-
-static TidStatus *g_tid_status;
-static int g_all_tid_num;
-
#define MAX_SIG_WAIT_TIME 10000 // times 2 ms thats 20 seconds
+#define SLEEP_CONST 2000 // 2 ms
// Hackish, based on glibc's definition in sysdeps/unix/sysv/linux/nptl-signals.h
#define SIGSETXID (__SIGRTMIN + 1)
+#define MAX_THREAD_COUNT 1000 // probably way too big number but we have to live with it somehow
+static const std::string *g_p_app_label;
-std::string readThreadLabel(int tid)
-{
- std::string path = "/proc/self/task/" + std::to_string(tid) + "/attr/current";
- char label[256];
- int fd;
- int ret;
- label[0] = 0;
+static std::atomic<int> g_th_barrier;
+
+// state is >= 0 normally
+static std::atomic<int> g_thread_state[MAX_THREAD_COUNT];
+static std::atomic<int> g_thread_index[MAX_THREAD_COUNT];
+static std::atomic<int> g_managed_tids_num;
+static cap_t g_cap;
- fd = open(path.c_str(), O_RDONLY);
- if (fd < 0) {
- LogWarning("Failed to open " << path);
- return label;
+static inline void add_managed_tid(int tid){
+ if (g_managed_tids_num == MAX_THREAD_COUNT) {
+ LogError("Adding too many threads, aborting! MAX_THREAD_COUNT == " << MAX_THREAD_COUNT);
+ abort();
}
+ g_thread_state[g_managed_tids_num] = 0;
+ g_thread_index[g_managed_tids_num] = tid;
+ g_managed_tids_num++;
+}
- ret = read(fd, label, sizeof(label) - 1);
- close(fd);
- if (ret < 0) {
- LogWarning("Failed to read " << path);
- return label;
+static inline void initialize_tid_states() {
+ g_managed_tids_num = 0;
+}
+
+static inline void set_tid_state(int tid, int state = 0) {
+ int index = -1;
+ for (int i = 0; i < g_managed_tids_num; ++i)
+ if (g_thread_index[i] == tid) {
+ index = i;
+ break;
+ }
+ if (index == -1) {
+ // no logging, this can be used inside signal handler
+ abort();
}
- label[ret] = '\0';
- return label;
+ g_thread_state[index] = state;
}
+static int count_alive_tids_with_state(int status, const std::unordered_set<int>& alive_tids) {
+ int count = 0;
+ for (int i = 0; i < g_managed_tids_num; ++i)
+ if (g_thread_state[i] == status && alive_tids.find(g_thread_index[i]) != alive_tids.end())
+ ++ count;
+ return count;
+}
SECURITY_MANAGER_API
const char *security_manager_strerror(enum lib_retcode rc)
static inline int security_manager_sync_threads_internal(const std::string &app_label)
{
static_assert(ATOMIC_INT_LOCK_FREE == 2, "std::atomic<int> is not always lock free");
- g_tid_status = NULL;
- // no need to sync the main thread
- std::unordered_set<int> tids_ignored = {Syscall::gettid()};
- auto tids_before_sync = CheckProperDrop::checkNewThreads(tids_ignored);
+ // no need to sync the main thread with signals
+ int own_tid = Syscall::gettid();
+ const pid_t own_pid = getpid();
+
+ std::unordered_set<int> tids_sent_signals = {};
g_cap = cap_init();
if (smack_check())
g_p_app_label = &app_label;
- for (unsigned attempt = 0;;++attempt) {
- LogDebug("number of threads to sync: " << tids_before_sync.size());
-
- if (tids_before_sync.empty())
- break; // nothing to do
-
- const pid_t cur_pid = getpid();
- g_threads_count = g_all_tid_num = tids_before_sync.size();
- g_tid_status = new TidStatus[g_all_tid_num];
- if (!g_tid_status) {
- LogError("Cannot allocate g_tid_status");
- return SECURITY_MANAGER_ERROR_MEMORY;
- } else {
- for (int i = 0; i < g_all_tid_num; ++i) {
- g_tid_status[i].tid = 0; // to be sure its not yet any actual TID
- g_tid_status[i].status = TID_STATUS_NOT_SYNCED;
- }
- }
- auto tid_deleter = [&](TidStatus* ptr){delete [] ptr;};
- std::unique_ptr<TidStatus, decltype(tid_deleter)> scopedTidPtr(g_tid_status, tid_deleter);
-
- struct sigaction act;
- struct sigaction old;
- memset(&act, '\0', sizeof(act));
-
- sigemptyset(&act.sa_mask);
- act.sa_flags = SA_RESTART;
- act.sa_handler = [](int signo) {
- (void)signo;
- std::atomic_thread_fence(std::memory_order_acquire);
-
- int tid = Syscall::gettid();
- int i = 0;
- for (; i < g_all_tid_num; ++i) {
- if (g_tid_status[i].tid == tid) {
- break;
- }
- }
- // The thread that received the signal is not on the list -> this shouldn't happen
- if (i == g_all_tid_num)
- abort();
-
- g_tid_status[i].status = TID_STATUS_RECEIVED_SIGNAL;
- if (g_p_app_label)
- if (label_for_self_internal(tid) != 0)
- return;
- g_tid_status[i].status = TID_STATUS_SETUP_SMACK;
- if (cap_set_proc(g_cap))
+ struct sigaction act;
+ struct sigaction old;
+ memset(&act, '\0', sizeof(act));
+
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_RESTART;
+ act.sa_handler = [](int signo) {
+ (void)signo;
+ std::atomic_thread_fence(std::memory_order_acquire);
+ int tid = Syscall::gettid();
+ set_tid_state(tid, 1);
+ while(g_th_barrier == 0)
+ usleep(SLEEP_CONST);
+ if (g_p_app_label)
+ if (label_for_self_internal(tid) != 0)
return;
+ if (cap_set_proc(g_cap))
+ return;
+ set_tid_state(tid, 2);
+ while(g_th_barrier == 1)
+ usleep(SLEEP_CONST);
+ };
+
+ if (Syscall::sigaction(SIGSETXID, &act, &old) < 0) {
+ LogError("Error in sigaction()");
+ return SECURITY_MANAGER_ERROR_UNKNOWN;
+ }
- g_tid_status[i].status = TID_STATUS_FULLY_SYNCED;
- g_threads_count--;
- };
-
- if (Syscall::sigaction(SIGSETXID, &act, &old) < 0) {
- LogError("Error in sigaction()");
- return SECURITY_MANAGER_ERROR_UNKNOWN;
- }
-
- std::atomic_thread_fence(std::memory_order_release);
-
- int threads_gone = 0;
- int tid_index = 0;
- for (auto it = tids_before_sync.begin(); it != tids_before_sync.end();) {
- g_tid_status[tid_index++].tid = *it;
-
- if (Syscall::tgkill(cur_pid, *it, SIGSETXID) < 0) {
- auto err = errno;
-
- if (EAGAIN == err) { // resource temporarily unavailable, trying again
- LogWarning("Received EAGAIN from tgkill, will wait 2 ms & try again");
- usleep(2000); // 2 ms
- if (Syscall::tgkill(cur_pid, *it, SIGSETXID) < 0) {
- err = errno;
- } else {
- ++it;
+ initialize_tid_states();
+ std::atomic_thread_fence(std::memory_order_release);
+
+ g_th_barrier = 0;
+ int time_left = MAX_SIG_WAIT_TIME;
+ std::unordered_set<int> current_tids = {};
+
+ do {
+ current_tids = CheckProperDrop::checkThreads(own_tid);
+ for (auto existing_tid : current_tids) {
+ if (tids_sent_signals.find(existing_tid) == tids_sent_signals.end()) {
+ tids_sent_signals.insert(existing_tid);
+ add_managed_tid(existing_tid);
+ // thread not managed yet, send signal
+ if (Syscall::tgkill(own_pid, existing_tid, SIGSETXID) < 0) {
+ auto err = errno;
+ if (EAGAIN == err) { // resource temporarily unavailable, trying again
+ LogWarning("Received EAGAIN from tgkill, wait a bit & try again");
+ usleep(SLEEP_CONST); // 2 ms
+ if (Syscall::tgkill(own_pid, existing_tid, SIGSETXID) < 0) {
+ err = errno;
+ if (ESRCH == err) { // thread already gone - noop
+ continue;
+ } else {
+ LogWithErrno(err, "tgkill()");
+ abort();
+ }
+ }
+ }
+ if (ESRCH == err) { // thread already gone - noop
continue;
+ } else {
+ LogWithErrno(err, "tgkill()");
+ abort();
}
}
- if (ESRCH == err) { // thread already gone
- threads_gone++;
- g_tid_status[tid_index-1].status = TID_STATUS_DEAD;
- it = tids_before_sync.erase(it);
- continue;
- } else
- LogWithErrno(err, "tgkill()");
}
- ++it;
}
- g_threads_count -= threads_gone;
- LogDebug("number of threads already gone (signals unsent): " << threads_gone);
-
- for (int i = MAX_SIG_WAIT_TIME; g_threads_count && i; i--) {
- if (i % 500 == 0)
- LogWarning("Not all threads synchronized yet, still waiting - threads left: " <<
- g_threads_count);
- usleep(2000); // 2 ms
+ usleep(SLEEP_CONST); // 2 ms
+ --time_left;
+
+ // break if number of threads in waiting state equals to number of alive tids minus current one
+ int ready_tids = count_alive_tids_with_state(1, current_tids);
+ if (ready_tids == (int)current_tids.size()) {
+ // threads that were read some lines above are all sleeping here - BUT - could have
+ // spawned new threads between reading list from /proc and checking status with count_alive_tids_with_state()
+ // to make sure the loop can end, here we read /proc again
+ if(CheckProperDrop::checkThreads(own_tid) == current_tids)
+ // here indeed, no new threads are present and they are all waiting in the signal handler
+ break;
}
- Syscall::sigaction(SIGSETXID, &old, nullptr);
-
- /*
- * Listing thread ids here (before cap drop) we can ommit issues like Smack access error on
- * new spawned threads in /proc.
- */
- auto tids_after_sync = CheckProperDrop::checkNewThreads(tids_ignored);
- if (g_threads_count) {
- // Here we have to check if threads that were supposedly not synchronized, are not actually dead now;
- // they can still be NOT dead in g_tid_status[i].status because at signal sending time they could still live;
- // such an issue also happened in VD's testing at least once
- int additional_deaths = 0;
- for (int i = 0; i < g_all_tid_num; ++i) {
- int tid = g_tid_status[i].tid;
-
- if (tids_after_sync.count(tid) == 0) {
- // TID is actually dead!
- ++additional_deaths;
- g_tid_status[i].status = TID_STATUS_DEAD;
- }
- }
-
- if(g_threads_count - additional_deaths > 0) {
- LogError("Not all threads synchronized: threads left: " <<
- g_threads_count - additional_deaths);
-
- for (int i = 0; i < g_all_tid_num; ++i) {
- switch (g_tid_status[i].status) {
- case TID_STATUS_DEAD:
- /* this is totally fine */
- LogError("Thread " << g_tid_status[i].tid << " died during attempt to send signal - info can be useful, but this is OK");
- break;
- case TID_STATUS_NOT_SYNCED:
- LogError("Thread " << g_tid_status[i].tid << " didn't receive the signal, serious ERROR!!!");
- break;
- case TID_STATUS_RECEIVED_SIGNAL:
- LogError("Thread " << g_tid_status[i].tid << " received signal but failed at Smack label change, serious ERROR!!!");
- break;
- case TID_STATUS_SETUP_SMACK:
- LogError("Thread " << g_tid_status[i].tid << " received signal, changed Smack label, couldn't change caps, serious ERROR!!!");
- break;
- case TID_STATUS_FULLY_SYNCED:
- /* this is totally fine */
- LogError("Thread " << g_tid_status[i].tid << " received signal and was fully synchronized - info can be useful, but this is OK");
- break;
- default:
- LogError("Thread " << g_tid_status[i].tid << " has strange status: " << g_tid_status[i].status << " this is an ERROR!!!");
- break;
- }
- }
- LogError("Aborting - please check coredump and check offending threads!");
- abort(); // needed to debug the offending thread(s)
- }
+ if (time_left % 500 == 0)
+ LogWarning("Still waiting for threads to receive signals... signals received by: " << ready_tids
+ << ", all tids alive now without current: "
+ << current_tids.size());
+ if (time_left == 0) {
+ LogError("Too much waiting for receiving signals, aborting! Signals received by: " << ready_tids
+ << ", out of alive tids without current: "
+ << current_tids.size());
+ abort();
}
+ } while (1);
- /*
- * Reaching here, we can assume all threads that were read from /proc previously are synced,
- * except current thead. What's left are threads that were spawned by other threads after
- * we listed them using the CheckProperDrop::checkNewThreads() for the first time.
- */
- for (const auto& tid : tids_before_sync)
- tids_after_sync.erase(tid);
-
- LogDebug("New threads left after synchronisation " << tids_after_sync.size());
-
- /*
- * Check if any of the remaining threads has a correct smack label. If that's the case move
- * it to ignored ones. During experiments the label was either the original (privileged)
- * one, the desired app label (g_p_app_label) or it was unreadable at all.
- *
- * This is necessary as threads with dropped capabilities may spawn new threads too. Newly
- * spawned threads have dropped capabilities as well so we don't need to bother with them
- * and definitely we don't want to abort because of them.
- */
- if (g_p_app_label) {
- for (auto it = tids_after_sync.begin(); it != tids_after_sync.end();) {
- auto label = readThreadLabel(*it);
- if (label == *g_p_app_label) {
- tids_ignored.emplace(*it);
- it = tids_after_sync.erase(it);
- } else {
- LogDebug("Privileged thread: " << *it << " label: '" << label << "' expected: '" << *g_p_app_label << "'");
- it++;
- }
- }
- }
-
- LogDebug("Remaining privileged threads " << tids_after_sync.size());
+ // here, all TIDs except current one are waiting to start changing attributes
+ // We can assume these TIDs will continue to live (no need to read /proc again), since no logic
+ // is currently going on in the process except this thread & waiting signal handlers
- if (tids_after_sync.empty())
- break; // all done
+ g_th_barrier++; // this starts the signal handlers - they will proceed once they wake up
- if (attempt < 2) {
- /*
- * Move synced threads to ignored pool and retry. 2 retries were enough to handle 300
- * recursively spawned threads in security_manager_300_prepare_app_recursive_threads.
- * If necessary we can increase it.
- */
- LogDebug("Retrying");
- tids_ignored.merge(std::move(tids_before_sync));
- tids_before_sync = std::move(tids_after_sync);
- continue;
- }
+ int ready_tids = 0;
- /*
- * If app candidate is so stubborn it continues to spawn threads, we cannot iterate this
- * forever and need to treat that filthy bastard with proper abort().
- */
- for (auto &tid : tids_after_sync)
- LogError("New thread spawned during call to prepare_app that could not get synchronized, TID: " << tid);
+ for (int i = MAX_SIG_WAIT_TIME; i > 0; --i) {
+ // break if number of completed tids equals to all of them minus current one!
+ ready_tids = count_alive_tids_with_state(2, current_tids);
+ if (ready_tids == (int)current_tids.size())
+ break;
+ usleep(SLEEP_CONST); // 2 ms
+ if (i % 500 == 0)
+ LogWarning("Still waiting for threads to finalize handlers... completed by: " << ready_tids
+ << ", all tids managed now: "
+ << current_tids.size());
+ }
- abort(); // needed to debug the offending thread(s).
+ if (ready_tids != (int)current_tids.size()) {
+ // not all TIDs reached this stage, aborting!
+ LogError("Too much waiting for sig handler completion, aborting! Signals completed by: " << ready_tids
+ << ", all tids managed now: "
+ << current_tids.size());
+ abort();
}
/*
- * The last step of successfull call - change attributes of one last thread, the main thread.
+ * Change attributes of one last thread, the main thread.
*/
- if (g_p_app_label && label_for_self_internal(Syscall::gettid()) != 0) {
- LogError("label_for_self_internal failed");
- return SECURITY_MANAGER_ERROR_UNKNOWN;
+ if (g_p_app_label && label_for_self_internal(own_tid) != 0) {
+ LogError("label_for_self_internal failed for main thread");
+ abort();
}
if (cap_set_proc(g_cap)) {
LogError("Can't drop main thread capabilities");
- return SECURITY_MANAGER_ERROR_UNKNOWN;
+ abort();
}
+ g_th_barrier++; // this starts signal handlers to proceed once they wake up - logic in app starts in env where all have changed labels
+
+ Syscall::sigaction(SIGSETXID, &old, nullptr);
+
return SECURITY_MANAGER_SUCCESS;
}