Rework security_manager_sync_threads_internal() 66/301866/12
authorTomasz Swierczek <t.swierczek@samsung.com>
Fri, 24 Nov 2023 12:47:07 +0000 (13:47 +0100)
committerTomasz Swierczek <t.swierczek@samsung.com>
Mon, 27 Nov 2023 11:57:07 +0000 (12:57 +0100)
Add "stop-the-world" implemenation in signal handler.

Why this change may be needed?
------------------------------

On slower platforms, some dotnet-based app candidate processes
tend to have 10-15-... threads. Sending signals that change
Smack labels & drop caps to each of them can be delayed - for apps
that don't have a priority to get launched, this can take up
to 10-15 seconds, in pathological cases (observed in real life,
on low-end boards). In such situation, there's non-zero time window
when some threads have changed security attributes and some
haven't  - access control issues occur in app candidate's logic,
leading to serious errors & abort()s in the launched app.

With this modification, threads change the Smack labels & capabilities
when none of them are performing any of their original app candidate
logic and the only thread NOT in signal handler is the one
with security-manager client library.

This is expected to limit Smack/permission issues mentioned earlier.

Details, disclaimers:
---------------------

This implementation CANNOT fix an issue where app candidate process
created a resource (ie. a file) BEFORE call to security_manager_prepare_app()
and then, wants to access it (for read) AFTER call to the function.
This is because, before the transition, the process has either User
or System::Privileged Smack label, to which apps do not have R Smack rule
(such issues can still occur and can't be dealt with at security-manger level).

This implementation can be up to 3x slower on low-end boards,
as we have 3 waiting points here, instead of one:
    1) waiting for signals to arrive in all threads (place from old implementation)
    2) waiting for signals to be able to start changing caps/Smack labels
    3) waiting for signals to sync after attributes are changed to start work

Tests performed on Tizen emulator show about 30-40% slowdown
of prepare app vs. previous implementation.

This implementation assumes that the size of set of all TIDs
that can appear in a process (already dead + currently alive) will not exceed 1000.
A global array of std::atomic's is used to keep track of each thread's
state in the execution of signal handler.

This implementation uses sleep() and std::atomic to implement
waiting for the barrier inside a signal handler (home-made spinlock,
basically). While this can be done more elegantly, ie. with std::atomic's wait()
functionality, this C++ feature doesn't seem to be supported in current Tizen
toolchain (c++2a supported, not full c++20).

An alternative way for synchronization would be to use high-profile system
resources like fd's or semaphores, however, because the VD H/W platform
where the issue of thread dis-synchronization happens is very slow, interfacing
with kernel was limited to minimum, as adding any context switch to kernel
may slow down even more.

Change-Id: Ic7037acaeb4e3eaab03284ae63216e7ab4d6d862

src/client/check-proper-drop.cpp
src/client/client-security-manager.cpp
src/common/include/check-proper-drop.h

index 3159979a9b983d4ffe3eea294a0f928a7cc8dee2..0f37436247809cd2e6d48bd3d442c2a156b20366 100644 (file)
@@ -39,7 +39,7 @@ DECLARE_EXCEPTION_TYPE(SecurityManager::Exception, DropError)
 
 }  // namespace
 
-std::unordered_set<int> checkNewThreads(const std::unordered_set<int>& ignored)
+std::unordered_set<int> checkThreads(int currentTid)
 {
     FS::FileNameVector current_tids;
     for (unsigned i = 0; i < 10; ++i) {
@@ -62,10 +62,8 @@ std::unordered_set<int> checkNewThreads(const std::unordered_set<int>& ignored)
     for (auto &tid_s : current_tids) {
         int tid = atoi(tid_s.c_str());
 
-        if (ignored.count(tid) > 0) {
-            LogDebug("Thread " << tid_s << " ignored");
+        if (tid == currentTid)
             continue;
-        }
 
         LogDebug("Thread " << tid_s << " new");
         result.emplace(tid);
index 69d930ddf78676644a7a7b8f5a6a998b91d8da77..62d3faa022e04afa58c6366d761017b568b9aab8 100644 (file)
@@ -89,55 +89,57 @@ static std::map<enum lib_retcode, std::string> lib_retcode_string_map = {
 };
 
 // variables & definitions for thread security attributes
-static const std::string *g_p_app_label;
-static std::atomic<int> g_threads_count;
-static cap_t g_cap;
-
-// needed to keep track which thread did receive signal and which did not
-// additionally keeps information about stage to which each thread got
-#define TID_STATUS_NOT_SYNCED           0
-#define TID_STATUS_RECEIVED_SIGNAL      1
-#define TID_STATUS_SETUP_SMACK          2
-#define TID_STATUS_FULLY_SYNCED         3
-#define TID_STATUS_DEAD                 4
-
-struct TidStatus {
-    int tid;
-    int status;
-};
-
-static TidStatus *g_tid_status;
-static int g_all_tid_num;
-
 #define MAX_SIG_WAIT_TIME   10000 // times 2 ms thats 20 seconds
+#define SLEEP_CONST         2000  // 2 ms
 
 // Hackish, based on glibc's definition in sysdeps/unix/sysv/linux/nptl-signals.h
 #define SIGSETXID           (__SIGRTMIN + 1)
+#define MAX_THREAD_COUNT    1000 // probably way too big number but we have to live with it somehow
+static const std::string *g_p_app_label;
 
-std::string readThreadLabel(int tid)
-{
-    std::string path = "/proc/self/task/" + std::to_string(tid) + "/attr/current";
-    char label[256];
-    int fd;
-    int ret;
-    label[0] = 0;
+static std::atomic<int> g_th_barrier;
+
+// state is >= 0 normally
+static std::atomic<int> g_thread_state[MAX_THREAD_COUNT];
+static std::atomic<int> g_thread_index[MAX_THREAD_COUNT];
+static std::atomic<int> g_managed_tids_num;
+static cap_t g_cap;
 
-    fd = open(path.c_str(), O_RDONLY);
-    if (fd < 0) {
-        LogWarning("Failed to open " << path);
-        return label;
+static inline void add_managed_tid(int tid){
+    if (g_managed_tids_num == MAX_THREAD_COUNT) {
+        LogError("Adding too many threads, aborting! MAX_THREAD_COUNT == " << MAX_THREAD_COUNT);
+        abort();
     }
+    g_thread_state[g_managed_tids_num] = 0;
+    g_thread_index[g_managed_tids_num] = tid;
+    g_managed_tids_num++;
+}
 
-    ret = read(fd, label, sizeof(label) - 1);
-    close(fd);
-    if (ret <    0) {
-        LogWarning("Failed to read " << path);
-        return label;
+static inline void initialize_tid_states() {
+    g_managed_tids_num = 0;
+}
+
+static inline void set_tid_state(int tid, int state = 0) {
+    int index = -1;
+    for (int i = 0; i < g_managed_tids_num; ++i)
+        if (g_thread_index[i] == tid) {
+            index = i;
+            break;
+        }
+    if (index == -1) {
+        // no logging, this can be used inside signal handler
+        abort();
     }
-    label[ret] = '\0';
-    return label;
+    g_thread_state[index] = state;
 }
 
+static int count_alive_tids_with_state(int status, const std::unordered_set<int>& alive_tids) {
+    int count = 0;
+    for (int i = 0; i < g_managed_tids_num; ++i)
+        if (g_thread_state[i] == status && alive_tids.find(g_thread_index[i]) != alive_tids.end())
+            ++ count;
+    return count;
+}
 
 SECURITY_MANAGER_API
 const char *security_manager_strerror(enum lib_retcode rc)
@@ -634,11 +636,12 @@ inline static int label_for_self_internal(int tid)
 static inline int security_manager_sync_threads_internal(const std::string &app_label)
 {
     static_assert(ATOMIC_INT_LOCK_FREE == 2, "std::atomic<int> is not always lock free");
-    g_tid_status = NULL;
 
-    // no need to sync the main thread
-    std::unordered_set<int> tids_ignored = {Syscall::gettid()};
-    auto tids_before_sync = CheckProperDrop::checkNewThreads(tids_ignored);
+    // no need to sync the main thread with signals
+    int own_tid = Syscall::gettid();
+    const pid_t own_pid = getpid();
+
+    std::unordered_set<int> tids_sent_signals = {};
 
     g_cap = cap_init();
 
@@ -653,233 +656,144 @@ static inline int security_manager_sync_threads_internal(const std::string &app_
     if (smack_check())
         g_p_app_label = &app_label;
 
-    for (unsigned attempt = 0;;++attempt) {
-        LogDebug("number of threads to sync: " << tids_before_sync.size());
-
-        if (tids_before_sync.empty())
-            break; // nothing to do
-
-        const pid_t cur_pid = getpid();
-        g_threads_count = g_all_tid_num = tids_before_sync.size();
-        g_tid_status = new TidStatus[g_all_tid_num];
-        if (!g_tid_status) {
-            LogError("Cannot allocate g_tid_status");
-            return SECURITY_MANAGER_ERROR_MEMORY;
-        } else {
-            for (int i = 0; i < g_all_tid_num; ++i) {
-                g_tid_status[i].tid = 0;    // to be sure its not yet any actual TID
-                g_tid_status[i].status = TID_STATUS_NOT_SYNCED;
-            }
-        }
-        auto tid_deleter = [&](TidStatus* ptr){delete [] ptr;};
-        std::unique_ptr<TidStatus, decltype(tid_deleter)> scopedTidPtr(g_tid_status, tid_deleter);
-
-        struct sigaction act;
-        struct sigaction old;
-        memset(&act, '\0', sizeof(act));
-
-        sigemptyset(&act.sa_mask);
-        act.sa_flags = SA_RESTART;
-        act.sa_handler = [](int signo) {
-            (void)signo;
-            std::atomic_thread_fence(std::memory_order_acquire);
-
-            int tid = Syscall::gettid();
-            int i = 0;
-            for (; i < g_all_tid_num; ++i) {
-                if (g_tid_status[i].tid == tid) {
-                    break;
-                }
-            }
-            // The thread that received the signal is not on the list -> this shouldn't happen
-            if (i == g_all_tid_num)
-                abort();
-
-            g_tid_status[i].status = TID_STATUS_RECEIVED_SIGNAL;
-            if (g_p_app_label)
-                if (label_for_self_internal(tid) != 0)
-                    return;
-            g_tid_status[i].status = TID_STATUS_SETUP_SMACK;
-            if (cap_set_proc(g_cap))
+    struct sigaction act;
+    struct sigaction old;
+    memset(&act, '\0', sizeof(act));
+
+    sigemptyset(&act.sa_mask);
+    act.sa_flags = SA_RESTART;
+    act.sa_handler = [](int signo) {
+        (void)signo;
+        std::atomic_thread_fence(std::memory_order_acquire);
+        int tid = Syscall::gettid();
+        set_tid_state(tid, 1);
+        while(g_th_barrier == 0)
+            usleep(SLEEP_CONST);
+        if (g_p_app_label)
+            if (label_for_self_internal(tid) != 0)
                 return;
+        if (cap_set_proc(g_cap))
+            return;
+        set_tid_state(tid, 2);
+        while(g_th_barrier == 1)
+            usleep(SLEEP_CONST);
+    };
+
+    if (Syscall::sigaction(SIGSETXID, &act, &old) < 0) {
+        LogError("Error in sigaction()");
+        return SECURITY_MANAGER_ERROR_UNKNOWN;
+    }
 
-            g_tid_status[i].status = TID_STATUS_FULLY_SYNCED;
-            g_threads_count--;
-        };
-
-        if (Syscall::sigaction(SIGSETXID, &act, &old) < 0) {
-            LogError("Error in sigaction()");
-            return SECURITY_MANAGER_ERROR_UNKNOWN;
-        }
-
-        std::atomic_thread_fence(std::memory_order_release);
-
-        int threads_gone = 0;
-        int tid_index = 0;
-        for (auto it = tids_before_sync.begin(); it != tids_before_sync.end();) {
-            g_tid_status[tid_index++].tid = *it;
-
-            if (Syscall::tgkill(cur_pid, *it, SIGSETXID) < 0) {
-                auto err = errno;
-
-                if (EAGAIN == err) { // resource temporarily unavailable, trying again
-                    LogWarning("Received EAGAIN from tgkill, will wait 2 ms & try again");
-                    usleep(2000);   // 2 ms
-                    if (Syscall::tgkill(cur_pid, *it, SIGSETXID) < 0) {
-                        err = errno;
-                    } else {
-                        ++it;
+    initialize_tid_states();
+    std::atomic_thread_fence(std::memory_order_release);
+
+    g_th_barrier = 0;
+    int time_left = MAX_SIG_WAIT_TIME;
+    std::unordered_set<int> current_tids = {};
+
+    do {
+        current_tids = CheckProperDrop::checkThreads(own_tid);
+        for (auto existing_tid : current_tids) {
+            if (tids_sent_signals.find(existing_tid) == tids_sent_signals.end()) {
+                tids_sent_signals.insert(existing_tid);
+                add_managed_tid(existing_tid);
+                // thread not managed yet, send signal
+                if (Syscall::tgkill(own_pid, existing_tid, SIGSETXID) < 0) {
+                    auto err = errno;
+                    if (EAGAIN == err) { // resource temporarily unavailable, trying again
+                        LogWarning("Received EAGAIN from tgkill, wait a bit & try again");
+                        usleep(SLEEP_CONST);   // 2 ms
+                        if (Syscall::tgkill(own_pid, existing_tid, SIGSETXID) < 0) {
+                            err = errno;
+                            if (ESRCH == err) { // thread already gone - noop
+                                continue;
+                            } else {
+                                LogWithErrno(err, "tgkill()");
+                                abort();
+                            }
+                        }
+                    }
+                    if (ESRCH == err) { // thread already gone - noop
                         continue;
+                    } else {
+                        LogWithErrno(err, "tgkill()");
+                        abort();
                     }
                 }
-                if (ESRCH == err) { // thread already gone
-                    threads_gone++;
-                    g_tid_status[tid_index-1].status = TID_STATUS_DEAD;
-                    it = tids_before_sync.erase(it);
-                    continue;
-                } else
-                    LogWithErrno(err, "tgkill()");
             }
-            ++it;
         }
 
-        g_threads_count -= threads_gone;
-        LogDebug("number of threads already gone (signals unsent): " << threads_gone);
-
-        for (int i = MAX_SIG_WAIT_TIME; g_threads_count && i; i--) {
-            if (i % 500 == 0)
-                LogWarning("Not all threads synchronized yet, still waiting - threads left: " <<
-                           g_threads_count);
-            usleep(2000);   // 2 ms
+        usleep(SLEEP_CONST);   // 2 ms
+        --time_left;
+
+        // break if number of threads in waiting state equals to number of alive tids minus current one
+        int ready_tids = count_alive_tids_with_state(1, current_tids);
+        if (ready_tids == (int)current_tids.size()) {
+            // threads that were read some lines above are all sleeping here - BUT - could have
+            // spawned new threads between reading list from /proc and checking status with count_alive_tids_with_state()
+            // to make sure the loop can end, here we read /proc again
+            if(CheckProperDrop::checkThreads(own_tid) == current_tids)
+                // here indeed, no new threads are present and they are all waiting in the signal handler
+                break;
         }
 
-        Syscall::sigaction(SIGSETXID, &old, nullptr);
-
-        /*
-         * Listing thread ids here (before cap drop) we can ommit issues like Smack access error on
-         * new spawned threads in /proc.
-         */
-        auto tids_after_sync = CheckProperDrop::checkNewThreads(tids_ignored);
-        if (g_threads_count) {
-            // Here we have to check if threads that were supposedly not synchronized, are not actually dead now;
-            // they can still be NOT dead in g_tid_status[i].status because at signal sending time they could still live;
-            // such an issue also happened in VD's testing at least once
-            int additional_deaths = 0;
-            for (int i = 0; i < g_all_tid_num; ++i) {
-                int tid = g_tid_status[i].tid;
-
-                if (tids_after_sync.count(tid) == 0) {
-                    // TID is actually dead!
-                    ++additional_deaths;
-                    g_tid_status[i].status = TID_STATUS_DEAD;
-                }
-            }
-
-            if(g_threads_count - additional_deaths > 0) {
-                LogError("Not all threads synchronized: threads left: " <<
-                         g_threads_count - additional_deaths);
-
-                for (int i = 0; i < g_all_tid_num; ++i) {
-                    switch (g_tid_status[i].status) {
-                        case TID_STATUS_DEAD:
-                            /* this is totally fine */
-                            LogError("Thread " << g_tid_status[i].tid << " died during attempt to send signal - info can be useful, but this is OK");
-                            break;
-                        case TID_STATUS_NOT_SYNCED:
-                            LogError("Thread " << g_tid_status[i].tid << " didn't receive the signal, serious ERROR!!!");
-                            break;
-                        case TID_STATUS_RECEIVED_SIGNAL:
-                            LogError("Thread " << g_tid_status[i].tid << " received signal but failed at Smack label change, serious ERROR!!!");
-                            break;
-                        case TID_STATUS_SETUP_SMACK:
-                            LogError("Thread " << g_tid_status[i].tid << " received signal, changed Smack label, couldn't change caps, serious ERROR!!!");
-                            break;
-                        case TID_STATUS_FULLY_SYNCED:
-                            /* this is totally fine */
-                            LogError("Thread " << g_tid_status[i].tid << " received signal and was fully synchronized - info can be useful, but this is OK");
-                            break;
-                        default:
-                            LogError("Thread " << g_tid_status[i].tid << " has strange status: " << g_tid_status[i].status << " this is an ERROR!!!");
-                            break;
-                    }
-                }
-                LogError("Aborting - please check coredump and check offending threads!");
-                abort(); // needed to debug the offending thread(s)
-            }
+        if (time_left % 500 == 0)
+            LogWarning("Still waiting for threads to receive signals... signals received by: " << ready_tids
+                                                                                               << ", all tids alive now without current: "
+                                                                                               << current_tids.size());
+        if (time_left == 0) {
+            LogError("Too much waiting for receiving signals, aborting! Signals received by: " << ready_tids
+                                                                                               << ", out of alive tids without current: "
+                                                                                               << current_tids.size());
+            abort();
         }
+    } while (1);
 
-        /*
-         * Reaching here, we can assume all threads that were read from /proc previously are synced,
-         * except current thead. What's left are threads that were spawned by other threads after
-         * we listed them using the CheckProperDrop::checkNewThreads() for the first time.
-         */
-        for (const auto& tid : tids_before_sync)
-            tids_after_sync.erase(tid);
-
-        LogDebug("New threads left after synchronisation " << tids_after_sync.size());
-
-        /*
-         * Check if any of the remaining threads has a correct smack label. If that's the case move
-         * it to ignored ones. During experiments the label was either the original (privileged)
-         * one, the desired app label (g_p_app_label) or it was unreadable at all.
-         *
-         * This is necessary as threads with dropped capabilities may spawn new threads too. Newly
-         * spawned threads have dropped capabilities as well so we don't need to bother with them
-         * and definitely we don't want to abort because of them.
-         */
-        if (g_p_app_label) {
-            for (auto it = tids_after_sync.begin(); it != tids_after_sync.end();) {
-                auto label = readThreadLabel(*it);
-                if (label == *g_p_app_label) {
-                    tids_ignored.emplace(*it);
-                    it = tids_after_sync.erase(it);
-                } else {
-                    LogDebug("Privileged thread: " << *it << " label: '" << label << "' expected: '" << *g_p_app_label << "'");
-                    it++;
-                }
-            }
-        }
-
-        LogDebug("Remaining privileged threads " << tids_after_sync.size());
+    // here, all TIDs except current one are waiting to start changing attributes
+    // We can assume these TIDs will continue to live (no need to read /proc again), since no logic
+    // is currently going on in the process except this thread & waiting signal handlers
 
-        if (tids_after_sync.empty())
-            break; // all done
+    g_th_barrier++; // this starts the signal handlers - they will proceed once they wake up
 
-        if (attempt < 2) {
-            /*
-             * Move synced threads to ignored pool and retry. 2 retries were enough to handle 300
-             * recursively spawned threads in security_manager_300_prepare_app_recursive_threads.
-             * If necessary we can increase it.
-             */
-            LogDebug("Retrying");
-            tids_ignored.merge(std::move(tids_before_sync));
-            tids_before_sync = std::move(tids_after_sync);
-            continue;
-        }
+    int ready_tids = 0;
 
-        /*
-         * If app candidate is so stubborn it continues to spawn threads, we cannot iterate this
-         * forever and need to treat that filthy bastard with proper abort().
-         */
-        for (auto &tid : tids_after_sync)
-            LogError("New thread spawned during call to prepare_app that could not get synchronized, TID: " << tid);
+    for (int i = MAX_SIG_WAIT_TIME; i > 0; --i) {
+        // break if number of completed tids equals to all of them minus current one!
+        ready_tids = count_alive_tids_with_state(2, current_tids);
+        if (ready_tids == (int)current_tids.size())
+            break;
+        usleep(SLEEP_CONST);   // 2 ms
+        if (i % 500 == 0)
+            LogWarning("Still waiting for threads to finalize handlers... completed by: " << ready_tids
+                                                                                          << ", all tids managed now: "
+                                                                                          << current_tids.size());
+    }
 
-        abort(); // needed to debug the offending thread(s).
+    if (ready_tids != (int)current_tids.size()) {
+        // not all TIDs reached this stage, aborting!
+        LogError("Too much waiting for sig handler completion, aborting! Signals completed by: " << ready_tids
+                                                                                                 << ", all tids managed now: "
+                                                                                                 << current_tids.size());
+        abort();
     }
 
     /*
-     * The last step of successfull call - change attributes of one last thread, the main thread.
+     * Change attributes of one last thread, the main thread.
      */
-    if (g_p_app_label && label_for_self_internal(Syscall::gettid()) != 0) {
-        LogError("label_for_self_internal failed");
-        return SECURITY_MANAGER_ERROR_UNKNOWN;
+    if (g_p_app_label && label_for_self_internal(own_tid) != 0) {
+        LogError("label_for_self_internal failed for main thread");
+        abort();
     }
 
     if (cap_set_proc(g_cap)) {
         LogError("Can't drop main thread capabilities");
-        return SECURITY_MANAGER_ERROR_UNKNOWN;
+        abort();
     }
 
+    g_th_barrier++; // this starts signal handlers to proceed once they wake up - logic in app starts in env where all have changed labels
+
+    Syscall::sigaction(SIGSETXID, &old, nullptr);
+
     return SECURITY_MANAGER_SUCCESS;
 }
 
index 8a519549cc581d7813dee21d0cda9fa4d51a2f5e..03e073454f273a921f9a7b91126f6538c3432e96 100644 (file)
@@ -48,7 +48,7 @@ int8_t computeFlags();
 
 // CLIENT ONLY
 // returns threads TIDS
-std::unordered_set<int> checkNewThreads(const std::unordered_set<int>& ingored);
+std::unordered_set<int> checkThreads(int currentTid);
 
 }  // namespace CheckProperDrop
 }  // namespace SecurityManager