Modify amd startup monitor
author Changgyu Choi <changyu.choi@samsung.com>
Thu, 26 Dec 2024 08:17:17 +0000 (17:17 +0900)
committer Changgyu Choi <changyu.choi@samsung.com>
Thu, 2 Jan 2025 00:23:30 +0000 (09:23 +0900)
Previously, amd killed (with abort, for debugging purposes) an app that had been blocked for a long time.
This patch reduces the frequency of such crashes by modifying that logic to print the backtrace and CPU usage rate periodically.

Change-Id: I405eadc72602a2be025b8d041676c1e7f7dc3edd
Signed-off-by: Changgyu Choi <changyu.choi@samsung.com>
packaging/ac.service
src/lib/CMakeLists.txt
src/lib/app_status/app_status.cc
src/lib/app_status/app_status.hh
src/lib/app_status/app_status_manager.cc
src/lib/app_status/app_status_manager.hh
src/lib/common/worker.cc [new file with mode: 0644]
src/lib/common/worker.hh [new file with mode: 0644]
src/lib/launchpad/launchpad.hh
src/lib/launchpad/worker.cc [deleted file]
src/lib/launchpad/worker.hh [deleted file]

index 329e773c22c956d63cd0dc04436b7628561ac192..509df4a99e788c32e74ef394977972ff4d881dbd 100644 (file)
@@ -12,7 +12,7 @@ After=dbus.socket tizen-system-env.service systemd-tmpfiles-setup.service buxton
 User=app_fw
 Group=app_fw
 SmackProcessLabel=System
-Capabilities=cap_setuid,cap_setgid,cap_mac_admin,cap_kill,cap_dac_override,cap_sys_admin,cap_fowner=i
+Capabilities=cap_setuid,cap_setgid,cap_mac_admin,cap_kill,cap_dac_override,cap_sys_admin,cap_fowner,cap_sys_ptrace=i
 SecureBits=keep-caps
 Type=notify
 EnvironmentFile=/run/tizen-system-env
index 465d6169fe57125f5fb08a246baa53c2f1f6f690..2cfc27e795b132622854315f6bfd276a18ba6fc4 100644 (file)
@@ -34,6 +34,8 @@ AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR}/signal
   LIB_SIGNAL_SRCS)
 AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR}/socket
   LIB_SOCKET_SRCS)
+AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR}/startup_monitor
+  LIB_STARTUP_MONITOR_SRCS)
 
 ADD_LIBRARY(${TARGET_LIB_AMD} SHARED
   ${LIB_SRCS}
@@ -54,6 +56,7 @@ ADD_LIBRARY(${TARGET_LIB_AMD} SHARED
   ${LIB_RES_INFO_SRCS}
   ${LIB_SIGNAL_SRCS}
   ${LIB_SOCKET_SRCS}
+  ${LIB_STARTUP_MONITOR_SRCS}
 )
 SET_TARGET_PROPERTIES(${TARGET_LIB_AMD} PROPERTIES SOVERSION ${MAJORVER})
 SET_TARGET_PROPERTIES(${TARGET_LIB_AMD} PROPERTIES VERSION ${FULLVER})
index 14b6d59b867094f177bff49d9e9bcaa7edc1eb13..5c21e6192c460f773f67b3d6f7f7273a68856c53 100644 (file)
@@ -179,22 +179,22 @@ AppStatus::AppStatus(unsigned int request_id, std::string appid,
 }
 
 AppStatus::~AppStatus() {
-  if (dying_timer_ != 0)
-    g_source_remove(dying_timer_);
+  if (dying_timer_ != nullptr)
+    tizen_core_source_destroy(dying_timer_);
 
-  if (startup_timer_ != 0)
-    g_source_remove(startup_timer_);
+  if (startup_timer_ != nullptr)
+    tizen_core_source_destroy(startup_timer_);
 }
 
-gboolean AppStatus::DyingTimeoutHandler(gpointer data) {
+bool AppStatus::DyingTimeoutHandler(void* data) {
   auto* app_status = static_cast<AppStatus*>(data);
-  app_status->dying_timer_ = 0;
+  app_status->dying_timer_ = nullptr;
   app_status->event_listener_->OnDyingTimeout(app_status);
-  return G_SOURCE_REMOVE;
+  return false;
 }
 
 void AppStatus::SetDyingEvent() {
-  if (dying_timer_ != 0) {
+  if (dying_timer_ != nullptr) {
     _E("Already set event");
     return;
   }
@@ -204,41 +204,65 @@ void AppStatus::SetDyingEvent() {
     return;
   }
 
-  dying_timer_ =
-      g_timeout_add_seconds(DYING_TIMEOUT, DyingTimeoutHandler, this);
-  if (dying_timer_ == 0) {
+  tizen_core_h core = nullptr;
+  int ret = tizen_core_find("main", &core);
+  if (ret != TIZEN_CORE_ERROR_NONE) {
+    _E("tizen_core_find() is failed. error(%d)", ret);
+    return;
+  }
+
+  ret = tizen_core_add_timer(core, DYING_TIMEOUT * 1000, DyingTimeoutHandler,
+      this, &dying_timer_);
+  if (ret != TIZEN_CORE_ERROR_NONE) {
     _E("Failed to set timeout callback");
     return;
   }
 }
 
 void AppStatus::SetStartupTimer() {
-  if (startup_timer_ != 0)
+  if (startup_timer_ != nullptr)
     return;
 
-  startup_timer_ = g_timeout_add_seconds(STARTUP_TIMEOUT,
-      +[](gpointer data) -> gboolean {
-    auto* app_status = static_cast<AppStatus*>(data);
-    if (!app_status->event_listener_->OnStartupTimeout(app_status)) {
-      app_status->startup_timer_ = 0;
-      return G_SOURCE_REMOVE;
-    }
+  tizen_core_h core = nullptr;
+  int ret = tizen_core_find("main", &core);
+  if (ret != TIZEN_CORE_ERROR_NONE) {
+    _E("tizen_core_find() is failed. error(%d)", ret);
+    return;
+  }
 
-    return G_SOURCE_CONTINUE;
-  }, this);
+  tizen_core_source_h source = nullptr;
+  ret = tizen_core_add_timer(core, STARTUP_TIMEOUT * 1000,
+      +[](void* user_data) -> bool {
+        auto* app_status = static_cast<AppStatus*>(user_data);
+        if (!app_status->event_listener_->OnStartupTimeout(app_status))
+          app_status->UnsetStartupTimer();
+
+        return true;
+      }, this, &source);
+  if (ret != TIZEN_CORE_ERROR_NONE) {
+    _E("tizen_core_add_timer() is failed. error(%d)", ret);
+    return;
+  }
 
-  if (startup_timer_ == 0) {
-    _E("Failed to set timeout callback");
+  startup_timer_ = source;
+  aul_cpu_times_h cpu_times = nullptr;
+  ret = aul_cpu_times_create(pid_, &cpu_times);
+  if (ret != AUL_R_OK) {
+    _E("Failed to create cpu times handle");
     return;
   }
+
+  aul_cpu_times_update(cpu_times);
+  cpu_times_.reset(cpu_times, aul_cpu_times_destroy);
 }
 
 void AppStatus::UnsetStartupTimer() {
-  if (startup_timer_ == 0)
+  if (startup_timer_ == nullptr)
     return;
 
-  g_source_remove(startup_timer_);
-  startup_timer_ = 0;
+  cpu_times_.reset();
+  tizen_core_source_destroy(startup_timer_);
+  startup_timer_ = nullptr;
 }
 
 const AppInfo* AppStatus::GetAppInfo() const {
@@ -430,4 +454,9 @@ void AppStatus::SetLatestCmd(int cmd) { latest_cmd_ = cmd; }
 
 int AppStatus::GetLatestCmd() const { return latest_cmd_; }
 
+std::shared_ptr<std::remove_pointer_t<aul_cpu_times_h>> AppStatus::GetCpuTimes()
+    const {
+  return cpu_times_;
+}
+
 }  // namespace amd
index 7a4a1e1d18ee730ecb52bddb45c4e7558b24d344..79060e7303d35f0081163e1427f62833416c5e5d 100644 (file)
@@ -17,7 +17,9 @@
 #ifndef LIB_APP_STATUS_APP_STATUS_HH_
 #define LIB_APP_STATUS_APP_STATUS_HH_
 
+#include <aul_cpu_monitor.h>
 #include <glib.h>
+#include <tizen_core.h>
 
 #include <memory>
 #include <string>
@@ -143,9 +145,11 @@ class AppStatus : public std::enable_shared_from_this<AppStatus> {
   void SetLatestCmd(int cmd);
   int GetLatestCmd() const;
 
+  std::shared_ptr<std::remove_pointer_t<aul_cpu_times_h>> GetCpuTimes() const;
+
  private:
   friend class AppStatusManager;
-  static gboolean DyingTimeoutHandler(gpointer user_data);
+  static bool DyingTimeoutHandler(void* user_data);
 
   unsigned int request_id_;
   std::string appid_;
@@ -165,8 +169,8 @@ class AppStatus : public std::enable_shared_from_this<AppStatus> {
   IEvent* event_listener_;
   int timestamp_ = 0;
   int fg_count_ = 0;
-  guint dying_timer_ = 0;
-  guint startup_timer_ = 0;
+  tizen_core_source_h dying_timer_ = nullptr;
+  tizen_core_source_h startup_timer_ = nullptr;
   bool socket_exists_ = false;
   bool starting_ = false;
   bool exiting_ = false;
@@ -174,6 +178,7 @@ class AppStatus : public std::enable_shared_from_this<AppStatus> {
   int64_t start_time_;  /* start timestamp millisecond */
   int delay_count_ = 0;
   int latest_cmd_ = 0;
+  std::shared_ptr<std::remove_pointer_t<aul_cpu_times_h>> cpu_times_;
 };
 
 }  // namespace amd
index 8d6a4321ee8113d38fce7c60018aa2b9deaacc0d..10ffec2fae694d38b37b32271d5df2ba76e0c476 100644 (file)
@@ -17,7 +17,9 @@
 #include "lib/app_status/app_status_manager.hh"
 
 #include <aul.h>
+#include <aul_backtrace.h>
 #include <aul_cmd.h>
+#include <aul_cpu_monitor.h>
 #include <aul_proc.h>
 #include <aul_sock.h>
 #include <bundle.h>
@@ -160,6 +162,31 @@ inline bool IsCrashStatus(int status) {
   return WCOREDUMP(status);
 }
 
+class BacktraceJob : public Worker::Job {
+ public:
+  explicit BacktraceJob(pid_t pid,
+      std::shared_ptr<std::remove_pointer_t<aul_cpu_times_h>> cpu_times)
+          : pid_(pid), cpu_times_(std::move(cpu_times)) {}
+  ~BacktraceJob() override = default;
+
+  void Do() override {
+    int ret = aul_backtrace_print(pid_);
+    if (ret != 0)
+      _E("aul_backtrace_print() failed. ret(%d)", ret);
+
+    if (!cpu_times_)
+      return;
+
+    aul_cpu_times_update(cpu_times_.get());
+    double cpu_usage = 0.0f;
+    aul_cpu_monitor_get_cpu_usage(cpu_times_.get(), &cpu_usage);
+  }
+
+ private:
+  pid_t pid_;
+  std::shared_ptr<std::remove_pointer_t<aul_cpu_times_h>> cpu_times_;
+};
+
 }  // namespace
 
 void AppStatusManager::OnDyingTimeout(AppStatus* app_status) {
@@ -204,7 +231,7 @@ bool AppStatusManager::OnStartupTimeout(AppStatus* app_status) {
       app_status->GetAppID().c_str(), app_status->GetPID(),
       app_status->GetDelayCount());
 
-  if (app_status->GetDelayCount() >= 10) {
+  if (app_status->GetDelayCount() >= 100) {
     _E("App(%s[%d]) startup signal has not been received.",
         app_status->GetAppID().c_str(), app_status->GetPID());
     amd::AppStatusManager::GetInst().Update(app_status->shared_from_this(),
@@ -213,6 +240,11 @@ bool AppStatusManager::OnStartupTimeout(AppStatus* app_status) {
     return false;
   }
 
+  if (app_status->GetDelayCount() % 5 == 0) {
+    backtrace_worker_->Post(
+        std::make_shared<BacktraceJob>(app_status->GetPID(), app_status->cpu_times_));
+  }
+
   return true;
 }
 
@@ -1184,7 +1216,9 @@ int AppStatusManager::Init() {
   AddVconfInitTimer();
   _noti_listen(AMD_NOTI_MSG_LAUNCH_PREPARE_END, OnLaunchPrepareEnd);
   app_status_dao_.reset(new AppStatusDaoImpl());
+  backtrace_worker_.reset(new Worker("Backtrace+"));
   RunThread();
+
   disposed_ = false;
   return 0;
 }
@@ -1192,6 +1226,7 @@ int AppStatusManager::Init() {
 int AppStatusManager::Finish() {
   if (disposed_) return 0;
 
+  backtrace_worker_.reset();
   queue_.Push(std::make_shared<AppResult>(true));
   thread_.join();
 
index 580fc9049199bb12bf4910a70775de287316166d..f110b247789522715a504c801ef7aabc189545a4 100644 (file)
@@ -37,6 +37,7 @@
 #include "lib/app_status/app_status.hh"
 #include "lib/app_status/app_status_dao.hh"
 #include "lib/app_status/pkg_status.hh"
+#include "lib/common/worker.hh"
 #include "lib/launchpad/launchpad.hh"
 #include "lib/launch/launch_context.hh"
 
@@ -148,6 +149,7 @@ class AppStatusManager : public AppStatus::IEvent, public Launchpad::IEvent {
   tizen_base::SharedQueue<std::shared_ptr<AppResult>> queue_;
   std::unordered_map<uid_t, std::unordered_map<std::string, int>>
       crash_count_map_;
+  std::unique_ptr<Worker> backtrace_worker_;
 };
 
 }  // namespace amd
diff --git a/src/lib/common/worker.cc b/src/lib/common/worker.cc
new file mode 100644 (file)
index 0000000..e21ff46
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lib/common/worker.hh"
+
+#include <utility>
+
+#include "lib/common/exception.hh"
+#include "lib/common/log_private.hh"
+
+namespace amd {
+
+Worker::Worker(std::string name) : name_(std::move(name)) {
+  int ret = tizen_core_task_create(name_.c_str(), true, &task_);
+  if (ret != TIZEN_CORE_ERROR_NONE) {
+    _E("tizen_core_task_create() is failed. error=%d", ret);
+    THROW(ret);
+  }
+
+  tizen_core_task_get_tizen_core(task_, &core_);
+  tizen_core_task_run(task_);
+}
+
+Worker::~Worker() {
+  bool running = false;
+  tizen_core_task_is_running(task_, &running);
+  if (running) tizen_core_task_quit(task_);
+
+  tizen_core_task_destroy(task_);
+}
+
+
+void Worker::Post(std::shared_ptr<Job> job) {
+  queue_.Push(std::move(job));
+  tizen_core_source_h source = nullptr;
+  tizen_core_add_idle_job(core_, JobCb, this, &source);
+  if (source == nullptr) _E("Failed to add idle job");
+}
+
+bool Worker::JobCb(void* user_data) {
+  auto* worker = static_cast<Worker*>(user_data);
+  while (!worker->queue_.IsEmpty()) {
+    auto job = worker->queue_.WaitAndPop();
+    if (job) job->Do();
+  }
+
+  return false;
+}
+
+}  // namespace amd
diff --git a/src/lib/common/worker.hh b/src/lib/common/worker.hh
new file mode 100644 (file)
index 0000000..878a3f6
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIB_COMMON_WORKER_HH_
+#define LIB_COMMON_WORKER_HH_
+
+#include <tizen_core.h>
+
+#include <memory>
+#include <string>
+
+#include <shared-queue.hpp>
+
+namespace amd {
+
+class Worker {
+ public:
+  class Job {
+   public:
+    virtual ~Job() = default;
+    virtual void Do() {}
+  };
+
+  explicit Worker(std::string name);
+  virtual ~Worker();
+
+  void Post(std::shared_ptr<Job> job);
+ private:
+  static bool JobCb(void* user_data);
+
+ private:
+  std::string name_;
+  tizen_core_task_h task_ = nullptr;
+  tizen_core_h core_ = nullptr;
+  tizen_base::SharedQueue<std::shared_ptr<Job>> queue_;
+};
+
+}  // namespace amd
+
+#endif  // LIB_COMMON_WORKER_HH_
index 66322ce6d3bbee84387a92056481c8ca855e52c6..4bd8d4de1e73e63b317a339ce1d35a496365019c 100644 (file)
@@ -28,8 +28,8 @@
 #include <unordered_map>
 
 #include "lib/amd_launchpad.h"
+#include "lib/common/worker.hh"
 #include "lib/launchpad/client_channel.hh"
-#include "lib/launchpad/worker.hh"
 
 namespace amd {
 
diff --git a/src/lib/launchpad/worker.cc b/src/lib/launchpad/worker.cc
deleted file mode 100644 (file)
index be570e5..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "lib/launchpad/worker.hh"
-
-#include <utility>
-
-#include "lib/common/exception.hh"
-#include "lib/common/log_private.hh"
-
-namespace amd {
-
-Worker::Worker(std::string name) : name_(std::move(name)) {
-  int ret = tizen_core_task_create(name_.c_str(), true, &task_);
-  if (ret != TIZEN_CORE_ERROR_NONE) {
-    _E("tizen_core_task_create() is failed. error=%d", ret);
-    THROW(ret);
-  }
-
-  tizen_core_task_get_tizen_core(task_, &core_);
-  tizen_core_task_run(task_);
-}
-
-Worker::~Worker() {
-  bool running = false;
-  tizen_core_task_is_running(task_, &running);
-  if (running) tizen_core_task_quit(task_);
-
-  tizen_core_task_destroy(task_);
-}
-
-
-void Worker::Post(std::shared_ptr<Job> job) {
-  queue_.Push(std::move(job));
-  tizen_core_source_h source = nullptr;
-  tizen_core_add_idle_job(core_, JobCb, this, &source);
-  if (source == nullptr) _E("Failed to add idle job");
-}
-
-bool Worker::JobCb(void* user_data) {
-  auto* worker = static_cast<Worker*>(user_data);
-  while (!worker->queue_.IsEmpty()) {
-    auto job = worker->queue_.WaitAndPop();
-    if (job) job->Do();
-  }
-
-  return false;
-}
-
-}  // namespace amd
diff --git a/src/lib/launchpad/worker.hh b/src/lib/launchpad/worker.hh
deleted file mode 100644 (file)
index 7631758..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LIB_LAUNCHPAD_WORKER_HH_
-#define LIB_LAUNCHPAD_WORKER_HH_
-
-#include <tizen_core.h>
-
-#include <memory>
-#include <string>
-
-#include <shared-queue.hpp>
-
-namespace amd {
-
-class Worker {
- public:
-  class Job {
-   public:
-    virtual ~Job() = default;
-    virtual void Do() {}
-  };
-
-  explicit Worker(std::string name);
-  virtual ~Worker();
-
-  void Post(std::shared_ptr<Job> job);
- private:
-  static bool JobCb(void* user_data);
-
- private:
-  std::string name_;
-  tizen_core_task_h task_ = nullptr;
-  tizen_core_h core_ = nullptr;
-  tizen_base::SharedQueue<std::shared_ptr<Job>> queue_;
-};
-
-}  // namespace amd
-
-#endif  // LIB_LAUNCHPAD_WORKER_HH_
\ No newline at end of file