ci/lava: Integrate StructuralLogger with AutoSaveDict
authorGuilherme Gallo <guilherme.gallo@collabora.com>
Sat, 15 Apr 2023 19:43:40 +0000 (16:43 -0300)
committerMarge Bot <emma+marge@anholt.net>
Wed, 19 Apr 2023 14:36:37 +0000 (14:36 +0000)
Let's use the AutoSaveDict as structural logger abstraction to enable
real-time monitoring of LAVA jobs. Mainly used for local runs and
debugging of Mesa CI LAVA jobs.

Signed-off-by: Guilherme Gallo <guilherme.gallo@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22500>

.gitlab-ci/lava/lava_job_submitter.py

index 0ba515a..c2b6697 100755 (executable)
@@ -9,7 +9,6 @@
 
 """Send a job to LAVA, track it and collect log back"""
 
-
 import contextlib
 import json
 import pathlib
@@ -49,6 +48,14 @@ from lavacli.utils import flow_yaml as lava_yaml
 # sophisticated dict-like data abstractions.
 STRUCTURAL_LOG = defaultdict(list)
 
+try:
+    from ci.structured_logger import StructuredLogger
+except ImportError as e:
+    print_log(
+        f"Could not import StructuredLogger library: {e}. "
+        "Falling back to defaultdict based structured logger."
+    )
+
 # Timeout in seconds to decide if the device from the dispatched LAVA job has
 # hung or not due to the lack of new log output.
 DEVICE_HANGING_TIMEOUT_SEC = int(getenv("LAVA_DEVICE_HANGING_TIMEOUT_SEC",  5*60))
@@ -173,6 +180,7 @@ def is_job_hanging(job, max_idle_time):
 
 
 def parse_log_lines(job, log_follower, new_log_lines):
+
     if log_follower.feed(new_log_lines):
         # If we had non-empty log data, we can assure that the device is alive.
         job.heartbeat()
@@ -190,6 +198,7 @@ def parse_log_lines(job, log_follower, new_log_lines):
 
 
 def fetch_new_log_lines(job):
+
     # The XMLRPC binary packet may be corrupted, causing a YAML scanner error.
     # Retry the log fetching several times before exposing the error.
     for _ in range(5):
@@ -275,19 +284,17 @@ def execute_job_with_retries(
 ) -> Optional[LAVAJob]:
     for attempt_no in range(1, retry_count + 2):
         # Need to get the logger value from its object to enable autosave
-        # features
+        # features, if AutoSaveDict is enabled from StructuredLogging module
         jobs_log.append({})
         job_log = jobs_log[-1]
         job = LAVAJob(proxy, job_definition, job_log)
         STRUCTURAL_LOG["dut_attempt_counter"] = attempt_no
-
         try:
             submit_job(job)
             wait_for_job_get_started(job)
             log_follower: LogFollower = bootstrap_log_follower()
             follow_job_execution(job, log_follower)
             return job
-
         except (MesaCIException, KeyboardInterrupt) as exception:
             job.handle_exception(exception)
             print_log(
@@ -295,7 +302,6 @@ def execute_job_with_retries(
                 f"Finished executing LAVA job in the attempt #{attempt_no}"
                 f"{CONSOLE_LOG['RESET']}"
             )
-
         finally:
             job_log["finished_time"] = datetime.now().isoformat()
             print_job_final_status(job)
@@ -354,14 +360,17 @@ class LAVAJobSubmitter(PathResolver):
     visibility_group: str = None  # Only affects LAVA farm maintainers
     job_rootfs_overlay_url: str = None
     structured_log_file: pathlib.Path = None  # Log file path with structured LAVA log
+    __structured_log_context = contextlib.nullcontext()  # Structured Logger context
 
     def __post_init__(self) -> None:
         super().__post_init__()
         # Remove mesa job names with spaces, which breaks the lava-test-case command
         self.mesa_job_name = self.mesa_job_name.split(" ")[0]
 
-        if self.structured_log_file:
-            self.setup_structured_logger()
+        if not self.structured_log_file:
+            return
+
+        self.__structured_log_context = StructuredLoggerWrapper(self).logger_context()
 
     def dump(self, job_definition):
         if self.dump_yaml:
@@ -399,31 +408,56 @@ class LAVAJobSubmitter(PathResolver):
         if self.validate_only:
             return
 
+        with self.__structured_log_context:
+            try:
+                finished_job = retriable_follow_job(proxy, job_definition)
+                exit_code = 0 if finished_job.status == "pass" else 1
+                STRUCTURAL_LOG["job_combined_status"] = job.status
+                sys.exit(exit_code)
+
+            except Exception as exception:
+                STRUCTURAL_LOG["job_combined_fail_reason"] = str(exception)
+                raise exception
+
+
+class StructuredLoggerWrapper:
+    def __init__(self, submitter: LAVAJobSubmitter) -> None:
+        self.__submitter: LAVAJobSubmitter = submitter
+
+    def _init_logger(self):
+        STRUCTURAL_LOG["fixed_tags"] = self.__submitter.lava_tags
+        STRUCTURAL_LOG["dut_job_type"] = self.__submitter.device_type
+        STRUCTURAL_LOG["job_combined_fail_reason"] = None
+        STRUCTURAL_LOG["job_combined_status"] = "not_submitted"
+        STRUCTURAL_LOG["dut_attempt_counter"] = 0
+
+        # Initialize dut_jobs list to enable appends
+        STRUCTURAL_LOG["dut_jobs"] = []
+
+    @contextlib.contextmanager
+    def _simple_logger_context(self):
+        log_file = pathlib.Path(self.__submitter.structured_log_file)
+        log_file.parent.mkdir(parents=True, exist_ok=True)
         try:
-            finished_job = retriable_follow_job(proxy, job_definition)
-        except Exception as exception:
-            STRUCTURAL_LOG["job_combined_fail_reason"] = str(exception)
-            raise exception
-        exit_code = 0 if finished_job.status == "pass" else 1
-        STRUCTURAL_LOG["job_combined_status"] = job.status
-        sys.exit(exit_code)
-
-    def setup_structured_logger(self):
+            # Truncate the file
+            log_file.write_text("")
+            yield
+        finally:
+            log_file.write_text(json.dumps(STRUCTURAL_LOG, indent=2))
+
+    def logger_context(self):
+        context = contextlib.nullcontext()
         try:
+
             global STRUCTURAL_LOG
             STRUCTURAL_LOG = StructuredLogger(
-                self.structured_log_file, truncate=True
+                self.__submitter.structured_log_file, truncate=True
             ).data
-        except NameError as e:
-            print(
-                f"Could not import StructuredLogger library: {e}. "
-                "Falling back to DummyLogger"
-            )
+        except NameError:
+            context = self._simple_logger_context()
 
-        STRUCTURAL_LOG["fixed_tags"] = self.lava_tags
-        STRUCTURAL_LOG["dut_job_type"] = self.device_type
-        STRUCTURAL_LOG["job_combined_fail_reason"] = None
-        STRUCTURAL_LOG["job_combined_status"] = "not_submitted"
+        self._init_logger()
+        return context
 
 
 if __name__ == "__main__":