From 5c5aec15b15c3eba4d1908f1c85e0dbc9bc5f37c Mon Sep 17 00:00:00 2001 From: Guilherme Gallo Date: Sat, 15 Apr 2023 16:43:40 -0300 Subject: [PATCH] ci/lava: Integrate StructuralLogger with AutoSaveDict Let's use the AutoSaveDict as structural logger abstraction to enable real-time monitoring of LAVA jobs. Mainly used for local runs and debugging of Mesa CI LAVA jobs. Signed-off-by: Guilherme Gallo Part-of: --- .gitlab-ci/lava/lava_job_submitter.py | 86 ++++++++++++++++++++++++----------- 1 file changed, 60 insertions(+), 26 deletions(-) diff --git a/.gitlab-ci/lava/lava_job_submitter.py b/.gitlab-ci/lava/lava_job_submitter.py index 0ba515a..c2b6697 100755 --- a/.gitlab-ci/lava/lava_job_submitter.py +++ b/.gitlab-ci/lava/lava_job_submitter.py @@ -9,7 +9,6 @@ """Send a job to LAVA, track it and collect log back""" - import contextlib import json import pathlib @@ -49,6 +48,14 @@ from lavacli.utils import flow_yaml as lava_yaml # sophisticated dict-like data abstractions. STRUCTURAL_LOG = defaultdict(list) +try: + from ci.structured_logger import StructuredLogger +except ImportError as e: + print_log( + f"Could not import StructuredLogger library: {e}. " + "Falling back to defaultdict based structured logger." + ) + # Timeout in seconds to decide if the device from the dispatched LAVA job has # hung or not due to the lack of new log output. DEVICE_HANGING_TIMEOUT_SEC = int(getenv("LAVA_DEVICE_HANGING_TIMEOUT_SEC", 5*60)) @@ -173,6 +180,7 @@ def is_job_hanging(job, max_idle_time): def parse_log_lines(job, log_follower, new_log_lines): + if log_follower.feed(new_log_lines): # If we had non-empty log data, we can assure that the device is alive. job.heartbeat() @@ -190,6 +198,7 @@ def parse_log_lines(job, log_follower, new_log_lines): def fetch_new_log_lines(job): + # The XMLRPC binary packet may be corrupted, causing a YAML scanner error. # Retry the log fetching several times before exposing the error. for _ in range(5): @@ -275,19 +284,17 @@ def execute_job_with_retries( ) -> Optional[LAVAJob]: for attempt_no in range(1, retry_count + 2): # Need to get the logger value from its object to enable autosave - # features + # features, if AutoSaveDict is enabled from StructuredLogging module jobs_log.append({}) job_log = jobs_log[-1] job = LAVAJob(proxy, job_definition, job_log) STRUCTURAL_LOG["dut_attempt_counter"] = attempt_no - try: submit_job(job) wait_for_job_get_started(job) log_follower: LogFollower = bootstrap_log_follower() follow_job_execution(job, log_follower) return job - except (MesaCIException, KeyboardInterrupt) as exception: job.handle_exception(exception) print_log( @@ -295,7 +302,6 @@ def execute_job_with_retries( f"Finished executing LAVA job in the attempt #{attempt_no}" f"{CONSOLE_LOG['RESET']}" ) - finally: job_log["finished_time"] = datetime.now().isoformat() print_job_final_status(job) @@ -354,14 +360,17 @@ class LAVAJobSubmitter(PathResolver): visibility_group: str = None # Only affects LAVA farm maintainers job_rootfs_overlay_url: str = None structured_log_file: pathlib.Path = None # Log file path with structured LAVA log + __structured_log_context = contextlib.nullcontext() # Structured Logger context def __post_init__(self) -> None: super().__post_init__() # Remove mesa job names with spaces, which breaks the lava-test-case command self.mesa_job_name = self.mesa_job_name.split(" ")[0] - if self.structured_log_file: - self.setup_structured_logger() + if not self.structured_log_file: + return + + self.__structured_log_context = StructuredLoggerWrapper(self).logger_context() def dump(self, job_definition): if self.dump_yaml: @@ -399,31 +408,56 @@ class LAVAJobSubmitter(PathResolver): if self.validate_only: return + with self.__structured_log_context: + try: + finished_job = retriable_follow_job(proxy, job_definition) + exit_code = 0 if finished_job.status == "pass" else 1 + STRUCTURAL_LOG["job_combined_status"] = job.status + sys.exit(exit_code) + + except Exception as exception: + STRUCTURAL_LOG["job_combined_fail_reason"] = str(exception) + raise exception + + +class StructuredLoggerWrapper: + def __init__(self, submitter: LAVAJobSubmitter) -> None: + self.__submitter: LAVAJobSubmitter = submitter + + def _init_logger(self): + STRUCTURAL_LOG["fixed_tags"] = self.__submitter.lava_tags + STRUCTURAL_LOG["dut_job_type"] = self.__submitter.device_type + STRUCTURAL_LOG["job_combined_fail_reason"] = None + STRUCTURAL_LOG["job_combined_status"] = "not_submitted" + STRUCTURAL_LOG["dut_attempt_counter"] = 0 + + # Initialize dut_jobs list to enable appends + STRUCTURAL_LOG["dut_jobs"] = [] + + @contextlib.contextmanager + def _simple_logger_context(self): + log_file = pathlib.Path(self.__submitter.structured_log_file) + log_file.parent.mkdir(parents=True, exist_ok=True) try: - finished_job = retriable_follow_job(proxy, job_definition) - except Exception as exception: - STRUCTURAL_LOG["job_combined_fail_reason"] = str(exception) - raise exception - exit_code = 0 if finished_job.status == "pass" else 1 - STRUCTURAL_LOG["job_combined_status"] = job.status - sys.exit(exit_code) - - def setup_structured_logger(self): + # Truncate the file + log_file.write_text("") + yield + finally: + log_file.write_text(json.dumps(STRUCTURAL_LOG, indent=2)) + + def logger_context(self): + context = contextlib.nullcontext() try: + global STRUCTURAL_LOG STRUCTURAL_LOG = StructuredLogger( - self.structured_log_file, truncate=True + self.__submitter.structured_log_file, truncate=True ).data - except NameError as e: - print( - f"Could not import StructuredLogger library: {e}. " - "Falling back to DummyLogger" - ) + except NameError: + context = self._simple_logger_context() - STRUCTURAL_LOG["fixed_tags"] = self.lava_tags - STRUCTURAL_LOG["dut_job_type"] = self.device_type - STRUCTURAL_LOG["job_combined_fail_reason"] = None - STRUCTURAL_LOG["job_combined_status"] = "not_submitted" + self._init_logger() + return context if __name__ == "__main__": -- 2.7.4