From 736cd2e2b57bbcb950da8bd255e4aa72302dd78c Mon Sep 17 00:00:00 2001 From: "Frank Ch. Eigler" Date: Fri, 30 Oct 2020 18:26:04 -0400 Subject: [PATCH] PR26775: restore thread_work_groom metric to cycle count ... and add new metrics about progress of traversal and groom processes. Correct one control flow abnormality that could prematurely end a scanner thread and might have accounted for the inconsistent test results from the previous patch. Signed-off-by: Frank Ch. Eigler --- debuginfod/ChangeLog | 11 +++++++++++ debuginfod/debuginfod.cxx | 43 +++++++++++++++++++++++++++++-------------- tests/ChangeLog | 6 ++++++ tests/run-debuginfod-find.sh | 5 ++--- 4 files changed, 48 insertions(+), 17 deletions(-) diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog index 5af4dc6..ec791c6 100644 --- a/debuginfod/ChangeLog +++ b/debuginfod/ChangeLog @@ -1,3 +1,14 @@ +2020-10-30 Frank Ch. Eigler + + PR26775 cont'd. + (thread_main_scanner): Ensure control doesn't leave + infinite loop until program exit, even if SIGUSR2. + (scan_source_paths): Have traverser clean scanq on + SIGUSR2. Emit additional traversed_total metrics. + (groom): Emit additional groomed_total metrics. + (thread_main_groom): Restore previous thread_work_total + metric. + 2020-10-29 Frank Ch. Eigler PR26775 diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx index 8448a50..1020a64 100644 --- a/debuginfod/debuginfod.cxx +++ b/debuginfod/debuginfod.cxx @@ -2711,7 +2711,8 @@ thread_main_scanner (void* arg) add_metric("thread_busy", "role", "scan", -1); bool gotone = scanq.wait_front(p); add_metric("thread_busy", "role", "scan", 1); - if (! gotone) continue; // or break + + if (! gotone) continue; // go back to waiting try { @@ -2750,13 +2751,9 @@ thread_main_scanner (void* arg) e.report(cerr); } - inc_metric("thread_work_total", "role","scan"); - - if (sigusr2 != forced_groom_count) // stop early if groom triggered - { - scanq.clear(); - break; - } + // finished a scanning step -- not a "loop", because we just + // consume the traversal loop's work, whenever + inc_metric("thread_work_total","role","scan"); } add_metric("thread_busy", "role", "scan", -1); @@ -2801,7 +2798,10 @@ scan_source_paths() if (interrupted) break; if (sigusr2 != forced_groom_count) // stop early if groom triggered - break; + { + scanq.clear(); // clear previously issued work for scanner threads + break; + } fts_scanned ++; @@ -2825,6 +2825,10 @@ scan_source_paths() if (verbose > 3) obatched(clog) << "fts skipped by regex " << (!ri ? "I" : "") << (rx ? "X" : "") << endl; fts_regex ++; + if (!ri) + inc_metric("traversed_total","type","regex-skipped-I"); + if (rx) + inc_metric("traversed_total","type","regex-skipped-X"); continue; } @@ -2832,6 +2836,7 @@ scan_source_paths() { case FTS_F: scanq.push_back (make_pair(rps, *f->fts_statp)); + inc_metric("traversed_total","type","file"); break; case FTS_ERR: @@ -2841,11 +2846,16 @@ scan_source_paths() auto x = libc_exception(f->fts_errno, string("fts traversal ") + string(f->fts_path)); x.report(cerr); } + inc_metric("traversed_total","type","error"); break; - default: - ; - /* ignore */ + case FTS_D: // ignore + inc_metric("traversed_total","type","directory"); + break; + + default: // ignore + inc_metric("traversed_total","type","other"); + break; } } gettimeofday (&tv_end, NULL); @@ -2890,6 +2900,7 @@ thread_main_fts_source_paths (void* arg) set_metric("thread_busy", "role","traverse", 1); scan_source_paths(); last_rescan = time(NULL); // NB: now was before scanning + // finished a traversal loop inc_metric("thread_work_total", "role","traverse"); set_metric("thread_busy", "role","traverse", 0); } @@ -2970,9 +2981,11 @@ void groom() files_del_f_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done(); files_del_r_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done(); files_del_scan.reset().bind(1,fileid).bind(2,mtime).step_ok_done(); + inc_metric("groomed_total", "decision", "stale"); } - - inc_metric("thread_work_total", "role", "groom"); + else + inc_metric("groomed_total", "decision", "fresh"); + if (sigusr1 != forced_rescan_count) // stop early if scan triggered break; } @@ -3039,6 +3052,8 @@ thread_main_groom (void* /*arg*/) set_metric("thread_busy", "role", "groom", 1); groom (); last_groom = time(NULL); // NB: now was before grooming + // finished a grooming loop + inc_metric("thread_work_total", "role", "groom"); set_metric("thread_busy", "role", "groom", 0); } catch (const sqlite_exception& e) diff --git a/tests/ChangeLog b/tests/ChangeLog index 012e305..13abf89 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,9 @@ +2020-10-30 Frank Ch. Eigler + + PR26775 + * run-debuginfod-find.sh: Modify test for restored + thread_work_total semantics for grooming. + 2020-10-29 Frank Ch. Eigler PR26775 diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh index 3af04c0..0d1fcf1 100755 --- a/tests/run-debuginfod-find.sh +++ b/tests/run-debuginfod-find.sh @@ -346,8 +346,7 @@ rm -r R/debuginfod-rpms/rhel6/* kill -USR2 $PID1 # groom cycle # Expect 3 rpms to be deleted by the groom # 1 groom cycle already took place at/soon-after startup, so -USR2 makes 2 -# ... times the # of files checked in each cycle -wait_ready $PORT1 'thread_work_total{role="groom"}' 51 +wait_ready $PORT1 'thread_work_total{role="groom"}' 2 wait_ready $PORT1 'groom{statistic="file d/e"}' 3 rm -rf $DEBUGINFOD_CACHE_PATH # clean it from previous tests @@ -364,7 +363,7 @@ testrun ${abs_top_builddir}/debuginfod/debuginfod-find executable $BUILDID2 # run a groom cycle to force server to drop its fdcache kill -USR2 $PID1 # groom cycle -wait_ready $PORT1 'thread_work_total{role="groom"}' 98 # 3 complete cycles +wait_ready $PORT1 'thread_work_total{role="groom"}' 3 # move it around a couple of times to make it likely to hit a nonexistent entry during iteration mv R/debuginfod-rpms/rhel7 R/debuginfod-rpms/rhel7renamed kill -USR1 $PID1 # scan cycle -- 2.7.4