# pylint:disable=line-too-long
import argparse
-import csv
import json
import os
+import sys
from collections import OrderedDict
from glob import glob
from operator import itemgetter
from pathlib import Path
+import logging as log
-from memcheck_upload import create_memcheck_records
from pymongo import MongoClient
+from memcheck_upload import create_memcheck_records
# Database arguments
-DATABASE = 'memcheck'
+from memcheck_upload import DATABASE, DB_COLLECTIONS
+
+
+class HashableDict(dict):
+    """Dictionary with a defined __hash__ so that it can be used
+    as a key in another dictionary"""
+ def __hash__(self):
+ return hash(tuple(sorted(self.items())))
def get_db_memcheck_records(query, db_collection, db_name, db_url):
    """Request MemCheckTests records from the database by the provided query"""
    items = list(MongoClient(db_url)[db_name][db_collection].find(query))
    return items
-def prepare_comparison_table_csv(data, data_metrics, output_file):
- """generate .csv file with table based on provided data"""
- fields = list(data[0].keys())
- metrics_names = list(data_metrics[0].keys())
- HEADERS = fields + metrics_names
- with open(output_file, 'w', newline="") as csvfile:
- csvwriter = csv.writer(csvfile)
- csvwriter.writerow(HEADERS)
- for i in range(len(data)):
- row = []
- for field in fields:
- row.append(data[i][field])
- for metric_name in metrics_names:
- row.append(data_metrics[i][metric_name])
- csvwriter.writerow(row)
-
-
def compare_memcheck_2_runs(cur_values, references, output_file=None):
"""Compares 2 MemCheckTests runs and prepares a report on specified path"""
+ import pandas # pylint:disable=import-outside-toplevel
+ from scipy.stats import gmean # pylint:disable=import-outside-toplevel
+
+ returncode = 0
+
+ # constants
+ metric_name_template = "{} {}"
+ GEOMEAN_THRESHOLD = 0.9
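+    # fail if geomean(ref/cur) drops below this value, i.e. the current run uses noticeably more memory than the reference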
    # Fields should be present in both `references` and `cur_values`;
    # some metrics may be missing in one of the two runs.
    required_fields = ["model", "device", "test_name"]
    required_metrics = ["vmrss", "vmhwm"]
    # `ops` is a template applied for every metric in `required_metrics`
    ops = OrderedDict([
        ("cur-ref", lambda x, y: y - x if (x is not None and y is not None) else None),
        ("ref/cur", lambda x, y: x / y if (x is not None and y is not None) else None)
    ])
-
- filtered_refs = []
- filtered_refs_metrics = []
+    # `comparison_ops` maps the metric columns produced by `ops` to checks;
+    # each check returns a (status, message) tuple that drives the return code
+ comparison_ops = {
+ # format: {metric_col_name: (operation, message)}
+ metric_name_template.format("vmrss", "ref/cur"):
+ lambda x: (gmean(x) > GEOMEAN_THRESHOLD,
+ "geomean={} is less than threshold={}".format(gmean(x), GEOMEAN_THRESHOLD)),
+ metric_name_template.format("vmhwm", "ref/cur"):
+ lambda x: (gmean(x) > GEOMEAN_THRESHOLD,
+ "geomean={} is less than threshold={}".format(gmean(x), GEOMEAN_THRESHOLD))
+ }
+
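+    # key each record by its identifying fields so reference and current
+    # metrics can be matched per test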
+ filtered_refs = {}
for record in references:
filtered_rec = {key: val for key, val in record.items() if key in required_fields}
filtered_rec_metrics = {key: val for key, val in record["metrics"].items() if key in required_metrics}
- filtered_refs.append(filtered_rec)
- filtered_refs_metrics.append(filtered_rec_metrics)
- assert len(filtered_refs) == len(filtered_refs_metrics), \
- "Filtered references and metrics should contain equal number of records. " \
- "References len: {}, metrics len: {}".format(len(filtered_refs), len(filtered_refs_metrics))
-
- filtered_cur_val = []
- filtered_cur_val_metrics = []
+ filtered_refs[HashableDict(filtered_rec)] = filtered_rec_metrics
+
+ filtered_cur_val = {}
for record in cur_values:
filtered_rec = {key: val for key, val in record.items() if key in required_fields}
filtered_rec_metrics = {key: val for key, val in record["metrics"].items() if key in required_metrics}
- filtered_cur_val.append(filtered_rec)
- filtered_cur_val_metrics.append(filtered_rec_metrics)
- assert len(filtered_cur_val) == len(filtered_cur_val_metrics), \
- "Filtered current values and metrics should contain equal number of records. " \
- "Current values len: {}, metrics len: {}".format(len(filtered_refs), len(filtered_refs_metrics))
+ filtered_cur_val[HashableDict(filtered_rec)] = filtered_rec_metrics
    comparison_data = []
    for data in [filtered_refs, filtered_cur_val]:
        for record in data:
            rec = OrderedDict((field, record[field]) for field in required_fields)
            if rec not in comparison_data:
                comparison_data.append(rec)
comparison_data = sorted(comparison_data, key=itemgetter("model"))
- comparison_data_metrics = []
for record in comparison_data:
- try:
- i = filtered_refs.index(record)
- except ValueError:
- i = -1
-
- try:
- j = filtered_cur_val.index(record)
- except ValueError:
- j = -1
-
metrics_rec = OrderedDict()
for metric in required_metrics:
- ref = filtered_refs_metrics[i][metric] if i != -1 and metric in filtered_refs_metrics[i] else None
- cur = filtered_cur_val_metrics[j][metric] if j != -1 and metric in filtered_cur_val_metrics[j] else None
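+            # a metric missing on either side yields None, which the `ops` lambdas propagate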
+ ref = filtered_refs.get(HashableDict(record), {}).get(metric, None)
+ cur = filtered_cur_val.get(HashableDict(record), {}).get(metric, None)
for op_name, op in ops.items():
op_res = op(ref, cur)
- metric_name = "{} {}".format(metric, op_name)
+ metric_name = metric_name_template.format(metric, op_name)
metrics_rec.update({metric_name: op_res})
metrics_rec.move_to_end(metric_name)
-
- comparison_data_metrics.append(metrics_rec)
-
- assert len(comparison_data) == len(comparison_data_metrics), \
- "Data and metrics for comparison should contain equal number of records. Data len: {}, metrics len: {}" \
- .format(len(comparison_data), len(comparison_data_metrics))
-
+ # update `comparison_data` with metrics
+ for metric_name, op_res in metrics_rec.items():
+ record.update({metric_name: op_res})
+ record.move_to_end(metric_name)
+
+ # compare data using `comparison_ops`
+ orig_data = pandas.DataFrame(comparison_data)
+ data = orig_data.dropna()
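+    # rows with NaN (a test or metric present in only one of the runs) are excluded from the checks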
+
+ devices = data["device"].unique()
+ for device in devices:
+ frame = data[data["device"] == device]
+ for field, comp_op in comparison_ops.items():
+ status, msg = comp_op(frame.loc[:, field])
+ if not status:
+ log.error('Comparison for field="%s" for device="%s" failed: %s', field, device, msg)
+ returncode = 1
+
+ # dump data to file
if output_file:
- prepare_comparison_table_csv(comparison_data, comparison_data_metrics, output_file)
+ if os.path.splitext(output_file)[1] == ".html":
+ orig_data.to_html(output_file)
+ else:
+ orig_data.to_csv(output_file)
+ log.info('Created memcheck comparison report %s', output_file)
+
+ return returncode
def cli_parser():
"""parse command-line arguments"""
- parser = argparse.ArgumentParser(description='Tool to create a table with comparison '
- 'of 2 runs of MemCheckTests')
+ parser = argparse.ArgumentParser(description='Compare 2 runs of MemCheckTests')
    parser.add_argument('cur_source',
                        help='Source of current values of MemCheckTests. '
                             'Should contain path to a folder with logs or '
                             'query to request data from DB in "key=value[,key=value]" format')
parser.add_argument('--db_collection',
help=f'Collection name in "{DATABASE}" database to query'
f' data using current source.',
- choices=["commit", "nightly", "weekly"])
+ choices=DB_COLLECTIONS)
parser.add_argument('--ref_db_collection',
help=f'Collection name in "{DATABASE}" database to query'
f' data using reference source.',
- choices=["commit", "nightly", "weekly"])
+ choices=DB_COLLECTIONS)
parser.add_argument('--out_file', dest='output_file', type=Path,
help='Path to a file (with name) to save results. '
'Example: /home/.../file.csv')
    return parser.parse_args()


if __name__ == "__main__":
    args = cli_parser()
references = get_memcheck_records(args.ref_source, args.ref_db_collection, DATABASE, args.db_url)
cur_values = get_memcheck_records(args.cur_source, args.db_collection, DATABASE, args.db_url)
- compare_memcheck_2_runs(cur_values, references, output_file=args.output_file)
+ exit_code = compare_memcheck_2_runs(cur_values, references, output_file=args.output_file)
+ sys.exit(exit_code)
from memcheck_upload import create_memcheck_records, \
    upload_memcheck_records, \
create_memcheck_report, \
metadata_from_manifest
+from compare_memcheck_2_runs import compare_memcheck_2_runs, \
+ get_memcheck_records, get_db_memcheck_records
-DATABASE = 'memcheck'
-COLLECTIONS = ["commit", "nightly", "weekly"]
+# Database arguments
+from memcheck_upload import DATABASE, DB_COLLECTIONS
def run(args, log=None, verbose=True):
init_parser = argparse.ArgumentParser(add_help=False)
init_parser.add_argument('--timeline_report',
- help=f'Create timeline HTML report file name.')
+                             help='name of the timeline HTML report file to create')
init_parser.add_argument('--upload', action="store_true",
- help=f'Upload results to database.')
+                             help='upload results to the database')
+ init_parser.add_argument('--compare',
+ metavar='REFERENCE',
+ help='compare run with reference.'
+ ' Should contain path to a folder with MemCheckTests logs or'
+ ' query to request data from DB in "key=value[,key=value]" format')
args = init_parser.parse_known_args()[0]
parser = argparse.ArgumentParser(
usage='%(prog)s [options] binary -- [additional args]',
parents=[init_parser])
parser.add_argument('binary', help='test binary to execute')
- parser.add_argument('--gtest_parallel', help='Path to gtest-parallel to use.',
+ parser.add_argument('--gtest_parallel', help='path to gtest-parallel to use',
default='gtest_parallel')
parser.add_argument('-d', '--output_dir',
- required=args.timeline_report or args.upload,
+ required=args.timeline_report or args.upload or args.compare,
help='output directory for test logs')
parser.add_argument('-w', '--workers', help='number of gtest-parallel workers to spawn')
parser.add_argument('--db_url',
- required=args.timeline_report or args.upload,
+ required=args.timeline_report or args.upload or
+ (args.compare and not os.path.isdir(args.compare)),
help='MongoDB URL in a form "mongodb://server:port"')
parser.add_argument('--db_collection',
required=args.timeline_report or args.upload,
help=f'use collection name in {DATABASE} database',
- choices=COLLECTIONS)
+ choices=DB_COLLECTIONS)
parser.add_argument('--manifest',
help=f'extract commit information from build manifest')
    parser.add_argument('--metadata',
                        help='extra metadata to append to records, in JSON format')
    parser.add_argument('--strip_log_path',
                        default='',
                        help='remove or replace parts of log path')
+ parser.add_argument('--ref_db_collection',
+ required=args.compare and not os.path.isdir(args.compare),
+ help=f'use collection name in {DATABASE} database to query'
+ f' reference data',
+ choices=DB_COLLECTIONS)
+ parser.add_argument('--comparison_report',
+ required=args.compare,
+                        help='name of the comparison report file to create')
+
args = parser.parse_args()
logging.basicConfig(format="{file} %(levelname)s: %(message)s".format(
[args.binary] +
['--'] + binary_args)
- if args.upload or args.timeline_report:
+ if args.upload or args.timeline_report or args.compare:
# prepare commit information
append = {}
if args.manifest:
append.update(metadata_from_manifest(args.manifest))
if args.metadata:
append.update(json_load(args.metadata))
+
# prepare memcheck records from logs
logs = list(glob(os.path.join(args.output_dir, '**', '*.log'), recursive=True))
strip = args.strip_log_path.split(',') + ['']
# create timeline report
if args.timeline_report:
create_memcheck_report(records, args.db_url, args.db_collection, args.timeline_report)
- logging.info('Created memcheck report %s', args.timeline_report)
+ logging.info('Created memcheck timeline report %s', args.timeline_report)
+
+ # compare runs and prepare report
+ if args.compare:
+ if os.path.isdir(args.compare):
+ references = get_memcheck_records(source=args.compare)
+ else:
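+            # parse the "key=value[,key=value]" reference string into a DB query dict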
+ query = dict(item.split("=") for item in args.compare.split(","))
+ references = get_db_memcheck_records(query=query,
+ db_collection=args.ref_db_collection,
+ db_name=DATABASE, db_url=args.db_url)
+ compare_retcode = compare_memcheck_2_runs(cur_values=records, references=references,
+ output_file=args.comparison_report)
+ returncode = returncode if returncode else compare_retcode
+
sys.exit(returncode)