From 954cf9a7d485b729198c0f6aff1580b83f273397 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Thu, 2 Feb 2023 14:41:55 -0800 Subject: [PATCH] [mlgo][nfc] Refactor the log_reader.py utility Small refactoring in preparation for tests for the interactive mode. This allows reading the header, and performing observations, as explicit steps. The latter is in particular necessary because the exit condition for the interactive host will be that the child process (the compiler) exited. --- llvm/lib/Analysis/models/log_reader.py | 61 ++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Analysis/models/log_reader.py b/llvm/lib/Analysis/models/log_reader.py index 08342e5..0fe090c 100644 --- a/llvm/lib/Analysis/models/log_reader.py +++ b/llvm/lib/Analysis/models/log_reader.py @@ -4,10 +4,11 @@ See lib/Analysis/TrainingLogger.cpp for a description of the format. """ import ctypes import dataclasses +import io import json import math import sys -import typing +from typing import Optional _element_types = { 'float': ctypes.c_float, @@ -66,7 +67,7 @@ class TensorValue: return self._view[index] -def read_tensor(fs: typing.BinaryIO, ts: TensorSpec) -> TensorValue: +def read_tensor(fs: io.BufferedReader, ts: TensorSpec) -> TensorValue: size = math.prod(ts.shape) * ctypes.sizeof(ts.element_type) data = fs.read(size) return TensorValue(ts, data) @@ -75,30 +76,46 @@ def read_tensor(fs: typing.BinaryIO, ts: TensorSpec) -> TensorValue: def pretty_print_tensor_value(tv: TensorValue): print(f'{tv.spec().name}: {",".join([str(v) for v in tv])}') +def read_header(f: io.BufferedReader): + header = json.loads(f.readline()) + tensor_specs = [TensorSpec.from_dict(ts) for ts in header['features']] + score_spec = TensorSpec.from_dict( + header['score']) if 'score' in header else None + advice_spec = TensorSpec.from_dict( + header['advice']) if 'advice' in header else None + return tensor_specs, score_spec, advice_spec + + +def read_one_observation(context: Optional[str], + event_str: str, + f: io.BufferedReader, + tensor_specs: list[TensorSpec], + score_spec: Optional[TensorSpec]): + event = json.loads(event_str) + if 'context' in event: + context = event['context'] + event = json.loads(f.readline()) + observation_id = int(event['observation']) + features = [] + for ts in tensor_specs: + features.append(read_tensor(f, ts)) + f.readline() + score = None + if score_spec is not None: + score_header = json.loads(f.readline()) + assert int(score_header['outcome']) == observation_id + score = read_tensor(f, score_spec) + f.readline() + return context, observation_id, features, score + def read_stream(fname: str): - with open(fname, 'rb') as f: - header = json.loads(f.readline()) - tensor_specs = [TensorSpec.from_dict(ts) for ts in header['features']] - score_spec = TensorSpec.from_dict( - header['score']) if 'score' in header else None + with io.BufferedReader(io.FileIO(fname, 'rb')) as f: + tensor_specs, score_spec, _ = read_header(f) context = None while event_str := f.readline(): - event = json.loads(event_str) - if 'context' in event: - context = event['context'] - continue - observation_id = int(event['observation']) - features = [] - for ts in tensor_specs: - features.append(read_tensor(f, ts)) - f.readline() - score = None - if score_spec is not None: - score_header = json.loads(f.readline()) - assert int(score_header['outcome']) == observation_id - score = read_tensor(f, score_spec) - f.readline() + context, observation_id, features, score = read_one_observation( + context, event_str, f, tensor_specs, score_spec) yield context, observation_id, features, score -- 2.7.4