dt = datetime.datetime(year, month, day, hour, minute, second, microsecond)
return dt
+
def get_log_created_year(input_file):
    """Return the year of *input_file*'s filesystem creation timestamp."""
    created = datetime.datetime.fromtimestamp(os.path.getctime(input_file))
    return created.year
+
+
def get_start_time(line_iterable, year):
    """Find start time from group of lines

    Returns the datetime of the first line containing 'Solving',
    or None if no such line is present.
    """
    for raw_line in line_iterable:
        stripped = raw_line.strip()
        if 'Solving' in stripped:
            return extract_datetime_from_line(stripped, year)
    return None
+
+
def extract_seconds(input_file, output_file):
with open(input_file, 'r') as f:
lines = f.readlines()
- log_created_time = os.path.getctime(input_file)
- log_created_year = datetime.datetime.fromtimestamp(log_created_time).year
- start_time_found = False
+ log_created_year = get_log_created_year(input_file)
+ start_datetime = get_start_time(lines, log_created_year)
+ assert start_datetime, 'Start time not found'
+
out = open(output_file, 'w')
for line in lines:
line = line.strip()
- if not start_time_found and line.find('Solving') != -1:
- start_time_found = True
- start_datetime = extract_datetime_from_line(line, log_created_year)
if line.find('Iteration') != -1:
dt = extract_datetime_from_line(line, log_created_year)
elapsed_seconds = (dt - start_datetime).total_seconds()
--- /dev/null
+#!/usr/bin/env python
+
+"""
+Parse training log
+
+Competitor to parse_log.sh
+"""
+
+import os
+import re
+import extract_seconds
+import argparse
+
+
def get_line_type(line):
    """Return either 'test' or 'train' depending on line type

    Returns None when the line mentions neither. 'Train' wins when a
    line contains both markers, since it is checked first.
    """

    line_type = None
    if line.find('Train') != -1:
        line_type = 'train'
    elif line.find('Test') != -1:
        line_type = 'test'
    return line_type
+
+
def parse_log(path_to_log):
    """Parse log file

    Returns a pair (train_list, test_list):
        train_list rows: (iteration, seconds, training_loss, learning_rate)
        test_list rows:  (iteration, seconds, test_accuracy, test_loss)
    """

    # Raw strings so '\d' and '\.' are regex escapes rather than
    # (invalid) string escapes, which raise warnings on modern Python.
    re_iteration = re.compile(r'Iteration (\d+)')
    re_accuracy = re.compile(r'output #\d+: accuracy = ([\.\d]+)')
    re_loss = re.compile(r'output #\d+: loss = ([\.\d]+)')
    # NOTE(review): none of these patterns match scientific notation
    # (e.g. 'lr = 1e-05') -- confirm the logs never emit it
    re_lr = re.compile(r'lr = ([\.\d]+)')

    # Pick out lines of interest
    iteration = -1
    test_accuracy = -1
    learning_rate = float('NaN')
    train_list = []
    test_list = []

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        # get_start_time consumes lines up to and including the 'Solving'
        # line, so the loop below resumes from the line after it.
        start_time = extract_seconds.get_start_time(f, logfile_year)

        for line in f:
            iteration_match = re_iteration.search(line)
            if iteration_match:
                iteration = float(iteration_match.group(1))
            if iteration == -1:
                # Only look for other stuff if we've found the first iteration
                continue

            time = extract_seconds.extract_datetime_from_line(line,
                                                              logfile_year)
            seconds = (time - start_time).total_seconds()

            lr_match = re_lr.search(line)
            if lr_match:
                learning_rate = float(lr_match.group(1))

            accuracy_match = re_accuracy.search(line)
            if accuracy_match:
                test_accuracy = float(accuracy_match.group(1))

            loss_match = re_loss.search(line)
            if loss_match:
                loss = float(loss_match.group(1))
                line_type = get_line_type(line)
                assert line_type, ('Failed to determine line type for line: ' +
                                   line)
                if line_type == 'test':
                    # NOTE: we assume that accuracy always comes right before
                    # loss for test data
                    test_list.append((iteration, seconds, test_accuracy, loss))
                elif line_type == 'train':
                    train_list.append((iteration, seconds, loss, learning_rate))

    return train_list, test_list
+
+
def save_csv_files(logfile_path, output_dir, train_list, test_list,
                   verbose=False):
    """Save CSV files to output_dir

    If the input log file is, e.g., caffe.INFO, the names will be
    caffe.INFO.train and caffe.INFO.test
    """

    basename = os.path.basename(logfile_path)
    # Write the train file first, then the test file, both with the
    # same 4-column numeric row format.
    outputs = (
        ('.train', train_list, 'NumIters,Seconds,TrainingLoss,LearningRate'),
        ('.test', test_list, 'NumIters,Seconds,TestAccuracy,TestLoss'),
    )
    for suffix, rows, header in outputs:
        out_path = os.path.join(output_dir, basename + suffix)
        write_csv(out_path, rows, '%d,%f,%f,%f', header, verbose)
+
+
def write_csv(output_filename, list_of_tuples, format_string, header,
              verbose=False):
    """Write a CSV file

    Args:
        output_filename: path of the file to create (overwritten if present)
        list_of_tuples: rows; each tuple must match format_string
        format_string: printf-style row format, e.g. '%d,%f,%f,%f'
        header: header line, written first (newline appended here)
        verbose: if True, print the output filename when done
    """
    with open(output_filename, 'w') as f:
        f.write(header + '\n')
        for row in list_of_tuples:
            line = format_string % row
            f.write(line + '\n')
    if verbose:
        # Parenthesized so this works under both Python 2 and 3;
        # the original used a Python-2-only print statement.
        print('Wrote %s' % output_filename)
+
+
def parse_args():
    """Build the command-line parser and return the parsed arguments."""
    parser = argparse.ArgumentParser(
        description=('Parse a Caffe training log into two CSV files '
                     'representing training and testing information'))
    parser.add_argument('logfile_path', help='Path to log file')
    parser.add_argument('output_dir',
                        help='Directory in which to place output CSV files')
    parser.add_argument('--verbose', action='store_true',
                        help='Print some extra info (e.g., output filenames)')
    return parser.parse_args()
+
+
def main():
    """Parse the log named on the command line and emit the CSV files."""
    args = parse_args()
    train_list, test_list = parse_log(args.logfile_path)
    # Forward --verbose so save_csv_files can report output filenames;
    # the original parsed the flag but never used it.
    save_csv_files(args.logfile_path, args.output_dir, train_list, test_list,
                   verbose=args.verbose)
+
+
if __name__ == '__main__':
    # Script entry point; nothing runs when imported as a module.
    main()