2 # Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Support generic spreadsheet-like table information."""
12 from chromite.lib import cros_build_lib
16 """Class to represent column headers and rows of data."""
18 __slots__ = ['_column_set', # Set of column headers (for faster lookup)
19 '_columns', # List of column headers in order
20 '_name', # Name to associate with table
21 '_rows', # List of row dicts
26 CSV_BQ = '__BEGINQUOTE__'
27 CSV_EQ = '__ENDQUOTE__'
30 def _SplitCSVLine(line):
31 '''Split a single CSV line into separate values.
33 Behavior illustrated by the following examples, with all but
34 the last example taken from Google Docs spreadsheet behavior:
35 'a,b,c,d': ==> ['a', 'b', 'c', 'd'],
36 'a, b, c, d': ==> ['a', ' b', ' c', ' d'],
37 'a,b,c,': ==> ['a', 'b', 'c', ''],
38 'a,"b c",d': ==> ['a', 'b c', 'd'],
39 'a,"b, c",d': ==> ['a', 'b, c', 'd'],
40 'a,"b, c, d",e': ==> ['a', 'b, c, d', 'e'],
41 'a,"""b, c""",d': ==> ['a', '"b, c"', 'd'],
42 'a,"""b, c"", d",e': ==> ['a', '"b, c", d', 'e'],
43 'a,b\,c,d': ==> ['a', 'b,c', 'd'],
45 Return a list of values.
47 # Split on commas, handling two special cases:
48 # 1) Escaped commas are not separators.
49 # 2) A quoted value can have non-separator commas in it. Quotes
52 for val in re.split(r'(?<!\\),', line):
57 # Handle regular double quotes at beginning/end specially.
59 val = Table.CSV_BQ + val[1:]
60 if val[-1] == '"' and (val[-2] != '"' or val[-3] == '"'):
61 val = val[0:-1] + Table.CSV_EQ
63 # Remove escape characters now.
64 val = val.replace(r'\,', ',') # \ before ,
65 val = val.replace('""', '"') # " before " (Google Spreadsheet syntax)
67 prevval = vals[-1] if vals else None
69 # If previous value started with quote and ended without one, then
70 # the current value is just a continuation of the previous value.
71 if prevval and prevval.startswith(Table.CSV_BQ):
72 val = prevval + "," + val
73 # Once entire value is read, strip surrounding quotes
74 if val.endswith(Table.CSV_EQ):
75 vals[-1] = val[len(Table.CSV_BQ):-len(Table.CSV_EQ)]
78 elif val.endswith(Table.CSV_EQ):
79 vals.append(val[len(Table.CSV_BQ):-len(Table.CSV_EQ)])
83 # If an unpaired Table.CSV_BQ is still in vals, then replace with ".
84 vals = [val.replace(Table.CSV_BQ, '"') for val in vals]
89 def LoadFromCSV(csv_file, name=None):
90 """Create a new Table object by loading contents of |csv_file|."""
91 if type(csv_file) is file:
92 file_handle = csv_file
94 file_handle = open(csv_file, 'r')
97 for line in file_handle:
101 vals = Table._SplitCSVLine(line)
105 table = Table(vals, name=name)
109 table.AppendRow(vals)
113 def __init__(self, columns, name=None):
114 self._columns = columns
115 self._column_set = set(columns)
120 """Return a table-like string representation of this table."""
121 cols = ['%10s' % col for col in self._columns]
122 text = 'Columns: %s\n' % ', '.join(cols)
125 for row in self._rows:
126 vals = ['%10s' % row[col] for col in self._columns]
127 text += 'Row %3d: %s\n' % (ix, ', '.join(vals))
131 def __nonzero__(self):
132 """Define boolean equivalent for this table."""
133 return bool(self._columns)
136 """Length of table equals the number of rows."""
137 return self.GetNumRows()
139 def __eq__(self, other):
140 """Return true if two tables are equal."""
141 # pylint: disable=W0212
142 return self._columns == other._columns and self._rows == other._rows
144 def __ne__(self, other):
145 """Return true if two tables are not equal."""
146 return not self == other
148 def __getitem__(self, index):
149 """Access one or more rows by index or slice."""
150 return self.GetRowByIndex(index)
152 def __delitem__(self, index):
153 """Delete one or more rows by index or slice."""
154 self.RemoveRowByIndex(index)
157 """Declare that this class supports iteration (over rows)."""
158 return self._rows.__iter__()
161 """Return name associated with table, None if not available."""
164 def SetName(self, name):
165 """Set the name associated with table."""
169 """Remove all row data."""
172 def GetNumRows(self):
173 """Return the number of rows in the table."""
174 return len(self._rows)
176 def GetNumColumns(self):
177 """Return the number of columns in the table."""
178 return len(self._columns)
180 def GetColumns(self):
181 """Return list of column names in order."""
182 return list(self._columns)
184 def GetRowByIndex(self, index):
185 """Access one or more rows by index or slice.
187 If more than one row is returned they will be contained in a list.
189 return self._rows[index]
191 def _GenRowFilter(self, id_values):
192 """Return a method that returns true for rows matching |id_values|."""
194 """Filter function for rows with id_values."""
195 for key in id_values:
196 if id_values[key] != row.get(key, None):
201 def GetRowsByValue(self, id_values):
202 """Return list of rows matching key/value pairs in |id_values|."""
203 # If row retrieval by value is heavily used for larger tables, then
204 # the implementation should change to be more efficient, at the
205 # expense of some pre-processing and extra storage.
206 grep = self._GenRowFilter(id_values)
207 return [r for r in self._rows if grep(r)]
209 def GetRowIndicesByValue(self, id_values):
210 """Return list of indices for rows matching k/v pairs in |id_values|."""
211 grep = self._GenRowFilter(id_values)
213 for ix, row in enumerate(self._rows):
219 def _PrepareValuesForAdd(self, values):
220 """Prepare a |values| dict/list to be added as a row.
222 If |values| is a dict, verify that only supported column
223 values are included. Add empty string values for columns
224 not seen in the row. The original dict may be altered.
226 If |values| is a list, translate it to a dict using known
227 column order. Append empty values as needed to match number
230 Return prepared dict.
232 if isinstance(values, dict):
234 if not col in self._column_set:
235 raise LookupError("Tried adding data to unknown column '%s'" % col)
237 for col in self._columns:
238 if not col in values:
239 values[col] = self.EMPTY_CELL
241 elif isinstance(values, list):
242 if len(values) > len(self._columns):
243 raise LookupError("Tried adding row with too many columns")
244 if len(values) < len(self._columns):
245 shortage = len(self._columns) - len(values)
246 values.extend([self.EMPTY_CELL] * shortage)
248 values = dict(zip(self._columns, values))
252 def AppendRow(self, values):
253 """Add a single row of data to the table, according to |values|.
255 The |values| argument can be either a dict or list.
257 row = self._PrepareValuesForAdd(values)
258 self._rows.append(row)
260 def SetRowByIndex(self, index, values):
261 """Replace the row at |index| with values from |values| dict."""
262 row = self._PrepareValuesForAdd(values)
263 self._rows[index] = row
265 def RemoveRowByIndex(self, index):
266 """Remove the row at |index|."""
267 del self._rows[index]
269 def HasColumn(self, name):
270 """Return True if column |name| is in this table, False otherwise."""
271 return name in self._column_set
273 def GetColumnIndex(self, name):
274 """Return the column index for column |name|, -1 if not found."""
275 for ix, col in enumerate(self._columns):
280 def GetColumnByIndex(self, index):
281 """Return the column name at |index|"""
282 return self._columns[index]
284 def InsertColumn(self, index, name, value=None):
285 """Insert a new column |name| into table at index |index|.
287 If |value| is specified, all rows will have |value| in the new column.
288 Otherwise, they will have the EMPTY_CELL value.
290 if self.HasColumn(name):
291 raise LookupError("Column %s already exists in table." % name)
293 self._columns.insert(index, name)
294 self._column_set.add(name)
296 for row in self._rows:
297 row[name] = value if value is not None else self.EMPTY_CELL
299 def AppendColumn(self, name, value=None):
300 """Same as InsertColumn, but new column is appended after existing ones."""
301 self.InsertColumn(self.GetNumColumns(), name, value)
303 def ProcessRows(self, row_processor):
304 """Invoke |row_processor| on each row in sequence."""
305 for row in self._rows:
308 def MergeTable(self, other_table, id_columns, merge_rules=None,
309 allow_new_columns=False, key=None, reverse=False,
311 """Merge |other_table| into this table, identifying rows by |id_columns|.
313 The |id_columns| argument can either be a list of identifying columns names
314 or a single column name (string). The values in these columns will be used
315 to identify the existing row that each row in |other_table| should be
318 The |merge_rules| specify what to do when there is a merge conflict. Every
319 column where a conflict is anticipated should have an entry in the
320 |merge_rules| dict. The value should be one of:
321 'join_with:<text>| = Join the two conflicting values with <text>
322 'accept_this_val' = Keep value in 'this' table and discard 'other' value.
323 'accept_other_val' = Keep value in 'other' table and discard 'this' value.
324 function = Keep return value from function(col_name, this_val, other_val)
326 A default merge rule can be specified with the key '__DEFAULT__' in
329 By default, the |other_table| must not have any columns that don't already
330 exist in this table. To allow new columns to be creating by virtue of their
331 presence in |other_table| set |allow_new_columns| to true.
333 To sort the final merged table, supply |key| and |reverse| arguments exactly
334 as they work with the Sort method.
336 # If requested, allow columns in other_table to create new columns
337 # in this table if this table does not already have them.
338 if allow_new_columns:
339 # pylint: disable=W0212
340 for ix, col in enumerate(other_table._columns):
341 if not self.HasColumn(col):
342 # Create a merge_rule on the fly for this new column.
345 merge_rules[col] = 'accept_other_val'
348 self.InsertColumn(0, col)
350 prevcol = other_table._columns[ix - 1]
351 previx = self.GetColumnIndex(prevcol)
352 self.InsertColumn(previx + 1, col)
354 for other_row in other_table:
355 self._MergeRow(other_row, id_columns, merge_rules=merge_rules)
357 # Optionally re-sort the merged table.
359 self.Sort(key, reverse=reverse)
362 self.SetName(new_name)
363 elif self.GetName() and other_table.GetName():
364 self.SetName(self.GetName() + ' + ' + other_table.GetName())
366 def _GetIdValuesForRow(self, row, id_columns):
367 """Return a dict with values from |row| in |id_columns|."""
368 id_values = dict((col, row[col]) for col in
369 cros_build_lib.iflatten_instance(id_columns))
372 def _MergeRow(self, other_row, id_columns, merge_rules=None):
373 """Merge |other_row| into this table.
375 See MergeTables for description of |id_columns| and |merge_rules|.
377 id_values = self._GetIdValuesForRow(other_row, id_columns)
379 row_indices = self.GetRowIndicesByValue(id_values)
381 row_index = row_indices[0]
382 row = self.GetRowByIndex(row_index)
383 for col in other_row:
385 # Find the merge rule that applies to this column, if there is one.
388 merge_rule = merge_rules.get(col, None)
389 if not merge_rule and merge_rules:
390 merge_rule = merge_rules.get('__DEFAULT__', None)
393 val = self._MergeColValue(col, row[col], other_row[col],
394 merge_rule=merge_rule)
396 msg = "Failed to merge '%s' value in row %r" % (col, id_values)
397 print >> sys.stderr, msg
403 # Cannot add new columns to row this way.
404 raise LookupError("Tried merging data to unknown column '%s'" % col)
405 self.SetRowByIndex(row_index, row)
407 self.AppendRow(other_row)
409 def _MergeColValue(self, col, val, other_val, merge_rule):
410 """Merge |col| values |val| and |other_val| according to |merge_rule|.
412 See MergeTable method for explanation of option |merge_rule|.
418 raise ValueError("Cannot merge column values without rule: '%s' vs '%s'" %
420 elif inspect.isfunction(merge_rule):
422 return merge_rule(col, val, other_val)
424 pass # Fall through to exception at end
425 elif merge_rule == 'accept_this_val':
427 elif merge_rule == 'accept_other_val':
430 match = re.match(r'join_with:(.+)$', merge_rule)
432 return match.group(1).join(v for v in (val, other_val) if v)
434 raise ValueError("Invalid merge rule (%s) for values '%s' and '%s'." %
435 (merge_rule, val, other_val))
437 def Sort(self, key, reverse=False):
438 """Sort the rows using the given |key| function."""
439 self._rows.sort(key=key, reverse=reverse)
441 def WriteCSV(self, filehandle, hiddencols=None):
442 """Write this table out as comma-separated values to |filehandle|.
444 To skip certain columns during the write, use the |hiddencols| set.
447 """Filter function for columns not in hiddencols."""
448 return not hiddencols or col not in hiddencols
450 cols = [col for col in self._columns if ColFilter(col)]
451 filehandle.write(','.join(cols) + '\n')
452 for row in self._rows:
453 vals = [row.get(col, self.EMPTY_CELL) for col in cols]
454 filehandle.write(','.join(vals) + '\n')