2 # Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Library for interacting with gdata (i.e. Google Docs, Tracker, etc)."""
14 import xml.dom.minidom
16 import gdata.projecthosting.client
18 import gdata.spreadsheet.service
20 from chromite.lib import operation
22 # pylint: disable=W0201,E0203,E1101
# Default locations of the saved auth token file and the saved login
# credentials file, both directly inside the user's home directory.
# The home directory is resolved with expanduser() instead of reading
# os.environ['HOME'] directly, so that merely importing this module does not
# raise KeyError when HOME is unset (e.g. under cron or a stripped-down env).
TOKEN_FILE = os.path.expanduser(os.path.join('~', '.gdata_token'))
CRED_FILE = os.path.expanduser(os.path.join('~', '.gdata_cred.txt'))
27 oper = operation.Operation('gdata_lib')
# Characters stripped from column names: space, slash and underscore.
_BAD_COL_CHARS_REGEX = re.compile(r'[ /_]')


def PrepColNameForSS(col):
  """Return |col| in the form used by the spreadsheet interface.

  The interface wants column names that are entirely lowercase and free of
  spaces and other special characters, so |col| is lowercased and every
  character matched by _BAD_COL_CHARS_REGEX is removed.
  """
  lowered = col.lower()
  return _BAD_COL_CHARS_REGEX.sub('', lowered)
37 # TODO(mtennant): Rename PrepRowValuesForSS
def PrepRowForSS(row):
  """Return a new dict with every value of |row| run through PrepValForSS.

  Keys are carried over unchanged; |row| itself is not modified. The point
  is to make sure the spreadsheet handles all values in the row as strings.
  """
  prepped = {}
  for key, val in row.items():
    prepped[key] = PrepValForSS(val)
  return prepped
43 # Regex to detect values that the spreadsheet will auto-format as numbers.
44 _NUM_REGEX = re.compile(r'^[\d\.]+$')
45 def PrepValForSS(val):
46 """Make sure spreadsheet handles this value as a string."""
47 # The main reason for this is version strings (e.g. for portage packages),
48 # which Sheets automatically interprets as numbers and mangles.
49 if val and _NUM_REGEX.match(val):
54 def ScrubValFromSS(val):
55 """Remove string indicator prefix if found."""
56 if val and val[0] == "'":
62 """Class to manage user/password credentials."""
65 'docs_auth_token', # Docs Client auth token string
66 'creds_dirty', # True if user/password set and not, yet, saved
67 'password', # User password
68 'token_dirty', # True if auth token(s) set and not, yet, saved
69 'tracker_auth_token', # Tracker Client auth token string
70 'user', # User account (foo@chromium.org)
73 SAVED_TOKEN_ATTRS = ('docs_auth_token', 'tracker_auth_token', 'user')
79 self.docs_auth_token = None
80 self.tracker_auth_token = None
82 self.token_dirty = False
83 self.creds_dirty = False
def SetDocsAuthToken(self, auth_token):
  """Remember |auth_token| as the Docs auth token string.

  Also sets token_dirty, marking the token(s) as not yet saved.
  """
  self.token_dirty = True
  self.docs_auth_token = auth_token
def SetTrackerAuthToken(self, auth_token):
  """Remember |auth_token| as the Tracker auth token string.

  Also sets token_dirty, marking the token(s) as not yet saved.
  """
  self.token_dirty = True
  self.tracker_auth_token = auth_token
95 def LoadAuthToken(self, filepath):
96 """Load previously saved auth token(s) from |filepath|.
98 This first clears both docs_auth_token and tracker_auth_token.
100 self.docs_auth_token = None
101 self.tracker_auth_token = None
103 f = open(filepath, 'r')
106 if obj.has_key('auth_token'):
107 # Backwards compatibility. Default 'auth_token' is what
108 # docs_auth_token used to be saved as.
109 self.docs_auth_token = obj['auth_token']
110 self.token_dirty = True
111 for attr in self.SAVED_TOKEN_ATTRS:
112 if obj.has_key(attr):
113 setattr(self, attr, obj[attr])
114 oper.Notice('Loaded Docs/Tracker auth token(s) from "%s"' % filepath)
116 oper.Error('Unable to load auth token file at "%s"' % filepath)
118 def StoreAuthTokenIfNeeded(self, filepath):
119 """Store auth token(s) to |filepath| if anything changed."""
121 self.StoreAuthToken(filepath)
123 def StoreAuthToken(self, filepath):
124 """Store auth token(s) to |filepath|."""
127 for attr in self.SAVED_TOKEN_ATTRS:
128 val = getattr(self, attr)
133 oper.Notice('Storing Docs and/or Tracker auth token to "%s"' % filepath)
134 f = open(filepath, 'w')
138 self.token_dirty = False
140 oper.Error('Unable to store auth token to file at "%s"' % filepath)
142 def SetCreds(self, user, password=None):
144 user = '%s@chromium.org' % user
147 password = getpass.getpass('Docs password for %s:' % user)
150 self.password = password
151 self.creds_dirty = True
def LoadCreds(self, filepath):
  """Load email/password credentials from |filepath|.

  The email is read from the first line and the password from the second;
  surrounding whitespace is stripped from both. Blank lines after the second
  line (e.g. one left behind by a text editor) are ignored.

  Raises:
    ValueError: if the file does not hold exactly two lines of credentials.
    IOError: if |filepath| cannot be opened for reading.
  """
  with open(filepath, 'r') as f:
    lines = [line.strip() for line in f]

  # Tolerate trailing blank lines, but never drop the second line itself:
  # an empty password line must still count as the password.
  while len(lines) > 2 and not lines[-1]:
    lines.pop()

  if len(lines) != 2:
    raise ValueError('Expected email and password lines in "%s", found %d '
                     'line(s)' % (filepath, len(lines)))

  self.user, self.password = lines
  oper.Notice('Loaded Docs/Tracker login credentials from "%s"' % filepath)
161 def StoreCredsIfNeeded(self, filepath):
162 """Store email/password credentials to |filepath| if anything changed."""
164 self.StoreCreds(filepath)
def StoreCreds(self, filepath):
  """Store email/password credentials to |filepath|.

  The email is written on the first line and the password on the second.
  Because the password is stored in plain text, a newly created file is
  given mode 0600 (owner read/write only) rather than whatever the umask
  would allow. The permissions of an already existing file are not changed.
  On success creds_dirty is reset to False.

  Raises:
    IOError: if |filepath| cannot be opened for writing.
  """
  oper.Notice('Storing Docs/Tracker login credentials to "%s"' % filepath)

  flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
  try:
    fd = os.open(filepath, flags, 0o600)
  except OSError as ex:
    # The builtin open() used here previously reported failures as IOError;
    # keep doing so for callers that catch it.
    raise IOError(ex.errno, ex.strerror, filepath)

  # Simply write email on first line and password on second.
  with os.fdopen(fd, 'w') as f:
    f.write(self.user + '\n')
    f.write(self.password + '\n')

  self.creds_dirty = False
177 class IssueComment(object):
178 """Represent a Tracker issue comment."""
180 __slots__ = ['title', 'text']
182 def __init__(self, title, text):
187 text = '<no comment>'
189 text = '\n '.join(self.text.split('\n'))
190 return '%s:\n %s' % (self.title, text)
194 """Represents one Tracker Issue."""
197 'comments': [], # List of IssueComment objects
198 'id': 0, # Issue id number (int)
199 'labels': [], # List of text labels
200 'owner': None, # Current owner (text, chromium.org account)
201 'status': None, # Current issue status (text) (e.g. Assigned)
202 'summary': None,# Issue summary (first comment)
203 'title': None, # Title text
207 __slots__ = SlotDefaults.keys()
209 def __init__(self, **kwargs):
210 """Init for one Issue object.
212 |kwargs| - key/value arguments to give initial values to
213 any additional attributes on |self|.
215 # Use SlotDefaults overwritten by kwargs for starting slot values.
216 slotvals = self.SlotDefaults.copy()
217 slotvals.update(kwargs)
218 for slot in self.__slots__:
219 setattr(self, slot, slotvals.pop(slot))
221 raise ValueError('I do not know what to do with %r' % slotvals)
224 """Pretty print of issue."""
225 lines = ['Issue %d - %s' % (self.id, self.title),
226 'Status: %s, Owner: %s' % (self.status, self.owner),
227 'Labels: %s' % ', '.join(self.labels),
231 lines.append('Summary: %s' % self.summary)
234 lines.extend(self.comments)
236 return '\n'.join(lines)
238 def InitFromTracker(self, t_issue, project_name):
239 """Initialize |self| from tracker issue |t_issue|"""
241 self.id = int(t_issue.id.text.split('/')[-1])
242 self.labels = [label.text for label in t_issue.label]
244 self.owner = t_issue.owner.username.text
245 self.status = t_issue.status.text
246 self.summary = t_issue.content.text
247 self.title = t_issue.title.text
248 self.comments = self.GetTrackerIssueComments(self.id, project_name)
250 def GetTrackerIssueComments(self, issue_id, project_name):
251 """Retrieve comments for |issue_id| from comments URL"""
254 feeds = 'http://code.google.com/feeds'
255 url = '%s/issues/p/%s/issues/%d/comments/full' % (feeds, project_name,
257 doc = xml.dom.minidom.parse(urllib.urlopen(url))
258 entries = doc.getElementsByTagName('entry')
259 for entry in entries:
261 for key in ('title', 'content'):
262 child = entry.getElementsByTagName(key)[0].firstChild
263 title_text_list.append(child.nodeValue if child else None)
264 comments.append(IssueComment(*title_text_list))
def __eq__(self, other):
  """Return True if |other| matches |self| on every compared attribute.

  The compared attributes are id, labels, owner, status, summary and title;
  comments are not part of the comparison. If |other| lacks any of these
  attributes (e.g. it is None or a string), NotImplemented is returned so
  that Python falls back to its default comparison instead of this method
  raising AttributeError.
  """
  attrs = ('id', 'labels', 'owner', 'status', 'summary', 'title')
  try:
    other_vals = [getattr(other, attr) for attr in attrs]
  except AttributeError:
    return NotImplemented
  return all(getattr(self, attr) == val
             for attr, val in zip(attrs, other_vals))
def __ne__(self, other):
  """Return the logical negation of comparing |self| == |other|."""
  is_equal = (self == other)
  return not is_equal
class TrackerError(RuntimeError):
  """Raised for errors in communicating with Tracker."""
class TrackerInvalidUserError(TrackerError):
  """Raised when Tracker does not recognize a user."""
284 class TrackerComm(object):
285 """Class to manage communication with Tracker."""
288 'author', # Author when creating/editing Tracker issues
289 'it_client', # Issue Tracker client
290 'project_name', # Tracker project name
295 self.it_client = None
296 self.project_name = None
298 def Connect(self, creds, project_name, source='chromiumos'):
299 self.project_name = project_name
301 it_client = gdata.projecthosting.client.ProjectHostingClient()
302 it_client.source = source
304 if creds.tracker_auth_token:
305 oper.Notice('Logging into Tracker using previous auth token.')
306 it_client.auth_token = gdata.gauth.ClientLoginToken(
307 creds.tracker_auth_token)
309 oper.Notice('Logging into Tracker as "%s".' % creds.user)
310 it_client.ClientLogin(creds.user, creds.password,
311 source=source, service='code',
312 account_type='GOOGLE')
313 creds.SetTrackerAuthToken(it_client.auth_token.token_string)
315 self.author = creds.user
316 self.it_client = it_client
318 def _QueryTracker(self, query):
319 """Query the tracker for a list of issues. Return |None| on failure."""
321 return self.it_client.get_issues(self.project_name, query=query)
322 except gdata.client.RequestError:
325 def _CreateIssue(self, t_issue):
326 """Create an Issue from a Tracker Issue."""
328 issue.InitFromTracker(t_issue, self.project_name)
331 # TODO(mtennant): This method works today, but is not being actively used.
332 # Leaving it in, because a logical use of the method is to verify
333 # that a Tracker issue in the package spreadsheet is open, and to add
334 # comments to it when new upstream versions become available.
335 def GetTrackerIssueById(self, tid):
336 """Get tracker issue given |tid| number. Return Issue object if found."""
338 query = gdata.projecthosting.client.Query(issue_id=str(tid))
339 feed = self._QueryTracker(query)
342 return self._CreateIssue(feed.entry[0])
345 def GetTrackerIssuesByText(self, search_text, full_text=True,
347 """Find all Tracker Issues that contain the text search_text."""
349 search_text = 'summary:"%s"' % search_text
351 search_text += ' is:open'
352 query = gdata.projecthosting.client.Query(text_query=search_text)
353 feed = self._QueryTracker(query)
355 return [self._CreateIssue(tissue) for tissue in feed.entry]
359 def CreateTrackerIssue(self, issue):
360 """Create a new issue in Tracker according to |issue|."""
362 created = self.it_client.add_issue(project_name=self.project_name,
364 content=issue.summary,
370 issue.id = int(created.id.text.split('/')[-1])
372 except gdata.client.RequestError as ex:
373 if ex.body and ex.body.lower() == 'user not found':
374 raise TrackerInvalidUserError('Tracker user %s not found' % issue.owner)
375 if ex.body and ex.body.lower() == 'issue owner must be a member':
376 raise TrackerInvalidUserError('Tracker user %s not a member' %
380 def AppendTrackerIssueById(self, issue_id, comment, owner=None):
381 """Append |comment| to issue |issue_id| in Tracker"""
382 self.it_client.update_issue(project_name=self.project_name,
class SpreadsheetRow(dict):
  """Minor semi-immutable extension of dict to keep the original spreadsheet
  row object and spreadsheet row number as attributes.

  No changes are made to equality checking or anything else, so client code
  that wishes to handle this as a pure dict can.

  Only item assignment and item deletion are blocked (both raise TypeError);
  other dict mutators such as update() are not overridden here.
  """

  def __init__(self, ss_row_obj, ss_row_num, mapping=None):
    """Create a row holding the key/value pairs of |mapping|.

    |ss_row_obj| Original spreadsheet row object, kept as self.ss_row_obj
    |ss_row_num| Spreadsheet row number, kept as self.ss_row_num
    |mapping| Optional initial contents; the row is empty when omitted
    """
    # dict.__init__ rejects None, so only forward a real mapping. Without
    # this guard the default mapping=None would raise TypeError.
    if mapping:
      dict.__init__(self, mapping)
    else:
      dict.__init__(self)

    self.ss_row_obj = ss_row_obj
    self.ss_row_num = ss_row_num

  def __setitem__(self, key, val):
    """Refuse item assignment; always raises TypeError."""
    raise TypeError('setting item in SpreadsheetRow not supported')

  def __delitem__(self, key):
    """Refuse item deletion; always raises TypeError."""
    raise TypeError('deleting item in SpreadsheetRow not supported')
class SpreadsheetError(RuntimeError):
  """Raised for errors in communicating with the spreadsheet."""
415 def ReadWriteDecorator(func):
416 """Raise SpreadsheetError if appropriate."""
417 def f(self, *args, **kwargs):
419 return func(self, *args, **kwargs)
420 except gdata.service.RequestError as ex:
421 raise SpreadsheetError(str(ex))
423 f.__name__ = func.__name__
426 class SpreadsheetComm(object):
427 """Class to manage communication with one Google Spreadsheet worksheet."""
429 # Row numbering in spreadsheets effectively starts at 2 because row 1
430 # has the column headers.
431 ROW_NUMBER_OFFSET = 2
433 # Spreadsheet column numbers start at 1.
434 COLUMN_NUMBER_OFFSET = 1
437 '_columns', # Tuple of translated column names, filled in as needed
438 '_rows', # Tuple of Row dicts in order, filled in as needed
439 'gd_client', # Google Data client
440 'ss_key', # Spreadsheet key
441 'ws_name', # Worksheet name
442 'ws_key', # Worksheet key
447 """The columns property is filled in on demand.
449 It is a tuple of column names, each run through PrepColNameForSS.
451 if self._columns is None:
452 query = gdata.spreadsheet.service.CellQuery()
453 query['max-row'] = '1'
454 feed = self.gd_client.GetCellsFeed(self.ss_key, self.ws_key, query=query)
456 # The use of PrepColNameForSS here looks weird, but the values
457 # in row 1 are the unaltered column names, rather than the restricted
458 # column names used for interface purposes. In other words, if the
459 # spreadsheet looks like it has a column called "Foo Bar", then the
460 # first row will have a value "Foo Bar" but all interaction with that
461 # column for other rows will use column key "foobar". Translate to
462 # restricted names now with PrepColNameForSS.
463 cols = [PrepColNameForSS(entry.content.text) for entry in feed.entry]
465 self._columns = tuple(cols)
471 """The rows property is filled in on demand.
473 It is a tuple of SpreadsheetRow objects.
475 if self._rows is None:
478 feed = self.gd_client.GetListFeed(self.ss_key, self.ws_key)
479 for rowIx, rowObj in enumerate(feed.entry, start=self.ROW_NUMBER_OFFSET):
480 row_dict = dict((key, ScrubValFromSS(val.text))
481 for key, val in rowObj.custom.iteritems())
482 rows.append(SpreadsheetRow(rowObj, rowIx, row_dict))
484 self._rows = tuple(rows)
489 for slot in self.__slots__:
490 setattr(self, slot, None)
def Connect(self, creds, ss_key, ws_name, source='chromiumos'):
  """Log in to the spreadsheet service, then select the current worksheet.

  |creds| Credentials object for Google Docs
  |ss_key| Spreadsheet key
  |ws_name| Worksheet name
  |source| Name to associate with connecting service
  """
  # Log in first; the worksheet is selected only after that.
  self._Login(creds, source)
  self.SetCurrentWorksheet(ws_name, ss_key=ss_key)
503 def SetCurrentWorksheet(self, ws_name, ss_key=None):
504 """Change the current worksheet. This clears all caches."""
505 if ss_key and ss_key != self.ss_key:
509 self.ws_name = ws_name
511 ws_key = self._GetWorksheetKey(self.ss_key, self.ws_name)
512 if ws_key != self.ws_key:
516 def _ClearCache(self, keep_columns=False):
517 """Called whenever column/row data might be stale."""
522 def _Login(self, creds, source):
523 """Login to Google doc client using given |creds|."""
524 gd_client = RetrySpreadsheetsService()
525 gd_client.source = source
527 # Login using previous auth token if available, otherwise
528 # use email/password from creds.
529 if creds.docs_auth_token:
530 oper.Notice('Logging into Docs using previous auth token.')
531 gd_client.SetClientLoginToken(creds.docs_auth_token)
533 oper.Notice('Logging into Docs as "%s".' % creds.user)
534 gd_client.email = creds.user
535 gd_client.password = creds.password
536 gd_client.ProgrammaticLogin()
537 creds.SetDocsAuthToken(gd_client.GetClientLoginToken())
539 self.gd_client = gd_client
541 def _GetWorksheetKey(self, ss_key, ws_name):
542 """Get the worksheet key with name |ws_name| in spreadsheet |ss_key|."""
543 feed = self.gd_client.GetWorksheetsFeed(ss_key)
544 # The worksheet key is the last component in the URL (after last '/')
545 for entry in feed.entry:
546 if ws_name == entry.title.text:
547 return entry.id.text.split('/')[-1]
549 oper.Die('Unable to find worksheet "%s" in spreadsheet "%s"' %
553 def GetColumns(self):
554 """Return tuple of column names in worksheet.
556 Note that each returned name has been run through PrepColNameForSS.
561 def GetColumnIndex(self, colName):
562 """Get the column index (starting at 1) for column |colName|"""
564 # Spreadsheet column indices start at 1, so +1.
565 return self.columns.index(colName) + self.COLUMN_NUMBER_OFFSET
571 """Return tuple of SpreadsheetRow objects in order."""
575 def GetRowCacheByCol(self, column):
576 """Return a dict for looking up rows by value in |column|.
578 Each row value is a SpreadsheetRow object.
579 If more than one row has the same value for |column|, then the
580 row objects will be in a list in the returned dict.
584 for row in self.GetRows():
585 col_val = row[column]
587 current_entry = row_cache.get(col_val, None)
588 if current_entry and type(current_entry) is list:
589 current_entry.append(row)
591 current_entry = [current_entry, row]
595 row_cache[col_val] = current_entry
def InsertRow(self, row):
  """Add |row| at the end of the spreadsheet.

  The insert goes through the Google Data client for the current
  spreadsheet and worksheet keys; afterwards _ClearCache is called with
  keep_columns=True.
  """
  client = self.gd_client
  client.InsertRow(row, self.ss_key, self.ws_key)
  self._ClearCache(keep_columns=True)
606 def UpdateRowCellByCell(self, rowIx, row):
607 """Replace cell values in row at |rowIx| with those in |row| dict."""
609 colIx = self.GetColumnIndex(colName)
610 if colIx is not None:
611 self.ReplaceCellValue(rowIx, colIx, row[colName])
612 self._ClearCache(keep_columns=True)
def DeleteRow(self, ss_row):
  """Delete |ss_row|, which must be the original spreadsheet row object.

  Afterwards _ClearCache is called with keep_columns=True.
  """
  client = self.gd_client
  client.DeleteRow(ss_row)
  self._ClearCache(keep_columns=True)
def ReplaceCellValue(self, rowIx, colIx, val):
  """Set the cell at row |rowIx| and column |colIx| to |val|.

  The update goes through the Google Data client for the current
  spreadsheet and worksheet keys; afterwards _ClearCache is called with
  keep_columns=True.
  """
  update_cell = self.gd_client.UpdateCell
  update_cell(rowIx, colIx, val, self.ss_key, self.ws_key)
  self._ClearCache(keep_columns=True)
def ClearCellValue(self, rowIx, colIx):
  """Clear the cell at |rowIx| and |colIx| by replacing its value with None."""
  cleared_val = None
  self.ReplaceCellValue(rowIx, colIx, cleared_val)
632 class RetrySpreadsheetsService(gdata.spreadsheet.service.SpreadsheetsService):
633 """Extend SpreadsheetsService to put retry logic around http request method.
635 The entire purpose of this class is to remove some flakiness from
636 interactions with Google Drive spreadsheet service, in the form of
637 certain 40* and 50* http error responses to http requests. This is
638 documented in https://code.google.com/p/chromium/issues/detail?id=206798.
639 There are two "request" methods that need to be wrapped in retry logic.
640 1) The request method on self. Original implementation is in
641 base class atom.service.AtomService.
642 2) The request method on self.http_client. The class of self.http_client
643 can actually vary, so the original implementation of the request
644 method can also vary.
646 # pylint: disable=R0904
649 RETRYABLE_STATUSES = (403, # Forbidden (but retries still seem to help).
650 500, # Internal server error.
653 def __init__(self, *args, **kwargs):
654 gdata.spreadsheet.service.SpreadsheetsService.__init__(self, *args,
657 # Wrap self.http_client.request with retry wrapper. This request method
658 # is used by ProgrammaticLogin(), at least.
659 if hasattr(self, 'http_client'):
660 self.http_client.request = functools.partial(self._RetryRequest,
661 self.http_client.request)
663 self.request = functools.partial(self._RetryRequest, self.request)
665 def _RetryRequest(self, func, *args, **kwargs):
666 """Retry wrapper for bound |func|, passing |args| and |kwargs|.
668 This retry wrapper can be used for any http request |func| that provides
669 an http status code via the .status attribute of the returned value.
671 Retry when the status value on the return object is in RETRYABLE_STATUSES,
672 and run up to TRY_MAX times. If successful (whether or not retries
673 were necessary) return the last return value returned from base method.
674 If unsuccessful return the first return value returned from base method.
677 for try_ix in xrange(1, self.TRY_MAX + 1):
678 retval = func(*args, **kwargs)
679 if retval.status not in self.RETRYABLE_STATUSES:
682 oper.Warning('Retry-able HTTP request failure (status=%d), try %d/%d' %
683 (retval.status, try_ix, self.TRY_MAX))
685 first_retval = retval
687 oper.Warning('Giving up on HTTP request after %d tries' % self.TRY_MAX)