1 # Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 """Common Google Storage interface library."""
7 from __future__ import print_function
17 fixup_path.FixupPath()
19 from chromite.lib import gs
20 from chromite.lib import osutils
21 from chromite.lib.paygen import filelib
22 from chromite.lib.paygen import utils
27 GS_LS_STATUS_RE = re.compile(r'status=(\d+)')
# Gsutil is filled in by "FindGsUtil" on first invocation.
GSUTIL = None


def FindGsUtil():
  """Find which gsutil executable to use.

  This may download and cache the command if needed, and will return the
  version pinned by chromite for general use. The result is stored in the
  module-level GSUTIL after the first call and reused afterwards.

  This function is multi-process safe, but NOT THREAD SAFE. If you need
  to use gsutil functionality in threads, call this function at least
  once before creating the threads. That way the value will already be
  cached when the threads read it.

  Returns:
    Full path to the gsutil command to use.
  """
  # TODO(dgarrett): This is a hack. Merge chromite and crostools to fix.

  # pylint: disable=W0603
  global GSUTIL
  if GSUTIL is None:
    # Only resolved once; later calls return the cached value.
    GSUTIL = gs.GSContext.GetDefaultGSUtilBin()

  return GSUTIL
class GsutilError(Exception):
  """Root of the exceptions raised when gsutil itself cannot be used."""
class GsutilMissingError(GsutilError):
  """Raised when the gsutil utility is missing from PATH."""

  def __init__(self, msg='The gsutil utility must be installed.'):
    """Initialize the error.

    Args:
      msg: Message carried by the exception; defaults to an install hint.
    """
    super(GsutilMissingError, self).__init__(msg)
class GSLibError(Exception):
  """Base error for a gsutil command that ran but reported a failure."""
class CopyFail(GSLibError):
  """Signals that a Copy operation failed, whatever the cause."""
class MoveFail(GSLibError):
  """Signals that a Move operation failed, whatever the cause."""
class RemoveFail(GSLibError):
  """Signals that a Remove operation failed, whatever the cause."""
class AclFail(GSLibError):
  """Signals that a SetAcl operation failed, whatever the cause."""
class CatFail(GSLibError):
  """Signals that a Cat operation failed, whatever the cause."""
class StatFail(GSLibError):
  """Signals that a Stat operation failed, whatever the cause."""
class BucketOperationError(GSLibError):
  """Signals that creating or deleting a bucket failed."""
class URIError(GSLibError):
  """Signals that a URI did not behave as expected."""
class ValidateGsutilFailure(GSLibError):
  """Signals that gsutil could not be confirmed to be working correctly."""
def RetryGSLib(func):
  """Decorator to retry function calls that raise GSLibError.

  If the decorated function raises a GSLibError, the exception is discarded
  and the function is run again until all retries are exhausted. On the
  final attempt the exception propagates normally.

  Up to RETRY_ATTEMPTS + 1 attempts in total are made.

  A function named 'Copy' gets special handling: when the error text
  indicates an interrupted resumable transfer, the gsutil tracker files for
  the destination are deleted before the next attempt.

  Usage:
    @RetryGSLib
    def MyFunctionHere(): pass

  Args:
    func: The callable to wrap.

  Returns:
    A wrapper carrying func's name, module and docstring.
  """
  # Local import so this block does not rely on the file's import list.
  import functools

  @functools.wraps(func)
  def RetryHandler(*args, **kwargs):
    """Retry func with given args/kwargs RETRY_ATTEMPTS times."""
    warning_msgs = []
    # range() exists on both Python 2 and 3; the count is tiny.
    for i in range(RETRY_ATTEMPTS + 1):
      try:
        result = func(*args, **kwargs)
      except GSLibError as ex:
        # On the last try just pass the exception on up.
        if i >= RETRY_ATTEMPTS:
          raise

        error_msg = str(ex)
        resumable_error_messages = (
            gs.GSContext.RESUMABLE_DOWNLOAD_ERROR,
            gs.GSContext.RESUMABLE_UPLOAD_ERROR,
            'ResumableUploadException',
            'ResumableDownloadException',
            'ssl.SSLError: The read operation timed out',
        )
        if (func.__name__ == 'Copy' and
            any(x in error_msg for x in resumable_error_messages)):
          # dest_path is Copy's second parameter; accept it by keyword too so
          # a keyword call does not raise IndexError while handling the error.
          dest_path = args[1] if len(args) > 1 else kwargs.get('dest_path')
          logging.info(
              'Resumable download/upload exception occured for %s', dest_path)
          # Pass the dest_path to get the tracker filename.
          tracker_filenames = gs.GSContext.GetTrackerFilenames(dest_path)
          # This part of the code is copied from chromite.lib.gs with
          # slight modifications. This is a temporary solution until
          # we can deprecate crostools.lib.gslib (crbug.com/322740).
          logging.info('Potential list of tracker files: %s',
                       tracker_filenames)
          for tracker_filename in tracker_filenames:
            tracker_file_path = os.path.join(
                gs.GSContext.DEFAULT_GSUTIL_TRACKER_DIR,
                tracker_filename)
            if os.path.exists(tracker_file_path):
              logging.info('Deleting gsutil tracker file %s before retrying.',
                           tracker_file_path)
              logging.info('The content of the tracker file: %s',
                           osutils.ReadFile(tracker_file_path))
              osutils.SafeUnlink(tracker_file_path)
        # NOTE(review): whether this check was an else-branch of the
        # resumable case or ran unconditionally is not visible; confirm.
        elif 'AccessDeniedException' in error_msg or 'NoSuchKey' in error_msg:
          # Retrying cannot fix a permission problem or a missing key.
          raise

        # Record a warning message to be issued if a retry actually helps.
        # NOTE(review): the format arguments were not visible; (i + 1, ex)
        # matches the 'Try %d ... %s' placeholders - confirm.
        warning_msgs.append('Try %d failed with error message:\n%s' %
                            (i + 1, ex))
      else:
        # If the func succeeded, then log any accumulated warning messages.
        if warning_msgs:
          logging.warning('Failed %s %d times before success:\n%s',
                          func.__name__, len(warning_msgs),
                          '\n'.join(warning_msgs))
        return result

  return RetryHandler
def RunGsutilCommand(args,
                     redirect_stdout=True,
                     redirect_stderr=True,
                     failed_exception=GSLibError,
                     generation=None,
                     headers=None,
                     get_headers_from_stdout=False,
                     **kwargs):
  """Run gsutil with given args through RunCommand with given options.

  Generally this method is intended for use within this module, see the various
  command-specific wrappers provided for convenience. However, it can be called
  directly if 'gsutil' needs to be called in specific way.

  A few of the options for RunCommand have their default values switched for
  this function. Those options are called out explicitly as options here, while
  additional RunCommand options can be used through kwargs.

  Args:
    args: List of arguments to use with 'gsutil'. Not modified.
    redirect_stdout: Boolean option passed directly to RunCommand.
    redirect_stderr: Boolean option passed directly to RunCommand.
    failed_exception: Exception class to raise if CommandFailedException is
      caught. It should be GSLibError or a subclass.
    generation: Only run the specified command if the generation matches.
      (See "Conditional Updates Using Object Versioning" in the gsutil docs.)
    headers: Fill in this dictionary with header values captured from stderr.
      Passing any dictionary (even an empty one) enables header capture.
    get_headers_from_stdout: Whether header information is to be parsed from
      stdout (default: stderr).
    kwargs: Additional options to pass directly to RunCommand, beyond the
      explicit ones above. See RunCommand itself.

  Returns:
    Anything that RunCommand returns, which should be a CommandResult object.

  Raises:
    GsutilMissingError if the gsutil utility cannot be found.
    GSLibError (or whatever is in failed_exception) if RunCommand failed (and
      error_ok was not True).
  """
  # The -d flag causes gsutil to dump various metadata, including user
  # credentials. We therefore don't allow users to pass it in directly.
  assert '-d' not in args, 'Cannot pass in the -d flag directly'

  gsutil = FindGsUtil()

  # Work on a copy so the caller's list is never modified.
  args = list(args)
  # An empty dict still means "capture headers", so test against None rather
  # than truthiness everywhere below.
  want_headers = headers is not None

  if generation is not None:
    args = ['-h', 'x-goog-if-generation-match:%s' % generation] + args
  if want_headers:
    # NOTE(review): the statement here was not visible; enabling -d is
    # inferred from the comments about -d and the 'header: ' parsing below.
    args = ['-d'] + args
    assert redirect_stderr
  cmd = [gsutil] + args
  run_opts = {
      'redirect_stdout': redirect_stdout,
      'redirect_stderr': redirect_stderr,
  }
  run_opts.update(kwargs)

  # Always use RunCommand with return_result on, which will be the default
  # behavior for RunCommand itself someday.
  run_opts['return_result'] = True

  try:
    result = utils.RunCommand(cmd, **run_opts)
  except OSError as e:
    if e.errno == errno.ENOENT:
      raise GsutilMissingError()
    raise
  except utils.CommandFailedException as e:
    # If headers is set, we have to hide the output here because it may contain
    # credentials that we don't want to show in buildbot logs.
    raise failed_exception('%r failed' % cmd if want_headers else e)

  if want_headers and result is not None:
    assert (redirect_stdout if get_headers_from_stdout else redirect_stderr)
    # Parse headers that look like this:
    # header: x-goog-generation: 1359148994758000
    # header: x-goog-metageneration: 1
    headers_source = result.output if get_headers_from_stdout else result.error
    for line in headers_source.splitlines():
      if line.startswith('header: '):
        header, _, value = line.partition(': ')[-1].partition(': ')
        headers[header.replace('x-goog-', '')] = value

    # Strip out stderr entirely to avoid showing credentials in logs; for
    # commands that dump credentials to stdout, clobber that as well.
    result.error = '<stripped>'
    if get_headers_from_stdout:
      result.output = '<stripped>'

  return result
def ValidateGsutilWorking(bucket):
  """Validate that gsutil is working correctly.

  There is a failure mode for gsutil in which all operations fail, and this
  is indistinguishable from all gsutil ls operations matching nothing. We
  check that there is at least one file in the root of the bucket.

  Args:
    bucket: bucket we are about to test.

  Raises:
    ValidateGsutilFailure: If we are unable to find any files in the bucket.
  """
  url = 'gs://%s/' % bucket
  # NOTE(review): the listing call was not visible; List() is the module's
  # listing helper - confirm it is the one intended here.
  if not List(url):
    raise ValidateGsutilFailure('Unable to find anything in: %s' % url)
def GetGsutilVersion():
  """Return the version string for the installed gsutil utility.

  Returns:
    The version string, e.g. '3.26'.

  Raises:
    GsutilMissingError if gsutil cannot be found.
    GSLibError for any other error.
  """
  # NOTE(review): the command arguments were not visible; 'version' is
  # inferred from the comment and the regex below.
  args = ['version']

  # As of version 3.26, a quirk of 'gsutil version' is that if gsutil is
  # outdated it will ask if you want to update (Y/n) before proceeding... but
  # do it only the first time (for a particular update? I'm not exactly sure).
  # Prepare a 'n' answer just in case.
  user_input = 'n\n'

  result = RunGsutilCommand(args, error_ok=False, input=user_input)

  # The version line may show up on either stream, so search both.
  output = '\n'.join(o for o in [result.output, result.error] if o)

  if not output:
    logging.error('No stdout output from %r.', result.cmdstr)
    # Format the message; passing cmdstr as a second exception argument
    # would leave the %r placeholder unexpanded.
    raise GSLibError('No stdout output from %r.' % result.cmdstr)

  # NOTE(review): the flags argument was not visible; IGNORECASE | MULTILINE
  # accepts a superset of what either flag alone would - confirm.
  match = re.search(r'^\s*gsutil\s+version\s+([\d\.]+)', output,
                    re.IGNORECASE | re.MULTILINE)
  if match:
    return match.group(1)

  logging.error('Unexpected output format from %r:\n%s',
                result.cmdstr, output)
  raise GSLibError('Unexpected output format from %r.' % result.cmdstr)
# NOTE(review): the def line was not visible; the name UpdateGsutil follows
# the neighbouring GetGsutilVersion/ValidateGsutilWorking naming - confirm.
def UpdateGsutil():
  """Update the gsutil utility to the latest version.

  Returns:
    The updated version, if updated, otherwise None.

  Raises:
    GSLibError if any error occurs.
  """
  original_version = GetGsutilVersion()
  updated_version = None

  # If an update is available the 'gsutil update' command will ask
  # whether to continue. Reply with 'y'.
  args = ['update']
  user_input = 'y\n'

  result = RunGsutilCommand(args, error_ok=True, input=user_input)

  if result.returncode != 0:
    # Oddly, 'gsutil update' exits with error if no update is needed.
    # Check the output to see if this is the situation, in which case the
    # error is harmless (and expected). Last line in stderr will be:
    # "You already have the latest gsutil release installed."
    if not result.error:
      raise GSLibError('Failed command: %r' % result.cmdstr)

    last_error_line = result.error.splitlines()[-1]
    if not last_error_line.startswith('You already have'):
      raise GSLibError(result.error)
  else:
    # The command succeeded; report a version only if it actually changed.
    current_version = GetGsutilVersion()
    if current_version != original_version:
      updated_version = current_version

  return updated_version
def MD5Sum(gs_uri):
  """Read an object's MD5 sum from the output of 'gsutil ls -L'.

  Note that because this relies on 'gsutil ls -L' it suffers from the
  eventual consistency issue, meaning this function could fail to find
  the MD5 value for a recently created file in Google Storage.

  Args:
    gs_uri: An absolute Google Storage URI that refers directly to an object.
      No globs are supported.

  Returns:
    A string that is a lower-case hex md5sum, or None if no object found.

  Raises:
    GSLibError if the gsutil command fails. If there is no object at that path
    that is not considered a failure.
  """
  gs_md5_regex = re.compile(r'.*?Hash \(md5\):\s+(.*)', re.IGNORECASE)
  args = ['ls', '-L', gs_uri]

  result = RunGsutilCommand(args, error_ok=True)

  # If object was not found then output is completely empty.
  if not result.output:
    return None

  for line in result.output.splitlines():
    match = gs_md5_regex.match(line)
    if match:
      # gsutil now prints the MD5 sum in base64, but we want it in hex.
      hex_md5 = base64.b16encode(base64.b64decode(match.group(1))).lower()
      # b16encode yields bytes on Python 3; return text there so the value
      # compares equal to a text digest. On Python 2 it is already str.
      if not isinstance(hex_md5, str):
        hex_md5 = hex_md5.decode('ascii')
      return hex_md5

  # This means there was some actual failure in the command.
  raise GSLibError('Unable to determine MD5Sum for %r' % gs_uri)
def Cmp(path1, path2):
  """Return True if paths hold identical files, according to MD5 sum.

  Note that this function relies on MD5Sum, which means it also can only
  promise eventual consistency. A recently uploaded file in Google Storage
  may behave badly in this comparison function.

  If either file is missing then always return False.

  Args:
    path1: URI to a file. Local paths also supported.
    path2: URI to a file. Local paths also supported.

  Returns:
    True if files are the same, False otherwise.
  """
  md5_1 = MD5Sum(path1) if IsGsURI(path1) else filelib.MD5Sum(path1)
  # A missing first file can never match; this also keeps two missing files
  # (None == None) from comparing equal.
  if not md5_1:
    return False

  md5_2 = MD5Sum(path2) if IsGsURI(path2) else filelib.MD5Sum(path2)

  return md5_1 == md5_2
def Copy(src_path, dest_path, acl=None, **kwargs):
  """Run gsutil cp src_path dest_path supporting GS globs.

  e.g.
  gsutil cp /etc/* gs://etc/ where /etc/* is src_path with a glob and
  gs://etc is dest_path.

  This assumes that the src or dest path already exist.

  Args:
    src_path: The src of the path to copy, either a /unix/path or gs:// uri.
    dest_path: The dest of the path to copy, either a /unix/path or gs:// uri.
      RetryGSLib special-cases functions named 'Copy' and reads the second
      positional argument as the destination, so keep this parameter second.
    acl: an ACL argument (predefined name or XML file) to pass to gsutil
    kwargs: Additional options to pass directly to RunGsutilCommand, beyond the
      explicit ones above. See RunGsutilCommand itself.

  Raises:
    CopyFail: If the copy fails for any reason.
  """
  args = ['cp']
  if acl:
    # NOTE(review): the acl handling was not visible; '-a' is gsutil cp's
    # ACL option - confirm.
    args += ['-a', acl]
  args += [src_path, dest_path]
  RunGsutilCommand(args, failed_exception=CopyFail, **kwargs)
def Move(src_path, dest_path, **kwargs):
  """Run gsutil mv src_path dest_path supporting GS globs.

  The moved object(s) get a created time of "now".

  Args:
    src_path: Where to move from, either a /unix/path or gs:// uri.
    dest_path: Where to move to, either a /unix/path or gs:// uri.
    kwargs: Extra options handed straight to RunGsutilCommand, beyond the
      explicit ones above. See RunGsutilCommand itself.

  Raises:
    MoveFail: If the move fails for any reason.
  """
  RunGsutilCommand(['mv', src_path, dest_path],
                   failed_exception=MoveFail,
                   **kwargs)
# pylint: disable-msg=C9011


def Remove(*paths, **kwargs):
  """Run gsutil rm on path supporting GS globs.

  Args:
    paths: Local path or gs URI, or list of same.
    ignore_no_match: If True, then do not complain if anything was not
      removed because no URI match was found. Like rm -f. Defaults to False.
    recurse: Remove recursively starting at path. Same as rm -R. Defaults
      to False.
    kwargs: Additional options to pass directly to RunGsutilCommand, beyond the
      explicit ones above. See RunGsutilCommand itself.

  Raises:
    RemoveFail: If the remove fails for any reason.
  """
  # These two are options of this function, not of RunGsutilCommand, so take
  # them out of kwargs before forwarding the rest.
  ignore_no_match = kwargs.pop('ignore_no_match', False)
  recurse = kwargs.pop('recurse', False)

  args = ['rm']
  if recurse:
    args.append('-R')
  args.extend(paths)

  try:
    RunGsutilCommand(args, failed_exception=RemoveFail, **kwargs)
  except RemoveFail as e:
    # Swallow only the "nothing matched" failure, and only when asked to.
    if not (ignore_no_match and 'No URLs matched' in str(e.args[0])):
      raise
def RemoveDirContents(gs_dir_uri):
  """Remove all contents of a directory.

  It is not an error if the directory has no contents.

  Args:
    gs_dir_uri: directory to delete contents of.
  """
  # Local import so this block does not rely on the file's import list.
  import posixpath

  # URIs always use '/', so join with posixpath rather than os.path, whose
  # separator depends on the host platform.
  Remove(posixpath.join(gs_dir_uri, '**'), ignore_no_match=True)
def CreateWithContents(gs_uri, contents, **kwargs):
  """Create the given Google Storage file holding the given contents.

  The contents are staged in a temporary local file, which is then copied
  to gs_uri.

  Args:
    gs_uri: The URI of a file on Google Storage.
    contents: Contents to write to the file.
    kwargs: Extra options handed on to Copy (and from there to
      RunGsutilCommand).

  Raises:
    CopyFail: If it fails for any reason.
  """
  with utils.CreateTempFileWithContents(contents) as staged:
    local_path = staged.name
    Copy(local_path, gs_uri, **kwargs)
def Cat(gs_uri, **kwargs):
  """Return the contents of a file at the given GS URI.

  Args:
    gs_uri: The URI of a file on Google Storage.
    kwargs: Additional options to pass directly to RunGsutilCommand, beyond the
      explicit ones above. See RunGsutilCommand itself.

  Returns:
    The captured standard output of 'gsutil cat', i.e. the file contents.

  Raises:
    CatFail: If the cat fails for any reason.
  """
  args = ['cat', gs_uri]
  result = RunGsutilCommand(args, failed_exception=CatFail, **kwargs)
  return result.output
def Stat(gs_uri, **kwargs):
  """Stat the file at the given GS URI; nothing is returned.

  Callers that want the object's metadata pass a 'headers' dict through
  kwargs, which RunGsutilCommand fills in.

  Args:
    gs_uri: The URI of a file on Google Storage.
    kwargs: Extra options handed straight to RunGsutilCommand, beyond the
      explicit ones above. See RunGsutilCommand itself.

  Raises:
    StatFail: If the stat fails for any reason.
  """
  # IMPORTANT! With stat, header information is dumped to standard output,
  # rather than standard error, as with other gsutil commands. Hence,
  # get_headers_from_stdout must be True to ensure both correct parsing of
  # output and stripping of sensitive information.
  stat_cmd = ['stat', gs_uri]
  RunGsutilCommand(stat_cmd,
                   failed_exception=StatFail,
                   get_headers_from_stdout=True,
                   **kwargs)
def IsGsURI(path):
  """Returns True if the path begins with gs://

  Args:
    path: The path or URI to examine. None and '' are accepted.

  Returns:
    True if path is a Google Storage URI that begins with gs://, False
    otherwise (including for None or an empty string).
  """
  # bool() keeps the result a real boolean; a bare 'path and ...' would hand
  # back None or '' for falsy input.
  return bool(path) and path.startswith(PROTOCOL + '://')
def SplitGSUri(gs_uri):
  """Returns tuple (bucket, uri_remainder) from GS URI.

  Examples: 1) 'gs://foo/hi/there' returns ('foo', 'hi/there')
            2) 'gs://foo/hi/there/' returns ('foo', 'hi/there/')
            3) 'gs://foo' returns ('foo', '')
            4) 'gs://foo/' returns ('foo', '')

  Args:
    gs_uri: A Google Storage URI.

  Returns:
    A tuple (bucket, uri_remainder)

  Raises:
    URIError if URI is not in recognized format
  """
  match = re.search(r'^gs://([^/]+)/?(.*)$', gs_uri)
  if match is None:
    raise URIError('Bad GS URI: %r' % gs_uri)

  return (match.group(1), match.group(2))
# TODO(mtennant): Rename this "Size" for consistency.
def FileSize(gs_uri, **kwargs):
  """Return the size of the given gsutil file in bytes.

  Args:
    gs_uri: Google Storage URI (beginning with 'gs://') pointing
      directly to a single file.
    kwargs: Additional options to pass directly to RunGsutilCommand, beyond the
      explicit ones above. See RunGsutilCommand itself.

  Returns:
    Size of file in bytes.

  Raises:
    URIError: Raised when URI is unknown to Google Storage or when
      URI matches more than one file.
  """
  # Stat fills this dict with the object's headers.
  headers = {}
  try:
    Stat(gs_uri, headers=headers, **kwargs)
  except StatFail as e:
    raise URIError('Unable to stat file at URI %r: %s' % (gs_uri, e))

  size_str = headers.get('stored-content-length')
  if size_str is None:
    raise URIError('Failed to get size of %r' % gs_uri)

  return int(size_str)
def FileTimestamp(gs_uri, **kwargs):
  """Return the timestamp of the given gsutil file.

  Args:
    gs_uri: Google Storage URI (beginning with 'gs://') pointing
      directly to a single file.
    kwargs: Additional options to pass directly to RunGsutilCommand, beyond the
      explicit ones above. See RunGsutilCommand itself.

  Returns:
    datetime parsed from the date column of 'gsutil ls -l'.

  Raises:
    URIError: Raised when URI is unknown to Google Storage or when
      URI matches more than one file.
  """
  args = ['ls', '-l', gs_uri]
  # Only the gsutil call is guarded. URIError is itself a GSLibError, so a
  # wider try block would catch the "More than one file" error raised below
  # and misreport it as "Unable to locate".
  try:
    result = RunGsutilCommand(args, **kwargs)
  except GSLibError:
    raise URIError('Unable to locate file at URI %r' % gs_uri)

  ls_lines = result.output.splitlines()

  # We expect one line per file and a summary line.
  if len(ls_lines) != 2:
    raise URIError('More than one file matched URI %r' % gs_uri)

  # Should have the format:
  # <filesize> <date> <filepath>
  stamp = ls_lines[0].split()[1]
  # NOTE(review): the strptime format was not visible; this assumes ISO-style
  # 'YYYY-MM-DDTHH:MM:SS' with an optional trailing 'Z' - confirm.
  if stamp.endswith('Z'):
    stamp = stamp[:-1]
  return datetime.datetime.strptime(stamp, '%Y-%m-%dT%H:%M:%S')
def ExistsLazy(gs_uri, **kwargs):
  """Return True if object exists at given GS URI.

  Warning: This can return false negatives, because 'gsutil ls' relies on
  a cache that is only eventually consistent. But it is faster to run, and
  it does accept URIs with glob expressions, where Exists does not.

  Args:
    gs_uri: Google Storage URI
    kwargs: Additional options to pass directly to RunGsutilCommand, beyond the
      explicit ones above. See RunGsutilCommand itself.

  Returns:
    True if object exists and False otherwise.

  Raises:
    URIError if there is a problem with the URI other than the URI
      not existing.
  """
  args = ['ls', gs_uri]
  try:
    RunGsutilCommand(args, **kwargs)
  except GSLibError as e:
    # If the URI was simply not found, the output should be something like:
    # CommandException: One or more URLs matched no objects.
    # NOTE(review): how msg was derived from the exception, and the URIError
    # argument, were not visible; confirm.
    msg = str(e).strip()
    if not msg.startswith('CommandException: '):
      raise URIError(e)
    return False

  return True
def Exists(gs_uri, **kwargs):
  """Return True if object exists at given GS URI.

  Args:
    gs_uri: Google Storage URI. Must be a fully-specified URI with
      no glob expression. Even if a glob expression matches this
      method will return False.
    kwargs: Additional options to pass directly to RunGsutilCommand, beyond the
      explicit ones above. See RunGsutilCommand itself.

  Returns:
    True if gs_uri points to an existing object, and False otherwise.
  """
  try:
    Stat(gs_uri, **kwargs)
  # NOTE(review): the except clause was not visible; StatFail is what Stat
  # raises on any failure - confirm.
  except StatFail:
    return False

  return True
def List(root_uri, recurse=False, filepattern=None, sort=False):
  """Return list of file and directory paths under given root URI.

  Args:
    root_uri: e.g. gs://foo/bar
    recurse: Look in subdirectories, as well
    filepattern: glob pattern to match against basename of path
    sort: If True then do a default sort on paths

  Returns:
    List of GS URIs to paths that matched; empty if nothing matched.

  Raises:
    GSLibError: If 'gsutil ls' fails for a reason other than matching
      no objects.
  """
  gs_uri = root_uri
  if recurse:
    # In gs file patterns '**' absorbs any number of directory names,
    # including none.
    gs_uri = gs_uri.rstrip('/') + '/**'

  # Now match the filename itself at the end of the URI.
  if filepattern:
    gs_uri = gs_uri.rstrip('/') + '/' + filepattern

  args = ['ls', gs_uri]

  try:
    result = RunGsutilCommand(args)
  except GSLibError as e:
    # The ls command will fail under normal operation if there was just
    # nothing to be found. That shows up like this to stderr:
    # CommandException: One or more URLs matched no objects.
    if 'CommandException: One or more URLs matched no objects.' not in str(e):
      raise

    # Otherwise, assume a normal error.
    # TODO(mtennant): It would be more functionally correct to return this
    # if and only if the error is identified as a "file not found" error.
    # We simply have to determine how to do that reliably.
    return []

  paths = [path for path in result.output.splitlines() if path]
  if sort:
    paths = sorted(paths)

  return paths
def ListFiles(root_uri, recurse=False, filepattern=None, sort=False):
  """Return list of file paths under given root URI.

  Directory entries (paths ending in '/') are left out on purpose.

  Args:
    root_uri: e.g. gs://foo/bar
    recurse: Also look for files in subdirectories
    filepattern: glob pattern to match against basename of file
    sort: If True then do a default sort on paths

  Returns:
    List of GS URIs to files that matched
  """
  listing = List(root_uri, recurse=recurse, filepattern=filepattern, sort=sort)

  # Directory paths should be excluded from output, per ListFiles guarantee.
  files = []
  for entry in listing:
    if entry.endswith('/'):
      continue
    files.append(entry)
  return files
def ListDirs(root_uri, recurse=False, filepattern=None, sort=False):
  """Return list of dir paths under given root URI.

  File entries (paths not ending in '/') are left out on purpose.
  NOTE(review): the original docstring also says root_uri itself is
  excluded; nothing here filters it explicitly, so that depends on what
  List returns - confirm.

  Args:
    root_uri: e.g. gs://foo/bar
    recurse: Also look for directories in subdirectories
    filepattern: glob pattern to match against basename of directory
    sort: If True then do a default sort on paths

  Returns:
    List of GS URIs to directories that matched
  """
  listing = List(root_uri, recurse=recurse, filepattern=filepattern, sort=sort)

  # Only include directory paths in output, per ListDirs guarantee.
  dirs = []
  for entry in listing:
    if entry.endswith('/'):
      dirs.append(entry)
  return dirs
def SetACL(gs_uri, acl_file, **kwargs):
  """Set the ACLs of a file in Google Storage.

  Args:
    gs_uri: The GS URI to set the ACL on.
    acl_file: A Google Storage xml ACL file.
    kwargs: Additional options to pass directly to RunGsutilCommand, beyond the
      explicit ones above. See RunGsutilCommand itself.

  Returns:
    True if the ACL was successfully set

  Raises:
    AclFail: If SetACL fails for any reason.
  """
  args = ['setacl', acl_file, gs_uri]
  RunGsutilCommand(args, failed_exception=AclFail, **kwargs)
  # NOTE(review): no return statement was visible; returning True makes the
  # code match the documented contract - confirm.
  return True
def CreateBucket(bucket, **kwargs):
  """Create a Google Storage bucket using the users default credentials.

  Args:
    bucket: The name of the bucket to create.
    kwargs: Additional options to pass directly to RunGsutilCommand, beyond the
      explicit ones above. See RunGsutilCommand itself.

  Returns:
    The GS URI of the bucket created.

  Raises:
    BucketOperationError if the bucket is not created properly.
  """
  gs_uri = 'gs://%s' % bucket
  args = ['mb', gs_uri]
  try:
    RunGsutilCommand(args, **kwargs)
  except GSLibError as e:
    raise BucketOperationError('Error creating bucket %s.\n%s' % (bucket, e))

  return gs_uri
def DeleteBucket(bucket):
  """Delete a Google Storage bucket using the users default credentials.

  Warning: All contents will be deleted.

  Args:
    bucket: The name of the bucket to delete. Leading and trailing '/' are
      ignored.

  Raises:
    BucketOperationError if the bucket is not deleted properly.
  """
  bucket = bucket.strip('/')
  gs_uri = 'gs://%s' % bucket
  try:
    # Empty the bucket first; error_ok because an already-empty bucket makes
    # the rm fail.
    RunGsutilCommand(['rm', '%s/*' % gs_uri], error_ok=True)
    RunGsutilCommand(['rb', gs_uri])
  except GSLibError as e:
    raise BucketOperationError('Error deleting bucket %s.\n%s' % (bucket, e))