1 # Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 """Library to make common google storage operations more reliable."""
7 from __future__ import print_function
21 from chromite.cbuildbot import constants
22 from chromite.lib import cache
23 from chromite.lib import cros_build_lib
24 from chromite.lib import osutils
25 from chromite.lib import retry_util
26 from chromite.lib import timeout_util
# Base HTTPS endpoints that map onto gs:// URLs; used by CanonicalizeURL()
# and GetGsURL() below.
28 PUBLIC_BASE_HTTPS_URL = 'https://commondatastorage.googleapis.com/'
29 PRIVATE_BASE_HTTPS_URL = 'https://storage.cloud.google.com/'
32 # Format used by "gsutil ls -l" when reporting modified time.
33 DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%SZ'
35 # Regexp for parsing each line of output from "gsutil ls -l".
36 # This regexp is prepared for the generation and meta_generation values,
37 # too, even though they are not expected until we use "-a".
39 # A detailed listing looks like:
40 # 99908 2014-03-01T05:50:08Z gs://bucket/foo/abc#1234 metageneration=1
41 # gs://bucket/foo/adir/
42 # 99908 2014-03-04T01:16:55Z gs://bucket/foo/def#5678 metageneration=1
43 # TOTAL: 2 objects, 199816 bytes (495.36 KB)
# NOTE(review): this excerpt elides original lines 47-48 and 51 — the
# (?P<url>...) capture group and the closing paren of re.compile() are not
# visible here; confirm against the upstream file.
44 LS_LA_RE = re.compile(
45 r'^\s*(?P<content_length>\d*?)\s+'
46 r'(?P<creation_time>\S*?)\s+'
49 r'#(?P<generation>\d+)\s+'
50 r'meta_?generation=(?P<metageneration>\d+)'
# Plain "ls" regex: only the url group captures anything; the other named
# groups are empty so both regexes expose the same group names to List().
52 LS_RE = re.compile(r'^\s*(?P<content_length>)(?P<creation_time>)(?P<url>.*)'
53 r'(?P<generation>)(?P<metageneration>)\s*$')
56 def CanonicalizeURL(url, strict=False):
57 """Convert provided URL to gs:// URL, if it follows a known format.
60 url: URL to canonicalize.
61 strict: Raises exception if URL cannot be canonicalized.
# Rewrite a recognized https:// base prefix to the gs:// scheme (only the
# first occurrence, hence the count=1 to str.replace).
63 for prefix in (PUBLIC_BASE_HTTPS_URL, PRIVATE_BASE_HTTPS_URL):
64 if url.startswith(prefix):
65 return url.replace(prefix, BASE_GS_URL, 1)
# Not a known https form; in strict mode an unrecognized non-gs:// URL is an
# error.  NOTE(review): the fall-through `return url` (orig. line 70) is
# elided from this excerpt — confirm against upstream.
67 if not url.startswith(BASE_GS_URL) and strict:
68 raise ValueError('Url %r cannot be canonicalized.' % url)
73 def GetGsURL(bucket, for_gsutil=False, public=True, suburl=''):
74 """Construct a Google Storage URL
77 bucket: The Google Storage bucket to use
78 for_gsutil: Do you want a URL for passing to `gsutil`?
79 public: Do we want the public or private url
80 suburl: A url fragment to tack onto the end
83 The fully constructed URL
# NOTE(review): the for_gsutil (gs://) branch (orig. lines 85-87) is elided
# from this excerpt; only the https construction is visible here.
88 urlbase = PUBLIC_BASE_HTTPS_URL if public else PRIVATE_BASE_HTTPS_URL
89 return '%s%s/%s' % (urlbase, bucket, suburl)
class GSContextException(Exception):
  """Base class for every error raised out of GSContext operations."""
96 # Since the underlying code uses RunCommand, some callers might be trying to
97 # catch cros_build_lib.RunCommandError themselves. Extend that class so that
98 # code continues to work.
99 class GSCommandError(GSContextException, cros_build_lib.RunCommandError):
100   """Thrown when an error happened we couldn't decode."""
# Raised by _RetryFilter when gsutil reports "PreconditionException" — e.g.
# an x-goog-if-generation-match header did not match (lost a write race).
103 class GSContextPreconditionFailed(GSContextException):
104   """Thrown when google storage returns code=PreconditionFailed."""
# Raised by _RetryFilter / Cat / Stat when the requested object is missing.
107 class GSNoSuchKey(GSContextException):
108   """Thrown when google storage returns code=NoSuchKey."""
111 # Detailed results of GSContext.Stat.
113 # The fields directory correspond to gsutil stat results.
115 # Field name        Type      Example
116 # creation_time     datetime  Sat, 23 Aug 2014 06:53:20 GMT
117 # content_length    int       74
118 # content_type      string    application/octet-stream
119 # hash_crc32c       string    BBPMPA==
120 # hash_md5          string    ms+qSYvgI9SjXn8tW/5UpQ==
121 # etag              string    CNCgocbmqMACEAE=
122 # generation        int       1408776800850000
123 # metageneration    int       1
125 # Note: We omit a few stat fields as they are not always available, and we
126 # have no callers that want this currently.
128 #   content_language  string/None  en   # This field may be None.
# NOTE(review): the namedtuple typename argument ('GSStatResult', orig. line
# 130) is elided from this excerpt — confirm against upstream.
129 GSStatResult = collections.namedtuple(
131 ('creation_time', 'content_length', 'content_type', 'hash_crc32c',
132 'hash_md5', 'etag', 'generation', 'metageneration'))
135 # Detailed results of GSContext.List.
# NOTE(review): the namedtuple typename argument ('GSListResult', orig. line
# 137) is elided from this excerpt — confirm against upstream.
136 GSListResult = collections.namedtuple(
138 ('url', 'creation_time', 'content_length', 'generation', 'metageneration'))
141 class GSCounter(object):
142   """A counter class for Google Storage."""
# NOTE(review): the __init__ body (self.ctx/self.path assignments, orig.
# lines ~150-152) and the Get() def line (orig. 154) are elided from this
# excerpt.
144 def __init__(self, ctx, path):
145 """Create a counter object.
148 ctx: A GSContext object.
149 path: The path to the counter in Google Storage.
# Get: reads the counter object's content via gsutil cat and parses it as an
# integer.
155 """Get the current value of a counter."""
157 return int(self.ctx.Cat(self.path))
161 def AtomicCounterOperation(self, default_value, operation):
162 """Atomically set the counter value using |operation|.
165 default_value: Default value to use for counter, if counter
167 operation: Function that takes the current counter value as a
168 parameter, and returns the new desired value.
171 The new counter value. None if value could not be set.
# Optimistic-concurrency loop: read the current generation, compute the new
# value, then write with version=generation so the copy fails if someone
# else updated the object in the meantime.  Generation 0 means "object does
# not exist yet", so the default value is used.
173 generation, _ = self.ctx.GetGeneration(self.path)
# xrange: this module is Python 2 code.
174 for _ in xrange(self.ctx.retries + 1):
176 value = default_value if generation == 0 else operation(self.Get())
177 self.ctx.Copy('-', self.path, input=str(value), version=generation)
179 except (GSContextPreconditionFailed, GSNoSuchKey):
180 # GSContextPreconditionFailed is thrown if another builder is also
181 # trying to update the counter and we lost the race. GSNoSuchKey is
182 # thrown if another builder deleted the counter. In either case, fetch
183 # the generation again, and, if it has changed, try the copy again.
184 new_generation, _ = self.ctx.GetGeneration(self.path)
185 if new_generation == generation:
187 generation = new_generation
# Increment: def line elided in this excerpt (orig. 189).
190 """Increment the counter.
193 The new counter value. None if value could not be set.
195 return self.AtomicCounterOperation(1, lambda x: x + 1)
# Decrement: def line elided in this excerpt (orig. 197).
198 """Decrement the counter.
201 The new counter value. None if value could not be set.
203 return self.AtomicCounterOperation(-1, lambda x: x - 1)
# Reset: def line elided in this excerpt (orig. 205).
206 """Reset the counter to zero.
209 The new counter value. None if value could not be set.
211 return self.AtomicCounterOperation(0, lambda x: 0)
213 def StreakIncrement(self):
214 """Increment the counter if it is positive, otherwise set it to 1.
217 The new counter value. None if value could not be set.
219 return self.AtomicCounterOperation(1, lambda x: x + 1 if x > 0 else 1)
221 def StreakDecrement(self):
222 """Decrement the counter if it is negative, otherwise set it to -1.
225 The new counter value. None if value could not be set.
227 return self.AtomicCounterOperation(-1, lambda x: x - 1 if x < 0 else -1)
230 class GSContext(object):
231   """A class to wrap common google storage operations."""
233 # Error messages that indicate an invalid BOTO config.
234 AUTHORIZATION_ERRORS = ('no configured', 'detail=Authorization')
236 DEFAULT_BOTO_FILE = os.path.expanduser('~/.boto')
237 DEFAULT_GSUTIL_TRACKER_DIR = os.path.expanduser('~/.gsutil/tracker-files')
238 # This is set for ease of testing.
239 DEFAULT_GSUTIL_BIN = None
240 DEFAULT_GSUTIL_BUILDER_BIN = '/b/build/third_party/gsutil/gsutil'
241 # How many times to retry uploads.
# NOTE(review): the DEFAULT_RETRIES assignment (orig. lines 242-243) is
# elided from this excerpt; __init__ below references self.DEFAULT_RETRIES.
244 # Multiplier for how long to sleep (in seconds) between retries; will delay
245 # (1*sleep) the first time, then (2*sleep), continuing via attempt * sleep.
246 DEFAULT_SLEEP_TIME = 60
# Pinned gsutil version: the tarball is fetched from GSUTIL_URL and cached
# (see GetDefaultGSUtilBin).
248 GSUTIL_VERSION = '4.7pre_retrydns'
249 GSUTIL_TAR = 'gsutil_%s.tar.gz' % GSUTIL_VERSION
250 GSUTIL_URL = PUBLIC_BASE_HTTPS_URL + 'prerelease/%s' % GSUTIL_TAR
251 GSUTIL_API_SELECTOR = 'JSON'
# NOTE(review): the string continuations of these two messages (orig. lines
# 254, 256-257) are elided from this excerpt.
253 RESUMABLE_UPLOAD_ERROR = ('Too many resumable upload attempts failed without '
255 RESUMABLE_DOWNLOAD_ERROR = ('Too many resumable download attempts failed '
# Resolve (and memoize in DEFAULT_GSUTIL_BIN) the gsutil binary to use:
# prefer a cached download of the pinned GSUTIL_TAR when a cache dir is
# available, else the builder path, else whatever 'gsutil' is on PATH.
# NOTE(review): the @classmethod decorator (orig. line 258) and the `else:`
# joining the two strategies (orig. line 274) are elided from this excerpt.
259 def GetDefaultGSUtilBin(cls, cache_dir=None):
260 if cls.DEFAULT_GSUTIL_BIN is None:
261 if cache_dir is None:
262 # Import here to avoid circular imports (commandline imports gs).
263 from chromite.lib import commandline
264 cache_dir = commandline.GetCacheDir()
265 if cache_dir is not None:
266 common_path = os.path.join(cache_dir, constants.COMMON_CACHE)
267 tar_cache = cache.TarballCache(common_path)
268 key = (cls.GSUTIL_TAR,)
269 # The common cache will not be LRU, removing the need to hold a read
270 # lock on the cached gsutil.
271 ref = tar_cache.Lookup(key)
272 ref.SetDefault(cls.GSUTIL_URL)
273 cls.DEFAULT_GSUTIL_BIN = os.path.join(ref.path, 'gsutil', 'gsutil')
275 # Check if the default gsutil path for builders exists. If
276 # not, try locating gsutil. If none exists, simply use 'gsutil'.
277 gsutil_bin = cls.DEFAULT_GSUTIL_BUILDER_BIN
278 if not os.path.exists(gsutil_bin):
279 gsutil_bin = osutils.Which('gsutil')
280 if gsutil_bin is None:
281 gsutil_bin = 'gsutil'
282 cls.DEFAULT_GSUTIL_BIN = gsutil_bin
284 return cls.DEFAULT_GSUTIL_BIN
286 def __init__(self, boto_file=None, cache_dir=None, acl=None,
287 dry_run=False, gsutil_bin=None, init_boto=False, retries=None,
292 boto_file: Fully qualified path to user's .boto credential file.
293 cache_dir: The absolute path to the cache directory. Use the default
294 fallback if not given.
295 acl: If given, a canned ACL. It is not valid to pass in an ACL file
296 here, because most gsutil commands do not accept ACL files. If you
297 would like to use an ACL file, use the SetACL command instead.
298 dry_run: Testing mode that prints commands that would be run.
299 gsutil_bin: If given, the absolute path to the gsutil binary. Else
300 the default fallback will be used.
301 init_boto: If set to True, GSContext will check during __init__ if a
302 valid boto config is configured, and if not, will attempt to ask the
303 user to interactively set up the boto config.
304 retries: Number of times to retry a command before failing.
305 sleep: Amount of time to sleep between failures.
# Resolve and validate the gsutil binary up front so later commands fail
# fast with a clear message.
307 if gsutil_bin is None:
308 gsutil_bin = self.GetDefaultGSUtilBin(cache_dir)
310 self._CheckFile('gsutil not found', gsutil_bin)
311 self.gsutil_bin = gsutil_bin
313 # The version of gsutil is retrieved on demand and cached here.
314 self._gsutil_version = None
316 # Increase the number of retries. With 10 retries, Boto will try a total of
317 # 11 times and wait up to 2**11 seconds (~30 minutes) in total, not
318 # including the time spent actually uploading or downloading.
319 self.gsutil_flags = ['-o', 'Boto:num_retries=10']
321 # Set HTTP proxy if environment variable http_proxy is set
322 # (crbug.com/325032).
323 if 'http_proxy' in os.environ:
324 url = urlparse.urlparse(os.environ['http_proxy'])
# A proxy without a hostname, or a password without a username, is
# considered malformed and ignored with a warning.
325 if not url.hostname or (not url.username and url.password):
326 logging.warning('GS_ERROR: Ignoring env variable http_proxy because it '
327 'is not properly set: %s', os.environ['http_proxy'])
# NOTE(review): the else/if guards between these four appends (orig. lines
# 328, 330, 332, 334) are elided from this excerpt.
329 self.gsutil_flags += ['-o', 'Boto:proxy=%s' % url.hostname]
331 self.gsutil_flags += ['-o', 'Boto:proxy_user=%s' % url.username]
333 self.gsutil_flags += ['-o', 'Boto:proxy_pass=%s' % url.password]
335 self.gsutil_flags += ['-o', 'Boto:proxy_port=%d' % url.port]
337 # Prefer boto_file if specified, else prefer the env then the default.
338 if boto_file is None:
339 boto_file = os.environ.get('BOTO_CONFIG')
340 if boto_file is None:
341 boto_file = self.DEFAULT_BOTO_FILE
342 self.boto_file = boto_file
346 self.dry_run = dry_run
347 self.retries = self.DEFAULT_RETRIES if retries is None else int(retries)
348 self._sleep_time = self.DEFAULT_SLEEP_TIME if sleep is None else int(sleep)
# NOTE(review): the @property decorator (orig. line 353) and the branch
# structure around the dry-run shortcut (orig. lines 357, 359, 361) are
# elided from this excerpt.
354 def gsutil_version(self):
355 """Return the version of the gsutil in this context."""
356 if not self._gsutil_version:
# Dry-run path: report the pinned version without invoking gsutil.
358 self._gsutil_version = self.GSUTIL_VERSION
360 cmd = ['-q', 'version']
362 # gsutil has been known to return version to stderr in the past, so
363 # use combine_stdout_stderr=True.
364 result = self.DoCommand(cmd, combine_stdout_stderr=True,
365 redirect_stdout=True)
367 # Expect output like: 'gsutil version 3.35' or 'gsutil version: 4.5'.
368 match = re.search(r'^\s*gsutil\s+version:?\s+([\d.]+)', result.output,
371 self._gsutil_version = match.group(1)
373 raise GSContextException('Unexpected output format from "%s":\n%s.' %
374 (result.cmdstr, result.output))
376 return self._gsutil_version
378 def _CheckFile(self, errmsg, afile):
379 """Pre-flight check for valid inputs.
382 errmsg: Error message to display.
383 afile: Fully qualified path to test file existence.
385 if not os.path.isfile(afile):
386 raise GSContextException('%s, %s is not a file' % (errmsg, afile))
# _TestGSLs: def line (orig. 388) is elided from this excerpt.  Runs a
# single non-retried 'gsutil ls' and treats a returncode of 1 combined with
# a known authorization error string as "boto config is broken".
389 """Quick test of gsutil functionality."""
390 result = self.DoCommand(['ls'], retries=0, debug_level=logging.DEBUG,
391 redirect_stderr=True, error_code_ok=True)
392 return not (result.returncode == 1 and
393 any(e in result.error for e in self.AUTHORIZATION_ERRORS))
395 def _ConfigureBotoConfig(self):
396 """Make sure we can access protected bits in GS."""
397 print('Configuring gsutil. **Please use your @google.com account.**')
# Interactive 'gsutil config' run; NOTE(review): its remaining kwargs and
# the surrounding try/finally (orig. lines 398, 400-401) are elided here.
399 self.DoCommand(['config'], retries=0, debug_level=logging.CRITICAL,
# An empty boto file means config never completed — remove the stub and
# fail loudly.
402 if (os.path.exists(self.boto_file) and not
403 os.path.getsize(self.boto_file)):
404 os.remove(self.boto_file)
405 raise GSContextException('GS config could not be set up.')
# _InitBoto: def line (orig. 407) is elided; probes GS access and triggers
# interactive configuration when the probe fails.
408 if not self._TestGSLs():
409 self._ConfigureBotoConfig()
411 def Cat(self, path, **kwargs):
412 """Returns the contents of a GS object."""
413 kwargs.setdefault('redirect_stdout', True)
414 if not path.startswith(BASE_GS_URL):
415 # gsutil doesn't support cat-ting a local path, so read it ourselves.
# NOTE(review): the `try:` opening this handler (orig. line 416) is elided
# from this excerpt.
417 return osutils.ReadFile(path)
418 except Exception as e:
# Map a missing local file onto the same exception a missing GS object
# raises, so callers handle both uniformly.
419 if getattr(e, 'errno', None) == errno.ENOENT:
420 raise GSNoSuchKey('%s: file does not exist' % path)
422 raise GSContextException(str(e))
426 return self.DoCommand(['cat', path], **kwargs).output
428 def CopyInto(self, local_path, remote_dir, filename=None, **kwargs):
429 """Upload a local file into a directory in google storage.
432 local_path: Local file path to copy.
433 remote_dir: Full gs:// url of the directory to transfer the file into.
434 filename: If given, the filename to place the content at; if not given,
435 it's discerned from basename(local_path).
436 **kwargs: See Copy() for documentation.
439 The generation of the remote file.
441 filename = filename if filename is not None else local_path
442 # Basename it even if an explicit filename was given; we don't want
443 # people using filename as a multi-directory path fragment.
# NOTE(review): the trailing **kwargs argument of this Copy() call (orig.
# line 446) is elided from this excerpt.
444 return self.Copy(local_path,
445 '%s/%s' % (remote_dir, os.path.basename(filename)),
# NOTE(review): the @staticmethod decorator (orig. line 448) and the
# `filenames`/`prefix` setup lines (orig. 462, 464, 467, 470-472, 475) are
# elided from this excerpt.
449 def GetTrackerFilenames(dest_path):
450 """Returns a list of gsutil tracker filenames.
452 Tracker files are used by gsutil to resume downloads/uploads. This
453 function does not handle parallel uploads.
456 dest_path: Either a GS path or an absolute local path.
459 The list of potential tracker filenames.
461 dest = urlparse.urlsplit(dest_path)
# gs:// destination -> resumable *upload* tracker name, keyed on bucket,
# object, and the JSON API selector; path separators are flattened to '_'.
463 if dest.scheme == 'gs':
465 bucket_name = dest.netloc
466 object_name = dest.path.lstrip('/')
468 re.sub(r'[/\\]', '_', 'resumable_upload__%s__%s__%s.url' %
469 (bucket_name, object_name, GSContext.GSUTIL_API_SELECTOR)))
# Local destination -> resumable *download* tracker name.
473 re.sub(r'[/\\]', '_', 'resumable_download__%s__%s.etag' %
474 (dest.path, GSContext.GSUTIL_API_SELECTOR)))
476 hashed_filenames = []
477 for filename in filenames:
# Python 2 text handling: normalize to UTF-8 bytes before hashing.
478 if not isinstance(filename, unicode):
479 filename = unicode(filename, 'utf8').encode('utf-8')
480 m = hashlib.sha1(filename)
481 hashed_filenames.append('%s_TRACKER_%s.%s' %
482 (prefix, m.hexdigest(), filename[-16:]))
484 return hashed_filenames
486 def _RetryFilter(self, e):
487 """Function to filter retry-able RunCommandError exceptions.
490 e: Exception object to filter. Exception may be re-raised as
491 as different type, if _RetryFilter determines a more appropriate
492 exception type based on the contents of e.
495 True for exceptions thrown by a RunCommand gsutil that should be retried.
497 if not retry_util.ShouldRetryCommandCommon(e):
500 # e is guaranteed by above filter to be a RunCommandError
# A negative returncode means the child died from a signal; that is not a
# transient GS failure, so never retry it.
502 if e.result.returncode < 0:
503 logging.info('Child process received signal %d; not retrying.',
504 -e.result.returncode)
507 error = e.result.error
# Translate well-known gsutil stderr strings into typed exceptions so
# callers can catch them precisely.
509 if 'PreconditionException' in error:
510 raise GSContextPreconditionFailed(e)
512 # If the file does not exist, one of the following errors occurs. The
513 # "stat" command leaves off the "CommandException: " prefix, but it also
514 # outputs to stdout instead of stderr and so will not be caught here
516 if ('CommandException: No URLs matched' in error or
517 'NotFoundException:' in error or
518 'One or more URLs matched no objects' in error):
521 logging.warning('GS_ERROR: %s', error)
523 # TODO: Below is a list of known flaky errors that we should
524 # retry. The list needs to be extended.
526 # Temporary fix: remove the gsutil tracker files so that our retry
527 # can hit a different backend. This should be removed after the
528 # bug is fixed by the Google Storage team (see crbug.com/308300).
529 RESUMABLE_ERROR_MESSAGE = (
530 self.RESUMABLE_DOWNLOAD_ERROR,
531 self.RESUMABLE_UPLOAD_ERROR,
532 'ResumableUploadException',
533 'ResumableUploadAbortException',
534 'ResumableDownloadException',
535 'ssl.SSLError: The read operation timed out',
536 'Unable to find the server',
537 'doesn\'t match cloud-supplied digest',
539 if any(x in error for x in RESUMABLE_ERROR_MESSAGE):
540 # Only remove the tracker files if we try to upload/download a file.
541 if 'cp' in e.result.cmd[:-2]:
542 # Assume a command: gsutil [options] cp [options] src_path dest_path
543 # dest_path needs to be a fully qualified local path, which is already
544 # required for GSContext.Copy().
545 tracker_filenames = self.GetTrackerFilenames(e.result.cmd[-1])
546 logging.info('Potential list of tracker files: %s',
548 for tracker_filename in tracker_filenames:
549 tracker_file_path = os.path.join(self.DEFAULT_GSUTIL_TRACKER_DIR,
551 if os.path.exists(tracker_file_path):
552 logging.info('Deleting gsutil tracker file %s before retrying.',
554 logging.info('The content of the tracker file: %s',
555 osutils.ReadFile(tracker_file_path))
556 osutils.SafeUnlink(tracker_file_path)
559 # We have seen flaky errors with 5xx return codes
560 # See b/17376491 for the "JSON decoding" error.
561 # We have seen transient Oauth 2.0 credential errors (crbug.com/414345).
562 TRANSIENT_ERROR_MESSAGE = (
563 'ServiceException: 5',
564 'Failure: No JSON object could be decoded',
565 'Oauth 2.0 User Account',
566 'InvalidAccessKeyId',
568 if any(x in error for x in TRANSIENT_ERROR_MESSAGE):
# NOTE(review): several `return`/`raise` lines (orig. 498-499, 501, 505,
# 519-520, 538, 547, 550, 553, 557, 567, 569-571) are elided from this
# excerpt, so the exact return values of each branch are not visible here.
573 # TODO(mtennant): Make a private method.
574 def DoCommand(self, gsutil_cmd, headers=(), retries=None, version=None,
575 parallel=False, **kwargs):
576 """Run a gsutil command, suppressing output, and setting retry/sleep.
579 gsutil_cmd: The (mostly) constructed gsutil subcommand to run.
580 headers: A list of raw headers to pass down.
581 parallel: Whether gsutil should enable parallel copy/update of multiple
582 files. NOTE: This option causes gsutil to use significantly more
583 memory, even if gsutil is only uploading one file.
584 retries: How many times to retry this command (defaults to setting given
586 version: If given, the generation; essentially the timestamp of the last
587 update. Note this is not the same as sequence-number; it's
588 monotonically increasing bucket wide rather than reset per file.
589 The usage of this is if we intend to replace/update only if the version
590 is what we expect. This is useful for distributed reasons- for example,
591 to ensure you don't overwrite someone else's creation, a version of
592 0 states "only update if no version exists".
595 A RunCommandResult object.
597 kwargs = kwargs.copy()
598 kwargs.setdefault('redirect_stderr', True)
# Build the full command line: binary, context-wide flags, raw headers,
# then the optional generation-match precondition header.
600 cmd = [self.gsutil_bin]
601 cmd += self.gsutil_flags
602 for header in headers:
603 cmd += ['-h', header]
604 if version is not None:
605 cmd += ['-h', 'x-goog-if-generation-match:%d' % int(version)]
607 # Enable parallel copy/update of multiple files if stdin is not to
608 # be piped to the command. This does not split a single file into
609 # smaller components for upload.
610 if parallel and kwargs.get('input') is None:
613 cmd.extend(gsutil_cmd)
# NOTE(review): the `if retries is None:` guard (orig. line 615) is elided
# from this excerpt.
616 retries = self.retries
# Always run with this context's boto config selected via BOTO_CONFIG.
618 extra_env = kwargs.pop('extra_env', {})
619 extra_env.setdefault('BOTO_CONFIG', self.boto_file)
# Dry-run mode only logs the command; NOTE(review): its guard (orig. line
# 621) is elided here.
622 logging.debug("%s: would've run: %s", self.__class__.__name__,
623 cros_build_lib.CmdToStr(cmd))
# Run with retry, translating any RunCommandError into GSCommandError so
# callers have a single error type rooted at GSContextException.
626 return retry_util.GenericRetry(self._RetryFilter,
627 retries, cros_build_lib.RunCommand,
628 cmd, sleep=self._sleep_time,
629 extra_env=extra_env, **kwargs)
630 except cros_build_lib.RunCommandError as e:
631 raise GSCommandError(e.msg, e.result, e.exception)
633 def Copy(self, src_path, dest_path, acl=None, recursive=False,
634 skip_symlinks=True, auto_compress=False, **kwargs):
635 """Copy to/from GS bucket.
637 Canned ACL permissions can be specified on the gsutil cp command line.
640 https://developers.google.com/storage/docs/accesscontrol#applyacls
643 src_path: Fully qualified local path or full gs:// path of the src file.
644 dest_path: Fully qualified local path or full gs:// path of the dest
646 acl: One of the google storage canned_acls to apply.
647 recursive: Whether to copy recursively.
648 skip_symlinks: Skip symbolic links when copying recursively.
649 auto_compress: Automatically compress with gzip when uploading.
652 The generation of the remote file.
655 RunCommandError if the command failed despite retries.
657 # -v causes gs://bucket/path#generation to be listed in output.
660 # Certain versions of gsutil (at least 4.3) assume the source of a copy is
661 # a directory if the -r option is used. If it's really a file, gsutil will
662 # look like it's uploading it but not actually do anything. We'll work
663 # around that problem by surpressing the -r flag if we detect the source
665 if recursive and not os.path.isfile(src_path):
# auto_compress path: derive the -z suffix from the source file extension.
# NOTE(review): the guards between orig. lines 666-670 are elided here.
671 # Pass the suffix without the '.' as that is what gsutil wants.
672 suffix = os.path.splitext(src_path)[1]
674 raise ValueError('src file "%s" needs an extension to compress' %
676 cmd += ['-z', suffix[1:]]
678 acl = self.acl if acl is None else acl
682 with cros_build_lib.ContextManagerStack() as stack:
683 # Write the input into a tempfile if possible. This is needed so that
684 # gsutil can retry failed requests.
685 if src_path == '-' and kwargs.get('input') is not None:
686 f = stack.Add(tempfile.NamedTemporaryFile)
687 f.write(kwargs['input'])
# NOTE(review): flushing the tempfile and swapping src_path/input (orig.
# lines 688-691) are elided from this excerpt.
692 cmd += ['--', src_path, dest_path]
694 if not (src_path.startswith(BASE_GS_URL) or
695 dest_path.startswith(BASE_GS_URL)):
696 # Don't retry on local copies.
697 kwargs.setdefault('retries', 0)
699 kwargs['capture_output'] = True
# NOTE(review): the try: around this call (orig. line 700) is elided.
701 result = self.DoCommand(cmd, **kwargs)
705 # Now we parse the output for the current generation number. Example:
706 #   Created: gs://chromeos-throw-away-bucket/foo#1360630664537000.1
707 m = re.search(r'Created: .*#(\d+)([.](\d+))?$', result.error)
709 return int(m.group(1))
712 except GSNoSuchKey as e:
713 # If the source was a local file, the error is a quirk of gsutil 4.5
714 # and should be ignored. If the source was remote, there might
715 # legitimately be no such file. See crbug.com/393419.
716 if os.path.isfile(src_path):
717 # pylint: disable=E1101
718 return e.args[0].result
721 # TODO: Merge LS() and List()?
722 def LS(self, path, **kwargs):
723 """Does a directory listing of the given gs path.
726 path: The path to get a listing of.
727 kwargs: See options that DoCommand takes.
730 A list of paths that matched |path|. Might be more than one if a
731 directory or path include wildcards/etc...
736 if not path.startswith(BASE_GS_URL):
737 # gsutil doesn't support listing a local path, so just run 'ls'.
# Strip DoCommand-only kwargs before handing the rest to RunCommand.
738 kwargs.pop('retries', None)
739 kwargs.pop('headers', None)
740 result = cros_build_lib.RunCommand(['ls', path], **kwargs)
741 return result.output.splitlines()
# GS path: delegate to List() and project out just the URLs.
743 return [x.url for x in self.List(path, **kwargs)]
745 def List(self, path, details=False, **kwargs):
746 """Does a directory listing of the given gs path.
749 path: The path to get a listing of.
750 details: Whether to include size/timestamp info.
751 kwargs: See options that DoCommand takes.
754 A list of GSListResult objects that matched |path|. Might be more
755 than one if a directory or path include wildcards/etc...
# NOTE(review): construction of `cmd` and selection of `ls_re` between the
# LS_LA_RE / LS_RE variants (orig. lines 756-765) are elided from this
# excerpt.
766 # We always request the extended details as the overhead compared to a plain
767 # listing is negligible.
768 kwargs['redirect_stdout'] = True
769 lines = self.DoCommand(cmd, **kwargs).output.splitlines()
# With details, "gsutil ls -l" appends a TOTAL summary line.
772 # The last line is expected to be a summary line. Ignore it.
778 # Handle optional fields.
779 intify = lambda x: int(x) if x else None
781 # Parse out each result and build up the results list.
# NOTE(review): the `ret = []` / `for line in lines:` scaffolding (orig.
# lines 780, 782, 784, 788-791, 798-799) is elided from this excerpt.
783 match = ls_re.search(line)
785 raise GSContextException('unable to parse line: %s' % line)
786 if match.group('creation_time'):
787 timestamp = datetime.datetime.strptime(match.group('creation_time'),
792 ret.append(GSListResult(
793 content_length=intify(match.group('content_length')),
794 creation_time=timestamp,
795 url=match.group('url'),
796 generation=intify(match.group('generation')),
797 metageneration=intify(match.group('metageneration'))))
801 def GetSize(self, path, **kwargs):
802 """Returns size of a single object (local or GS)."""
# Local paths are answered directly from the filesystem; GS paths go
# through Stat and its content_length field.
803 if not path.startswith(BASE_GS_URL):
804 return os.path.getsize(path)
806 return self.Stat(path, **kwargs).content_length
808 def Move(self, src_path, dest_path, **kwargs):
809 """Move/rename to/from GS bucket.
812 src_path: Fully qualified local path or full gs:// path of the src file.
813 dest_path: Fully qualified local path or full gs:// path of the dest file.
# '--' terminates option parsing so paths beginning with '-' are safe.
815 cmd = ['mv', '--', src_path, dest_path]
816 return self.DoCommand(cmd, **kwargs)
818 def SetACL(self, upload_url, acl=None):
819 """Set access on a file already in google storage.
822 upload_url: gs:// url that will have acl applied to it.
823 acl: An ACL permissions file or canned ACL.
# NOTE(review): the fallback to self.acl and its guard (orig. lines
# 825-826, 829-830) are elided from this excerpt.
827 raise GSContextException(
828 "SetAcl invoked w/out a specified acl, nor a default acl.")
831 self.DoCommand(['acl', 'set', acl, upload_url])
833 def ChangeACL(self, upload_url, acl_args_file=None, acl_args=None):
834 """Change access on a file already in google storage with "acl ch".
837 upload_url: gs:// url that will have acl applied to it.
838 acl_args_file: A file with arguments to the gsutil acl ch command. The
839 arguments can be spread across multiple lines. Comments
840 start with a # character and extend to the end of the
841 line. Exactly one of this argument or acl_args must be
843 acl_args: A list of arguments for the gsutil acl ch command. Exactly
844 one of this argument or acl_args must be set.
# Exactly one of the two argument forms must be supplied.
846 if acl_args_file and acl_args:
847 raise GSContextException(
848 'ChangeACL invoked with both acl_args and acl_args set.')
849 if not acl_args_file and not acl_args:
850 raise GSContextException(
851 'ChangeACL invoked with neither acl_args nor acl_args set.')
# File form: read, strip '#' comments, and split on whitespace.
# NOTE(review): the `if acl_args_file:` guard (orig. line 853) is elided.
854 lines = osutils.ReadFile(acl_args_file).splitlines()
855 # Strip out comments.
856 lines = [x.split('#', 1)[0].strip() for x in lines]
857 acl_args = ' '.join([x for x in lines if x]).split()
859 self.DoCommand(['acl', 'ch'] + acl_args + [upload_url])
861 def Exists(self, path, **kwargs):
862 """Checks whether the given object exists.
865 path: Local path or gs:// url to check.
866 kwargs: Flags to pass to DoCommand.
869 True if the path exists; otherwise returns False.
871 if not path.startswith(BASE_GS_URL):
872 return os.path.exists(path)
# GS path: probe with Stat; NOTE(review): the try/except GSNoSuchKey/return
# lines (orig. 874, 876-879) are elided from this excerpt.
875 self.Stat(path, **kwargs)
881 def Remove(self, path, recurse=False, ignore_missing=False, **kwargs):
882 """Remove the specified file.
885 path: Full gs:// url of the file to delete.
886 recurse: Remove recursively starting at path. Same as rm -R. Defaults
888 ignore_missing: Whether to suppress errors about missing files.
889 kwargs: Flags to pass to DoCommand.
# NOTE(review): building `cmd` with the optional '-R' flag and the
# surrounding try/except GSNoSuchKey (orig. lines 890-895, 897) are elided
# from this excerpt.
896 self.DoCommand(cmd, **kwargs)
# When ignore_missing is False, a missing object is re-raised to the caller.
898 if not ignore_missing:
901 def GetGeneration(self, path):
902 """Get the generation and metageneration of the given |path|.
905 A tuple of the generation and metageneration.
# NOTE(review): the try/except GSNoSuchKey -> (0, 0) handling (orig. lines
# 907, 909-911) is elided from this excerpt; generation 0 is what
# GSCounter.AtomicCounterOperation uses to mean "object does not exist".
908 res = self.Stat(path)
912 return res.generation, res.metageneration
914 def Stat(self, path, **kwargs):
915 """Stat a GS file, and get detailed information.
918 path: A GS path for files to Stat. Wildcards are NOT supported.
919 kwargs: Flags to pass to DoCommand.
922 A GSStatResult object with all fields populated.
925 Assorted GSContextException exceptions.
# NOTE(review): the try: opening this call (orig. line 927) is elided.
928 res = self.DoCommand(['stat', path], redirect_stdout=True, **kwargs)
929 except GSCommandError as e:
930 # Because the 'gsutil stat' command returns errors on stdout (unlike other
931 # commands), we have to look for standard errors ourselves.
932 # That behavior is different from any other command and is handled
933 # here specially. See b/16020252.
934 if e.result.output.startswith('No URLs matched'):
935 raise GSNoSuchKey(path)
937 # No idea what this is, so just choke.
940 # In dryrun mode, DoCommand doesn't return an object, so we need to fake
941 # out the behavior ourselves.
# NOTE(review): the dry-run guard and several fake GSStatResult fields
# (orig. lines 942-943, 945, 948-952) are elided from this excerpt.
944 creation_time=datetime.datetime.now(),
946 content_type='application/octet-stream',
947 hash_crc32c='AAAAAA==',
953 # We expect Stat output like the following. However, the Content-Language
954 # line appears to be optional based on how the file in question was
957 # gs://bucket/path/file:
958 #     Creation time:      Sat, 23 Aug 2014 06:53:20 GMT
959 #     Content-Language:   en
961 #     Content-Type:       application/octet-stream
962 #     Hash (crc32c):      BBPMPA==
963 #     Hash (md5):         ms+qSYvgI9SjXn8tW/5UpQ==
964 #     ETag:               CNCgocbmqMACEAE=
965 #     Generation:         1408776800850000
968 if not res.output.startswith('gs://'):
969 raise GSContextException('Unexpected stat output: %s' % res.output)
# _GetField helper: pull "Name: value" out of the stat output; its def line
# (orig. 971) is elided from this excerpt.
972 m = re.search(r'%s:\s*(.+)' % re.escape(name), res.output)
976 raise GSContextException('Field "%s" missing in "%s"' %
# Assemble the typed result; the GSStatResult( opener (orig. line 979) is
# elided here.
980 creation_time=datetime.datetime.strptime(
981 _GetField('Creation time'), '%a, %d %b %Y %H:%M:%S %Z'),
982 content_length=int(_GetField('Content-Length')),
983 content_type=_GetField('Content-Type'),
984 hash_crc32c=_GetField('Hash (crc32c)'),
985 hash_md5=_GetField('Hash (md5)'),
986 etag=_GetField('ETag'),
987 generation=int(_GetField('Generation')),
988 metageneration=int(_GetField('Metageneration')))
990 def Counter(self, path):
991 """Return a GSCounter object pointing at a |path| in Google Storage.
994 path: The path to the counter in Google Storage.
996 return GSCounter(self, path)
998 def WaitForGsPaths(self, paths, timeout, period=10):
999 """Wait until a list of files exist in GS.
1002 paths: The list of files to wait for.
1003 timeout: Max seconds to wait for file to appear.
1004 period: How often to check for files while waiting.
1007 timeout_util.TimeoutError if the timeout is reached.
1009 # Copy the list of URIs to wait for, so we don't modify the callers context.
1010 pending_paths = paths[:]
# Each poll drops any path that now exists; slice-assignment mutates the
# shared list in place so _Retry sees the update.
1012 def _CheckForExistence():
1013 pending_paths[:] = [x for x in pending_paths if not self.Exists(x)]
1015 def _Retry(_return_value):
1016 # Retry, if there are any pending paths left.
1017 return pending_paths
1019 timeout_util.WaitForSuccess(_Retry, _CheckForExistence,
1020 timeout=timeout, period=period)
1023 @contextlib.contextmanager
1024 def TemporaryURL(prefix):
1025 """Context manager to generate a random URL.
1027 At the end, the URL will be deleted.
# Build a unique trash-bucket URL from random bytes + current time + user.
1029 md5 = hashlib.md5(os.urandom(20))
1030 md5.update(cros_build_lib.UserDateTimeFormat())
1031 url = '%s/chromite-temp/%s/%s/%s' % (constants.TRASH_BUCKET, prefix,
1032 getpass.getuser(), md5.hexdigest())
# NOTE(review): creation of `ctx` and the try/yield/finally scaffolding
# (orig. lines 1033, 1035-1037) are elided from this excerpt; the two
# Remove calls appear to be the pre-yield cleanup and the finally cleanup.
1034 ctx.Remove(url, ignore_missing=True, recurse=True)
1038 ctx.Remove(url, ignore_missing=True, recurse=True)