"""Archives a set of files or directories to a server."""
-__version__ = '0.3'
+__version__ = '0.3.2'
import functools
import hashlib
import threading
import time
import urllib
+import urlparse
import zlib
from third_party import colorama
from utils import threading_utils
from utils import tools
+import auth
+
# Version of isolate protocol passed to the server in /handshake request.
ISOLATE_PROTOCOL_VERSION = '1.0'
raise NotImplementedError()
+class _PushState(object):
+ """State needed to call .push(), to be stored in Item.push_state.
+
+ Note this needs to be a global class to support pickling.
+ """
+
+ def __init__(self, upload_url, finalize_url):
+ self.upload_url = upload_url
+ self.finalize_url = finalize_url
+ self.uploaded = False
+ self.finalized = False
+
+
class IsolateServer(StorageApi):
"""StorageApi implementation that downloads and uploads to Isolate Server.
It uploads and downloads directly from Google Storage whenever appropriate.
"""
- class _PushState(object):
- """State needed to call .push(), to be stored in Item.push_state."""
- def __init__(self, upload_url, finalize_url):
- self.upload_url = upload_url
- self.finalize_url = finalize_url
- self.uploaded = False
- self.finalized = False
-
def __init__(self, base_url, namespace):
super(IsolateServer, self).__init__()
assert base_url.startswith('http'), base_url
headers={'Range': 'bytes=%d-' % offset} if offset else None)
if not connection:
- raise IOError('Unable to open connection to %s' % source_url)
+ raise IOError('Request failed - %s' % source_url)
# If |offset| is used, verify server respects it by checking Content-Range.
if offset:
def push(self, item, content):
assert isinstance(item, Item)
- assert isinstance(item.push_state, IsolateServer._PushState)
+ assert isinstance(item.push_state, _PushState)
assert not item.push_state.finalized
# TODO(vadimsh): Do not read from |content| generator when retrying push.
assert len(push_urls) == 2, str(push_urls)
item = items[i]
assert item.push_state is None
- item.push_state = IsolateServer._PushState(push_urls[0], push_urls[1])
+ item.push_state = _PushState(push_urls[0], push_urls[1])
missing_items.append(item)
logging.info('Queried %d files, %d cache hit',
len(items), len(items) - len(missing_items))
It can be accessed concurrently from multiple threads, so it should protect
its internal state with some lock.
"""
+ cache_dir = None
def __enter__(self):
"""Context manager interface."""
return items, metadata
-def archive(storage, algo, files, blacklist):
- """Stores every entries and returns the relevant data."""
+def archive_files_to_storage(storage, algo, files, blacklist):
+  """Stores every entry and returns the relevant data.
+
+ Arguments:
+ storage: a Storage object that communicates with the remote object store.
+ algo: an hashlib class to hash content. Usually hashlib.sha1.
+ files: list of file paths to upload. If a directory is specified, a
+ .isolated file is created and its hash is returned.
+ blacklist: function that returns True if a file should be omitted.
+ """
assert all(isinstance(i, unicode) for i in files), files
if len(files) != len(set(map(os.path.abspath, files))):
raise Error('Duplicate entries found.')
raise Error('%s is neither a file or directory.' % f)
except OSError:
raise Error('Failed to process %s.' % f)
- # Technically we would care about the uploaded files but we don't much in
- # practice.
+  # Technically we would care about which files were uploaded, but in practice
+  # we don't.
_uploaded_files = storage.upload_items(items_to_upload)
return results
finally:
shutil.rmtree(tempdir)
+def archive(out, namespace, files, blacklist):
+ if files == ['-']:
+ files = sys.stdin.readlines()
+
+ if not files:
+ raise Error('Nothing to upload')
+
+ files = [f.decode('utf-8') for f in files]
+ algo = get_hash_algo(namespace)
+ blacklist = tools.gen_blacklist(blacklist)
+ with get_storage(out, namespace) as storage:
+ results = archive_files_to_storage(storage, algo, files, blacklist)
+ print('\n'.join('%s %s' % (r[0], r[1]) for r in results))
+
+
@subcommand.usage('<file1..fileN> or - to read from stdin')
def CMDarchive(parser, args):
"""Archives data to the server.
directories, the .isolated generated for the directory is listed as the
directory entry itself.
"""
+ add_isolate_server_options(parser, False)
parser.add_option(
'--blacklist',
action='append', default=list(DEFAULT_BLACKLIST),
help='List of regexp to use as blacklist filter when uploading '
'directories')
options, files = parser.parse_args(args)
-
- if files == ['-']:
- files = sys.stdin.readlines()
-
- if not files:
- parser.error('Nothing to upload')
-
- files = [f.decode('utf-8') for f in files]
- algo = get_hash_algo(options.namespace)
- blacklist = tools.gen_blacklist(options.blacklist)
+ process_isolate_server_options(parser, options)
try:
- with get_storage(options.isolate_server, options.namespace) as storage:
- results = archive(storage, algo, files, blacklist)
+ archive(options.isolate_server, options.namespace, files, options.blacklist)
except Error as e:
parser.error(e.args[0])
- print('\n'.join('%s %s' % (r[0], r[1]) for r in results))
return 0
It can either download individual files or a complete tree from a .isolated
file.
"""
+ add_isolate_server_options(parser, True)
parser.add_option(
'-i', '--isolated', metavar='HASH',
help='hash of an isolated file, .isolated file content is discarded, use '
'-t', '--target', metavar='DIR', default=os.getcwd(),
help='destination directory')
options, args = parser.parse_args(args)
+ process_isolate_server_options(parser, options)
if args:
parser.error('Unsupported arguments: %s' % args)
if bool(options.isolated) == bool(options.file):
options.target = os.path.abspath(options.target)
- with get_storage(options.isolate_server, options.namespace) as storage:
+ remote = options.isolate_server or options.indir
+ with get_storage(remote, options.namespace) as storage:
# Fetching individual files.
if options.file:
channel = threading_utils.TaskChannel()
return 0
+@subcommand.usage('<file1..fileN> or - to read from stdin')
+def CMDhashtable(parser, args):
+ """Archives data to a hashtable on the file system.
+
+  If a directory is specified, a .isolated file is created and the whole
+  directory is uploaded. This .isolated file can then be included in another
+  one to run commands.
+
+ The commands output each file that was processed with its content hash. For
+ directories, the .isolated generated for the directory is listed as the
+ directory entry itself.
+ """
+ add_outdir_options(parser)
+ parser.add_option(
+ '--blacklist',
+ action='append', default=list(DEFAULT_BLACKLIST),
+ help='List of regexp to use as blacklist filter when uploading '
+ 'directories')
+ options, files = parser.parse_args(args)
+ process_outdir_options(parser, options, os.getcwd())
+ try:
+ # Do not compress files when archiving to the file system.
+ archive(options.outdir, 'default', files, options.blacklist)
+ except Error as e:
+ parser.error(e.args[0])
+ return 0
+
+
+def add_isolate_server_options(parser, add_indir):
+ """Adds --isolate-server and --namespace options to parser.
+
+ Includes --indir if desired.
+ """
+ parser.add_option(
+ '-I', '--isolate-server',
+ metavar='URL', default=os.environ.get('ISOLATE_SERVER', ''),
+ help='URL of the Isolate Server to use. Defaults to the environment '
+ 'variable ISOLATE_SERVER if set. No need to specify https://, this '
+ 'is assumed.')
+ parser.add_option(
+ '--namespace', default='default-gzip',
+ help='The namespace to use on the Isolate Server, default: %default')
+ if add_indir:
+ parser.add_option(
+ '--indir', metavar='DIR',
+ help='Directory used to store the hashtable instead of using an '
+ 'isolate server.')
+
+
+def process_isolate_server_options(parser, options):
+  """Processes the --isolate-server and --indir options, aborting if the
+  combination is invalid (neither, or both, specified).
+ """
+ has_indir = hasattr(options, 'indir')
+ if not options.isolate_server:
+ if not has_indir:
+ parser.error('--isolate-server is required.')
+ elif not options.indir:
+ parser.error('Use one of --indir or --isolate-server.')
+ else:
+ if has_indir and options.indir:
+ parser.error('Use only one of --indir or --isolate-server.')
+
+ if options.isolate_server:
+ parts = urlparse.urlparse(options.isolate_server, 'https')
+ if parts.query:
+ parser.error('--isolate-server doesn\'t support query parameter.')
+ if parts.fragment:
+ parser.error('--isolate-server doesn\'t support fragment in the url.')
+ # urlparse('foo.com') will result in netloc='', path='foo.com', which is not
+ # what is desired here.
+ new = list(parts)
+ if not new[1] and new[2]:
+ new[1] = new[2].rstrip('/')
+ new[2] = ''
+ new[2] = new[2].rstrip('/')
+ options.isolate_server = urlparse.urlunparse(new)
+ return
+
+ if file_path.is_url(options.indir):
+ parser.error('Can\'t use an URL for --indir.')
+ options.indir = unicode(options.indir).replace('/', os.path.sep)
+ options.indir = os.path.abspath(
+ os.path.normpath(os.path.join(os.getcwd(), options.indir)))
+ if not os.path.isdir(options.indir):
+ parser.error('Path given to --indir must exist.')
+
+
+def add_outdir_options(parser):
+ """Adds --outdir, which is orthogonal to --isolate-server.
+
+  Note: On upload, separate commands ('archive' and 'hashtable') are used.
+ On 'download', the same command can download from either an isolate server or
+ a file system.
+ """
+ parser.add_option(
+ '-o', '--outdir', metavar='DIR',
+ help='Directory used to recreate the tree.')
+
+
+def process_outdir_options(parser, options, cwd):
+ if not options.outdir:
+ parser.error('--outdir is required.')
+ if file_path.is_url(options.outdir):
+ parser.error('Can\'t use an URL for --outdir.')
+ options.outdir = unicode(options.outdir).replace('/', os.path.sep)
+ # outdir doesn't need native path case since tracing is never done from there.
+ options.outdir = os.path.abspath(
+ os.path.normpath(os.path.join(cwd, options.outdir)))
+  # In theory, we'd create the outdir directory right away, but we defer doing
+  # it in case there are errors in the command line.
+
+
class OptionParserIsolateServer(tools.OptionParserWithLogging):
def __init__(self, **kwargs):
tools.OptionParserWithLogging.__init__(
version=__version__,
prog=os.path.basename(sys.modules[__name__].__file__),
**kwargs)
- self.add_option(
- '-I', '--isolate-server',
- metavar='URL', default='',
- help='Isolate server to use')
- self.add_option(
- '--namespace', default='default-gzip',
- help='The namespace to use on the server, default: %default')
+ auth.add_auth_options(self)
def parse_args(self, *args, **kwargs):
options, args = tools.OptionParserWithLogging.parse_args(
self, *args, **kwargs)
- options.isolate_server = options.isolate_server.rstrip('/')
- if not options.isolate_server:
- self.error('--isolate-server is required.')
+ auth.process_auth_options(self, options)
return options, args