Upstream version 5.34.104.0
[platform/framework/web/crosswalk.git] / src / chrome / common / extensions / docs / server2 / path_canonicalizer.py
1 # Copyright 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 from collections import defaultdict
6 import posixpath
7
8 from future import Gettable, Future
9 from path_util import SplitParent
10 from special_paths import SITE_VERIFICATION_FILE
11
12
13 def _SimplifyFileName(file_name):
14   return (posixpath.splitext(file_name)[0]
15       .lower()
16       .replace('.', '')
17       .replace('-', '')
18       .replace('_', ''))
19
20
class PathCanonicalizer(object):
  '''Transforms paths into their canonical forms. Since the docserver has had
  many incarnations - e.g. there didn't use to be apps/ - there may be old
  paths lying around the webs. We try to redirect those to where they are now.
  '''
  def __init__(self,
               file_system,
               object_store_creator,
               strip_extensions):
    '''|file_system| is walked from its root ('') to discover the canonical
    paths; its GetIdentity() is used as the cache category.
    |object_store_creator| creates the object store that caches the result.
    |strip_extensions| is a list of file extensions (e.g. .html) that should
    be stripped for a path's canonical form.
    '''
    self._cache = object_store_creator.Create(
        PathCanonicalizer, category=file_system.GetIdentity())
    self._file_system = file_system
    self._strip_extensions = strip_extensions

  def _LoadCache(self):
    '''Returns a Future of the tuple (canonical_paths, simplified_paths_map).

    |canonical_paths| is the pre-calculated set of canonical paths.
    |simplified_paths_map| is a mapping of simplified file names to a sorted
    list of the canonical paths that contain them. For example,
     - browseraction: [extensions/browserAction.html]
     - storage: [apps/storage.html, extensions/storage.html]

    Both values are read from the cache when present; otherwise they are
    computed by walking the file system and then written back to the cache.
    '''
    cached_future = self._cache.GetMulti(('canonical_paths',
                                          'simplified_paths_map'))

    def resolve():
      cached = cached_future.Get()
      canonical_paths = cached.get('canonical_paths')
      simplified_paths_map = cached.get('simplified_paths_map')

      # The two entries are always written together below, so normally both
      # are present or both are absent. If only one came back, treat it as a
      # cache miss and rebuild both rather than handing a None to callers
      # (an assert here would be stripped under -O and crash later instead).
      if canonical_paths is not None and simplified_paths_map is not None:
        return canonical_paths, simplified_paths_map

      canonical_paths = set()
      simplified_paths_map = defaultdict(list)
      for base, dirs, files in self._file_system.Walk(''):
        for path in dirs + files:
          path_without_ext, ext = posixpath.splitext(path)
          canonical_path = posixpath.join(base, path_without_ext)
          # Keep the extension unless it is one we were asked to strip. The
          # site verification file always keeps its extension.
          if (ext not in self._strip_extensions or
              path == SITE_VERIFICATION_FILE):
            canonical_path += ext
          canonical_paths.add(canonical_path)
          simplified_paths_map[_SimplifyFileName(path)].append(canonical_path)

      # Store |simplified_paths_map| sorted: shortest path first, with ties in
      # length broken lexicographically. Canonicalize() prefers the earliest
      # candidate on ties, so this ordering decides which path wins.
      # values() rather than itervalues() so this runs on Python 2 and 3.
      for path_list in simplified_paths_map.values():
        path_list.sort(key=lambda p: (len(p), p))

      self._cache.SetMulti({
        'canonical_paths': canonical_paths,
        'simplified_paths_map': simplified_paths_map,
      })
      return canonical_paths, simplified_paths_map

    return Future(delegate=Gettable(resolve))

  def Canonicalize(self, path):
    '''Returns the canonical path for |path|.

    If |path| is already canonical it is returned unchanged. Otherwise the
    last component of |path| is simplified and looked up among the known
    files; of the candidates with that simplified name, the one sharing the
    longest common (character-wise) prefix with |path| is returned. If there
    are no candidates, |path| is returned unchanged.
    '''
    canonical_paths, simplified_paths_map = self._LoadCache().Get()

    # Path may already be the canonical path.
    if path in canonical_paths:
      return path

    # Path not found. Our single heuristic: find |base| in the directory
    # structure with the longest common prefix of |path|.
    _, base = SplitParent(path)
    potential_paths = simplified_paths_map.get(_SimplifyFileName(base))
    if not potential_paths:
      # There is no file with anything close to that name.
      return path

    # The most likely canonical file is the one with the longest common prefix
    # with |path|. This is slightly weaker than it could be; |path| is
    # compared, not the simplified form of |path|, which may matter.
    # max() returns the first maximal item, so on equal prefix lengths the
    # earliest entry of the pre-sorted |potential_paths| wins.
    return max(
        potential_paths,
        key=lambda candidate: len(posixpath.commonprefix((candidate, path))))

  def Cron(self):
    '''Returns the Future from _LoadCache(), so that a caller can load (or
    populate) the cached canonical path data ahead of any Canonicalize() call.
    '''
    return self._LoadCache()