Upstream version 7.36.149.0
[platform/framework/web/crosswalk.git] / src / chrome / common / extensions / docs / server2 / path_canonicalizer.py
index 23da9e1..04ad9be 100644 (file)
@@ -9,13 +9,16 @@ from future import Future
 from path_util import SplitParent
 from special_paths import SITE_VERIFICATION_FILE
 
+def _Normalize(file_name, splittext=False):
+  normalized = file_name
+  if splittext:
+    normalized = posixpath.splitext(file_name)[0]
+  normalized = normalized.replace('.', '').replace('-', '').replace('_', '')
+  return normalized.lower()
 
-def _SimplifyFileName(file_name):
-  return (posixpath.splitext(file_name)[0]
-      .lower()
-      .replace('.', '')
-      .replace('-', '')
-      .replace('_', ''))
+def _CommonNormalizedPrefix(first_file, second_file):
+  return posixpath.commonprefix((_Normalize(first_file),
+                                 _Normalize(second_file)))
 
 
 class PathCanonicalizer(object):
@@ -60,7 +63,8 @@ class PathCanonicalizer(object):
                 path == SITE_VERIFICATION_FILE):
               canonical_path += ext
             canonical_paths.add(canonical_path)
-            simplified_paths_map[_SimplifyFileName(path)].append(canonical_path)
+            simplified_paths_map[_Normalize(path, splittext=True)].append(
+                canonical_path)
         # Store |simplified_paths_map| sorted. Ties in length are broken by
         # taking the shortest, lexicographically smallest path.
         for path_list in simplified_paths_map.itervalues():
@@ -88,18 +92,26 @@ class PathCanonicalizer(object):
     # Path not found. Our single heuristic: find |base| in the directory
     # structure with the longest common prefix of |path|.
     _, base = SplitParent(path)
-    potential_paths = simplified_paths_map.get(_SimplifyFileName(base))
-    if not potential_paths:
+
+    # Paths with a non-extension dot separator lose information in
+    # _SimplifyFileName, so we try paths both with and without the dot to
+    # maximize the possibility of finding the right path.
+    potential_paths = (
+        simplified_paths_map.get(_Normalize(base), []) +
+        simplified_paths_map.get(_Normalize(base, splittext=True), []))
+
+    if potential_paths == []:
       # There is no file with anything close to that name.
       return path
 
     # The most likely canonical file is the one with the longest common prefix
     # with |path|. This is slightly weaker than it could be; |path| is
-    # compared, not the simplified form of |path|, which may matter.
+    # compared without symbols, not the simplified form of |path|,
+    # which may matter.
     max_prefix = potential_paths[0]
-    max_prefix_length = len(posixpath.commonprefix((max_prefix, path)))
+    max_prefix_length = len(_CommonNormalizedPrefix(max_prefix, path))
     for path_for_file in potential_paths[1:]:
-      prefix_length = len(posixpath.commonprefix((path_for_file, path)))
+      prefix_length = len(_CommonNormalizedPrefix(path_for_file, path))
       if prefix_length > max_prefix_length:
         max_prefix, max_prefix_length = path_for_file, prefix_length