3 # Copyright (C) 2016 and later: Unicode, Inc. and others.
4 # License & terms of use: http://www.unicode.org/copyright.html
6 # Copyright (C) 2009-2011, International Business Machines Corporation, Google and Others.
10 # Script to check and fix svn property settings for ICU source files.
11 # Also check for the correct line endings on files with svn:eol-style = native
13 # THIS SCRIPT DOES NOT WORK ON WINDOWS
14 # It only works correctly on platforms where the native line ending is a plain \n
17 # icu-svnprops-check.py [options]
20 # -f | --fix Fix any problems that are found
21 # -h | --help Print a usage line and exit.
23 # The tool operates recursively on the directory from which it is run.
24 # Only files from the svn repository are checked.
25 # No changes are made to the repository; only the working copy will be altered.
34 # svn autoprops definitions.
35 # Copy and paste here the ICU recommended auto-props from
36 # http://icu-project.org/docs/subversion_howto/index.html
38 # This program will parse this autoprops string, and verify that files in
39 # the repository have the recommended properties set.
42 ### Section for configuring automatic properties.
44 ### The format of the entries is:
45 ### file-name-pattern = propname[=value][;propname[=value]...]
46 ### The file-name-pattern can contain wildcards (such as '*' and
47 ### '?'). All entries which match will be applied to the file.
48 ### Note that auto-props functionality must be enabled, which
49 ### is typically done by setting the 'enable-auto-props' option.
50 *.c = svn:eol-style=native
51 *.cc = svn:eol-style=native
52 *.cpp = svn:eol-style=native
53 *.h = svn:eol-style=native
54 *.rc = svn:eol-style=native
55 *.dsp = svn:eol-style=native
56 *.dsw = svn:eol-style=native
57 *.sln = svn:eol-style=native
58 *.vcproj = svn:eol-style=native
59 configure = svn:eol-style=native;svn:executable
60 *.sh = svn:eol-style=native;svn:executable
61 *.pl = svn:eol-style=native;svn:executable
62 *.py = svn:eol-style=native;svn:executable
63 *.txt = svn:mime-type=text/plain;svn:eol-style=native
64 *.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
65 *.ucm = svn:eol-style=native
66 *.html = svn:eol-style=native;svn:mime-type=text/html
67 *.htm = svn:eol-style=native;svn:mime-type=text/html
68 *.xml = svn:eol-style=native
69 Makefile = svn:eol-style=native
70 *.in = svn:eol-style=native
71 *.mak = svn:eol-style=native
72 *.mk = svn:eol-style=native
73 *.png = svn:mime-type=image/png
74 *.jpeg = svn:mime-type=image/jpeg
75 *.jpg = svn:mime-type=image/jpeg
76 *.bin = svn:mime-type=application/octet-stream
77 *.brk = svn:mime-type=application/octet-stream
78 *.cnv = svn:mime-type=application/octet-stream
79 *.dat = svn:mime-type=application/octet-stream
80 *.icu = svn:mime-type=application/octet-stream
81 *.res = svn:mime-type=application/octet-stream
82 *.spp = svn:mime-type=application/octet-stream
83 # new additions 2007-dec-5 srl
84 *.rtf = mime-type=text/rtf
85 *.pdf = mime-type=application/pdf
86 # changed 2008-04-08: modified .txt, above, adding mime-type
87 # changed 2010-11-09: modified .java, adding mime-type
88 # Note: The escape syntax for semicolon (";;") is supported since subversion 1.6.1
92 # file_types: The parsed form of the svn auto-props specification.
93 # A list of file types - .cc, .cpp, .txt, etc.
94 # each element is a [type, proplist]
95 # "type" is a regular expression string that will match a file name
96 # prop list is another list, one element per property.
97 # Each property item is a two element list, [prop name, prop value]
def _glob_to_path_regex(glob):
    """Translate an auto-props file-name glob into a regular expression string.

    The resulting expression is meant for re.match() against a relative
    path: an optional leading directory part is accepted, and the glob itself
    is applied to the final path component only.
    """
    pieces = []
    for char in glob:
        if char == "*":
            pieces.append("[^/]*")
        elif char == "?":
            pieces.append("[^/]")
        else:
            # Escape everything else so '.', '+', etc. are matched literally.
            pieces.append(re.escape(char))
    # Without the optional directory prefix a literal name such as "Makefile"
    # would only ever match a file in the top-level directory, because
    # re.match() anchors at the start of the path.
    return "(?:.*/)?" + "".join(pieces) + "$"


def parse_auto_props(auto_props=None, parsed_types=None):
    """Parse an svn auto-props specification into (pattern, proplist) pairs.

    auto_props:    text of the specification; when omitted, the module-level
                   svn_auto_props string is used.
    parsed_types:  list that receives the results; when omitted, the
                   module-level file_types list is used.

    Each appended element is a tuple (pattern, proplist) where pattern is a
    regular expression string matching a file path and proplist is a list of
    (property name, property value) tuples.  Properties given without an
    explicit value (e.g. svn:executable) get the value "".

    Comment lines, blank lines and the [auto-props] section header are
    ignored.  Lines that do not look like "<file-type> = ..." are reported on
    standard output and skipped.

    Returns the list the results were appended to.
    """
    if auto_props is None:
        auto_props = svn_auto_props
    if parsed_types is None:
        parsed_types = file_types

    for propline in auto_props.splitlines():
        if re.match(r"\s*(#.*)?$", propline):          # comment and blank lines
            continue
        if re.match(r"\s*\[auto-props\]", propline):   # the [auto-props] line
            continue
        if not re.match(r"\s*[^\s]+\s*=", propline):   # minimal check for <file-type> =
            print("Bad line from autoprops definitions: " + propline)
            continue

        file_type, string_proplist = propline.split("=", 1)
        pattern = _glob_to_path_regex(file_type.strip())

        # Example string_proplist here: " svn:eol-style=native;svn:executable"
        # Split on ';' into individual properties.  The lookbehind/lookahead
        # keep ';;' intact, since that is an escaped ';' inside a value.
        proplist = []
        for prop in re.split("(?<!;);(?!;)", string_proplist):
            # partition() yields an empty value for properties that have no
            # "=value" part, e.g. svn:executable.
            prop_name, _sep, prop_val = prop.partition("=")
            prop_name = prop_name.strip()
            if not prop_name:
                # A stray or trailing ';' produces an empty entry; ignore it.
                continue
            # Unescape any ";;" in the value, e.g. the mime-type from
            #   *.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
            prop_val = prop_val.strip().replace(";;", ";")
            proplist.append((prop_name, prop_val))

        parsed_types.append((pattern, proplist))
    return parsed_types
def runCommand(cmd):
    """Run a shell command and return the text it wrote to standard output.

    cmd: the command line, passed to os.popen() and therefore run by the shell.

    If the command reports a failure, a message is written to standard error
    and the script terminates via sys.exit() with a non-zero status.
    """
    # NOTE(review): the def line and the return statement were not present in
    # the reviewed text; they are reconstructed from how callers use this
    # function (runCommand(<string>) whose result is treated as output text).
    output_file = os.popen(cmd)
    output_text = output_file.read()
    exit_status = output_file.close()
    if exit_status:
        sys.stderr.write('" ' + cmd + ' " failed. Exiting.\n')
        # On POSIX, close() returns a wait()-style status with the exit code
        # in the high byte.  Passing that straight to sys.exit() would be
        # truncated to its low byte, turning e.g. exit code 1 (status 256)
        # into a "successful" exit status of 0.
        exit_code = exit_status if os.name == "nt" else exit_status >> 8
        if not 0 < exit_code < 256:
            # Killed by a signal or otherwise not representable: still fail.
            exit_code = 1
        sys.exit(exit_code)
    return output_text
def usage():
    """Print a one-line usage summary for this script to standard output."""
    # NOTE(review): the enclosing def line was not present in the reviewed
    # text; the function name is inferred -- confirm against callers.
    # A single-argument print(...) behaves the same under Python 2 and 3.
    print("usage: " + sys.argv[0] + " [-f | --fix] [-h | --help]")
157 # UTF-8 file check. For text files, add a charset to the mime-type if their contents are UTF-8
158 # file_name: name of a text file.
159 # base_mime_type: svn:mime-type property value from the auto-props file (no charset= part)
160 # actual_mime_type: existing svn:mime-type property value for the file.
161 # return: svn:mime-type property value, with charset added when appropriate.
def check_utf8(file_name, base_mime_type, actual_mime_type):
    """Return the svn:mime-type value a text file should have.

    file_name:         name of a text file.
    base_mime_type:    svn:mime-type value from the auto-props definitions.
    actual_mime_type:  svn:mime-type value currently set on the file.

    A charset is added to base_mime_type when the file's contents are
    non-ASCII UTF-8.  Pure ASCII files, and files that are neither ASCII nor
    valid UTF-8 (a warning is printed for those), get base_mime_type
    unchanged.  A note is printed for UTF-8 files that lack a byte order mark.
    """
    # If the file already has a charset in its mime-type, don't make any change.
    if actual_mime_type.find("charset=") > 0:
        return actual_mime_type

    # Read raw bytes: the checks below are about the encoded form, and binary
    # mode keeps them independent of any platform default text encoding.
    with open(file_name, "rb") as f:
        data = f.read()

    # bytearray yields integers under both Python 2 and Python 3.
    if all(byte < 128 for byte in bytearray(data)):
        # Pure ASCII (this also covers an empty file).
        return base_mime_type

    try:
        data.decode("UTF-8")
    except UnicodeDecodeError:
        print("warning: %s: not ASCII, not UTF-8" % file_name)
        return base_mime_type

    # Compare against the complete three-byte BOM; 0xEF alone is also the
    # lead byte of ordinary characters and would give false positives.
    if not data.startswith(b"\xef\xbb\xbf"):
        print("UTF-8 file with no BOM: " + file_name)

    if "charset=" in base_mime_type:
        # The auto-props value already names a charset (as the *.java entry
        # does); appending another one would produce a malformed value.
        return base_mime_type

    # Append charset=utf-8.
    return base_mime_type + ';charset=utf-8'
def _shell_quote(text):
    """Return text quoted so that a POSIX shell treats it as one literal word."""
    return "'" + text.replace("'", "'\\''") + "'"


def _check_line_endings(file_name, fix_problems):
    """Report, and optionally remove, DOS (CR LF) line endings in a file."""
    with open(file_name, "rb") as stream:
        data = stream.read()
    if b"\r\n" not in data:
        return
    if fix_problems:
        print(file_name + ": Removing DOS CR characters.")
        with open(file_name, "wb") as stream:
            stream.write(data.replace(b"\r\n", b"\n"))
    else:
        print(file_name + " contains DOS CR characters.")


def _check_property(file_name, propname, propval, fix_problems):
    """Compare one expected svn property with the file's actual value."""
    quoted_file = _shell_quote(file_name)
    actual_propval = runCommand(
        "svn propget --strict %s %s" % (_shell_quote(propname), quoted_file))
    if propname == "svn:mime-type" and propval.startswith("text/"):
        # UTF-8 text files should have svn:mime-type=text/something;charset=utf-8
        propval = check_utf8(file_name, propval, actual_propval)
    # A property defined without a value (propval "") is considered satisfied
    # when svn reports "*" for it.
    if not (propval == actual_propval or (propval == "" and actual_propval == "*")):
        print("svn propset %s '%s' %s" % (propname, propval, file_name))
        if fix_problems:
            os.system("svn propset %s %s %s"
                      % (_shell_quote(propname), _shell_quote(propval), quoted_file))
    if propname == "svn:eol-style" and propval == "native":
        _check_line_endings(file_name, fix_problems)


def main(argv):
    """Check, and with -f/--fix repair, svn properties of the working copy.

    argv: command line arguments without the program name.

    Lists the repository files with "svn ls -R", and for every file whose
    name matches an entry in file_types compares each expected property with
    the value reported by "svn propget".  Differences are printed as the
    "svn propset" command that would correct them; with --fix the command is
    also executed.  Files with svn:eol-style=native are additionally checked
    for DOS line endings.

    Exits with status 2 on a command line error.
    """
    # NOTE(review): the def line, the loop over the file list and several
    # branch/exit lines were not present in the reviewed text and have been
    # reconstructed from the surrounding statements -- confirm before merging.
    fix_problems = False
    try:
        opts, args = getopt.getopt(argv, "fh", ("fix", "help"))
    except getopt.GetoptError as err:
        # Report the option getopt actually rejected; argv[0] may be a
        # perfectly valid option that merely precedes the bad one.
        print(str(err))
        usage()
        sys.exit(2)
    for opt, _arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        if opt in ("-f", "--fix"):
            fix_problems = True
    if args:
        print("unexpected command line argument")
        usage()
        sys.exit(2)

    parse_auto_props()
    file_list = runCommand("svn ls -R ").splitlines()

    for f in file_list:
        if os.path.isdir(f):
            # Directories are listed too; only files carry these properties.
            continue
        if not os.path.isfile(f):
            print("Repository file not in working copy: " + f)
            continue

        for file_pattern, props in file_types:
            if re.match(file_pattern, f):
                for propname, propval in props:
                    _check_property(f, propname, propval, fix_problems)
245 if __name__ == "__main__":