Imported Upstream version 4.4.1
[platform/upstream/python-lxml.git] / buildlibxml.py
1 import os, re, sys, subprocess
2 import tarfile
3 from distutils import log, version
4 from contextlib import closing
5 from ftplib import FTP
6
7 try:
8     from urlparse import urljoin, unquote, urlparse
9     from urllib import urlretrieve, urlopen, urlcleanup
10 except ImportError:
11     from urllib.parse import urljoin, unquote, urlparse
12     from urllib.request import urlretrieve, urlopen, urlcleanup
13
# Use parallel "make -jN" builds when several CPUs are available.
# The job count is capped (max '-j6') to avoid overloading the machine.
multi_make_options = []
try:
    import multiprocessing
    cpus = multiprocessing.cpu_count()
    if cpus > 1:
        if cpus > 5:
            cpus = 5
        multi_make_options = ['-j%d' % (cpus+1)]
except (ImportError, NotImplementedError):
    # multiprocessing unavailable or CPU count undeterminable:
    # fall back to a serial build.  (Was a bare 'except:', which also
    # swallowed KeyboardInterrupt/SystemExit.)
    pass
24
25
26 # use pre-built libraries on Windows
27
def download_and_extract_windows_binaries(destdir):
    """Fetch the pre-built Windows library zips and unpack them into *destdir*.

    Returns a dict mapping each library name to the directory it was
    extracted into.
    """
    base_url = "https://github.com/mhils/libxml2-win-binaries/releases"
    filenames = list(_list_dir_urllib(base_url))

    # Find the newest release tag and keep only that release's download links.
    release_path = "/download/%s/" % find_max_version(
        "library release", filenames, re.compile(r"/releases/tag/([0-9.]+[0-9])$"))
    url = base_url + release_path
    filenames = [fn.rsplit('/', 1)[1] for fn in filenames if release_path in fn]

    # Pick the architecture/toolchain variant matching this interpreter.
    arch = "win64" if sys.maxsize > 2**32 else "win32"
    if sys.version_info < (3, 5):
        arch = 'vs2008.' + arch

    # Resolve the newest zip file name for each library we need.
    libs = {}
    for libname in ('libxml2', 'libxslt', 'zlib', 'iconv'):
        libs[libname] = "%s-%s.%s.zip" % (
            libname, find_max_version(libname, filenames), arch)

    if not os.path.exists(destdir):
        os.makedirs(destdir)

    # Download (unless a ".keep" marker asks us to reuse a local copy)
    # and unpack each archive, replacing the value by the extracted path.
    for libname, libfn in libs.items():
        srcfile = urljoin(url, libfn)
        destfile = os.path.join(destdir, libfn)
        if os.path.exists(destfile + ".keep"):
            print('Using local copy of  "{}"'.format(srcfile))
        else:
            print('Retrieving "%s" to "%s"' % (srcfile, destfile))
            urlcleanup()  # work around FTP bug 27973 in Py2.7.12+
            urlretrieve(srcfile, destfile)
        libs[libname] = unpack_zipfile(destfile, destdir)

    return libs
69
70
def find_top_dir_of_zipfile(zipfile):
    """Return the name of the single top-level directory in *zipfile*.

    Asserts (with the archive's filename in the message) when the archive
    does not have exactly one common top-level directory.
    """
    topdir = None
    files = [f.filename for f in zipfile.filelist]
    dirs = [d for d in files if d.endswith('/')]
    if dirs:
        # Shortest explicit directory entry is the top-level one.
        dirs.sort(key=len)
        topdir = dirs[0]
        topdir = topdir[:topdir.index("/")+1]
    else:
        # Some zip files carry no explicit directory entries at all;
        # derive the top-level directory from the file paths instead.
        # (Previously this case always failed the assertion below.)
        candidates = set(
            path[:path.index("/") + 1] for path in files if "/" in path)
        if len(candidates) == 1:
            topdir = candidates.pop()
    if topdir:
        # Verify that every member really lives under that directory.
        for path in files:
            if not path.startswith(topdir):
                topdir = None
                break
    assert topdir, (
        "cannot determine single top-level directory in zip file %s" %
        zipfile.filename)
    return topdir.rstrip('/')
87
88
def unpack_zipfile(zipfn, destdir):
    """Extract *zipfn* into *destdir* and return the extracted top directory."""
    assert zipfn.endswith('.zip')
    import zipfile
    print('Unpacking %s into %s' % (os.path.basename(zipfn), destdir))
    with zipfile.ZipFile(zipfn) as archive:
        extracted_dir = os.path.join(destdir, find_top_dir_of_zipfile(archive))
        archive.extractall(path=destdir)
    assert os.path.exists(extracted_dir), 'missing: %s' % extracted_dir
    return extracted_dir
101
102
def get_prebuilt_libxml2xslt(download_dir, static_include_dirs, static_library_dirs):
    """Download the pre-built Windows binaries and register (in-place) the
    include and library directories of each extracted package."""
    assert sys.platform.startswith('win')
    for libname, path in download_and_extract_windows_binaries(download_dir).items():
        include_dir = os.path.join(path, 'include')
        library_dir = os.path.join(path, 'lib')
        assert os.path.exists(include_dir), 'does not exist: %s' % include_dir
        assert os.path.exists(library_dir), 'does not exist: %s' % library_dir
        static_include_dirs.append(include_dir)
        static_library_dirs.append(library_dir)
113
114
115 ## Routines to download and build libxml2/xslt from sources:
116
# Download locations for the library source tarballs.
LIBXML2_LOCATION = 'http://xmlsoft.org/sources/'
LIBICONV_LOCATION = 'https://ftp.gnu.org/pub/gnu/libiconv/'
ZLIB_LOCATION = 'https://zlib.net/'

# Extracts the version part of a source tarball filename,
# e.g. 'libxml2-2.9.10.tar.gz' -> '2.9.10'.
_LIBFILE_VERSION_RE = re.compile(r'^[^-]*-([.0-9-]+)[.].*')
match_libfile_version = _LIBFILE_VERSION_RE.match
121
122
123 def _find_content_encoding(response, default='iso8859-1'):
124     from email.message import Message
125     content_type = response.headers.get('Content-Type')
126     if content_type:
127         msg = Message()
128         msg.add_header('Content-Type', content_type)
129         charset = msg.get_content_charset(default)
130     else:
131         charset = default
132     return charset
133
134
def remote_listdir(url):
    """Return the remote directory listing for *url*.

    Tries urllib first; on IOError, falls back to plain ftplib (FTP URLs
    only), which bypasses any configured proxy.
    """
    try:
        listing = _list_dir_urllib(url)
    except IOError:
        assert url.lower().startswith('ftp://')
        print("Requesting with urllib failed. Falling back to ftplib. "
              "Proxy argument will be ignored for %s" % url)
        listing = _list_dir_ftplib(url)
    return listing
143
144
def _list_dir_ftplib(url):
    """List a remote FTP directory using ftplib with anonymous login."""
    parts = urlparse(url)
    connection = FTP(parts.netloc)
    lines = []
    try:
        connection.login()
        connection.cwd(parts.path)
        connection.dir(lines.append)
    finally:
        connection.quit()
    return parse_text_ftplist("\n".join(lines))
156
157
def _list_dir_urllib(url):
    """List a remote directory over HTTP(S)/FTP via urllib."""
    with closing(urlopen(url)) as response:
        charset = _find_content_encoding(response)
        content_type = response.headers.get('Content-Type')
        raw = response.read()

    text = raw.decode(charset)
    # HTML index pages get link extraction; anything else is treated as
    # a plain FTP-style text listing.
    if content_type and content_type.startswith('text/html'):
        return parse_html_filelist(text)
    return parse_text_ftplist(text)
170
171
def http_listfiles(url, re_pattern):
    """Return all matches of *re_pattern* found in the page at *url*."""
    with closing(urlopen(url)) as response:
        charset = _find_content_encoding(response)
        page = response.read()
    return re.findall(re_pattern, page.decode(charset))
178
179
def parse_text_ftplist(s):
    """Yield the file names from a plain-text FTP directory listing.

    Directory entries (mode string starting with 'd') are skipped.
    Blank lines are skipped too (previously they raised IndexError,
    since ''.split(None, 8) is an empty list).
    """
    for line in s.splitlines():
        if line and not line.startswith('d'):
            # -rw-r--r--   1 ftp      ftp           476 Sep  1  2011 md5sum.txt
            # Last (9th) element is 'md5sum.txt' in the above example, but there
            # may be variations, so we discard only the first 8 entries.
            yield line.split(None, 8)[-1]
187
188
def parse_html_filelist(s):
    """Yield the (unquoted) file links found in an HTML index page.

    Links ending in '/' (sub-directories) are skipped; duplicates are
    collapsed.
    """
    re_href = re.compile(
        r'<a\s+(?:[^>]*\s+)?href=["\']([^;?"\']+?)[;?"\']',
        re.I|re.M)
    for link in set(re_href.findall(s)):
        if link.endswith('/'):
            continue
        yield unquote(link)
197
198
def tryint(s):
    """Return *s* converted to int when possible, otherwise unchanged.

    Used so that version components compare numerically ('10' > '9')
    while non-numeric parts stay as strings.
    """
    try:
        result = int(s)
    except ValueError:
        result = s
    return result
204
205
def download_libxml2(dest_dir, version=None):
    """Downloads libxml2, returning the filename where the library was downloaded"""
    # Dots are escaped so '.tar.gz' only matches the literal extension
    # (previously '.' matched any character).
    version_re = re.compile(r'libxml2-([0-9.]+[0-9])\.tar\.gz')
    filename = 'libxml2-%s.tar.gz'
    return download_library(dest_dir, LIBXML2_LOCATION, 'libxml2',
                            version_re, filename, version=version)
213
214
def download_libxslt(dest_dir, version=None):
    """Downloads libxslt, returning the filename where the library was downloaded"""
    # Dots are escaped so '.tar.gz' only matches the literal extension
    # (previously '.' matched any character).
    version_re = re.compile(r'libxslt-([0-9.]+[0-9])\.tar\.gz')
    filename = 'libxslt-%s.tar.gz'
    return download_library(dest_dir, LIBXML2_LOCATION, 'libxslt',
                            version_re, filename, version=version)
222
223
def download_libiconv(dest_dir, version=None):
    """Downloads libiconv, returning the filename where the library was downloaded"""
    # Dots are escaped so '.tar.gz' only matches the literal extension
    # (previously '.' matched any character).
    version_re = re.compile(r'libiconv-([0-9.]+[0-9])\.tar\.gz')
    filename = 'libiconv-%s.tar.gz'
    return download_library(dest_dir, LIBICONV_LOCATION, 'libiconv',
                            version_re, filename, version=version)
230
231
def download_zlib(dest_dir, version=None):
    """Downloads zlib, returning the filename where the library was downloaded"""
    # 'version=None' default added for consistency with the other
    # download_* helpers; None means "find the latest version upstream".
    # Dots are escaped so '.tar.gz' only matches the literal extension.
    version_re = re.compile(r'zlib-([0-9.]+[0-9])\.tar\.gz')
    filename = 'zlib-%s.tar.gz'
    return download_library(dest_dir, ZLIB_LOCATION, 'zlib',
                            version_re, filename, version=version)
238
239
def find_max_version(libname, filenames, version_re=None):
    """Return the highest version string of *libname* found in *filenames*.

    *version_re* must capture the version in group 1; when None, a
    default '<libname>-<version>' pattern is used.  Raises Exception
    when no filename matches.
    """
    if version_re is None:
        version_re = re.compile(r'%s-([0-9.]+[0-9](?:-[abrc0-9]+)?)' % libname)
    # Collect (comparable version tuple, original string) pairs.
    versions = []
    for name in filenames:
        found = version_re.search(name)
        if found:
            text = found.group(1)
            versions.append((tuple(map(tryint, text.split('.'))), text))
    if not versions:
        raise Exception(
            "Could not find the most current version of %s from the files: %s" % (
                libname, filenames))
    best = max(versions)[-1]
    print('Latest version of %s is %s' % (libname, best))
    return best
258
259
def download_library(dest_dir, location, name, version_re, filename, version=None):
    """Download the source tarball of library *name* into *dest_dir*.

    *filename* is a template containing a '%s' placeholder for the
    version.  When *version* is None, the remote directory at *location*
    is listed to find the newest release; if that fails with an IOError
    (e.g. no network), the newest matching tarball already present in
    *dest_dir* is reused instead.  Returns the local tarball path.
    """
    if version is None:
        try:
            if location.startswith('ftp://'):
                fns = remote_listdir(location)
            else:
                # Turn the filename template into a regex that matches any version.
                fns = http_listfiles(location, '(%s)' % filename.replace('%s', '(?:[0-9.]+[0-9])'))
            version = find_max_version(name, fns, version_re)
        except IOError:
            # network failure - maybe we have the files already?
            latest = (0,0,0)
            fns = os.listdir(dest_dir)
            for fn in fns:
                if fn.startswith(name+'-'):
                    match = match_libfile_version(fn)
                    if match:
                        version_tuple = tuple(map(tryint, match.group(1).split('.')))
                        if version_tuple > latest:
                            latest = version_tuple
                            # 'fn' is already a complete filename, so clear
                            # 'version' to skip the template substitution below.
                            filename = fn
                            version = None
            if latest == (0,0,0):
                raise
    if version:
        filename = filename % version
    full_url = urljoin(location, filename)
    dest_filename = os.path.join(dest_dir, filename)
    # An existing local tarball is reused; delete it to force a re-download.
    if os.path.exists(dest_filename):
        print(('Using existing %s downloaded into %s '
               '(delete this file if you want to re-download the package)') % (
            name, dest_filename))
    else:
        print('Downloading %s into %s from %s' % (name, dest_filename, full_url))
        urlcleanup()  # work around FTP bug 27973 in Py2.7.12
        urlretrieve(full_url, dest_filename)
    return dest_filename
296
297
def unpack_tarball(tar_filename, dest):
    """Extract *tar_filename* into *dest* and return the extracted top directory.

    Members outside the first top-level directory are warned about but
    still extracted.  Raises ValueError for an empty archive (previously
    this crashed with a TypeError in os.path.join(dest, None)).
    """
    print('Unpacking %s into %s' % (os.path.basename(tar_filename), dest))
    # 'with' guarantees the tar handle is closed even when iteration or
    # extraction raises (the original only closed it on success).
    with tarfile.open(tar_filename) as tar:
        base_dir = None
        for member in tar:
            base_name = member.name.split('/')[0]
            if base_dir is None:
                base_dir = base_name
            elif base_dir != base_name:
                print('Unexpected path in %s: %s' % (tar_filename, base_name))
        if base_dir is None:
            raise ValueError('Empty tar archive: %s' % tar_filename)
        tar.extractall(dest)
    return os.path.join(dest, base_dir)
311
312
def call_subprocess(cmd, **kw):
    """Run *cmd* (a list of arguments), raising on a non-zero exit code.

    Keyword arguments are passed through to subprocess.call(); 'cwd'
    (defaulting to '.') is also echoed in the log message.
    """
    # NOTE: the redundant function-local 'import subprocess' was removed;
    # the module is already imported at the top of this file.
    cwd = kw.get('cwd', '.')
    cmd_desc = ' '.join(cmd)
    log.info('Running "%s" in %s' % (cmd_desc, cwd))
    returncode = subprocess.call(cmd, **kw)
    if returncode:
        raise Exception('Command "%s" returned code %s' % (cmd_desc, returncode))
321
322
def safe_mkdir(dir):
    """Create directory *dir* (and missing parents) if it does not exist.

    EAFP version: os.path.exists() + os.makedirs() had a race window in
    which the directory could appear in between and crash makedirs().
    """
    try:
        os.makedirs(dir)
    except OSError:
        # Already existing (possibly created concurrently) is fine;
        # re-raise real errors (permissions, file in the way, ...).
        if not os.path.isdir(dir):
            raise
326
327
def cmmi(configure_cmd, build_dir, multicore=None, **call_setup):
    """configure / make / make install in *build_dir*.

    *multicore* overrides the number of parallel make jobs; when falsy,
    the module-level default (multi_make_options) is used, and a value
    of 1 forces a serial build.
    """
    print('Starting build in %s' % build_dir)
    call_subprocess(configure_cmd, cwd=build_dir, **call_setup)
    if not multicore:
        make_jobs = multi_make_options
    elif int(multicore) > 1:
        make_jobs = ['-j%s' % multicore]
    else:
        make_jobs = []
    for extra_target in ([], ['install']):
        call_subprocess(
            ['make'] + make_jobs + extra_target,
            cwd=build_dir, **call_setup)
343
344
def configure_darwin_env(env_setup):
    """Fill env_setup['env'] with MacOS-X specific build defaults.

    Chooses CFLAGS/LDFLAGS architectures (ppc/i386/x86_64), the SDK and
    the deployment target based on the OS version and the installed
    XCode.  Values already present in os.environ take precedence over
    the computed defaults.
    """
    import platform
    # check target architectures on MacOS-X (ppc, i386, x86_64)
    major_version, minor_version = tuple(map(int, platform.mac_ver()[0].split('.')[:2]))
    if major_version > 7:
        # Check to see if ppc is supported (XCode4 drops ppc support)
        include_ppc = True
        if os.path.exists('/usr/bin/xcodebuild'):
            pipe = subprocess.Popen(['/usr/bin/xcodebuild', '-version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            out, _ = pipe.communicate()
            # First output line is expected to look like "Xcode <version>".
            xcode_version = (out.decode('utf8').splitlines() or [''])[0]
            # Also parse only first digit, because 3.2.1 can't be parsed nicely
            if (xcode_version.startswith('Xcode') and
                version.StrictVersion(xcode_version.split()[1]) >= version.StrictVersion('4.0')):
                include_ppc = False
        arch_string = ""
        if include_ppc:
            arch_string = "-arch ppc "
        if minor_version < 6:
            # Pre-10.6: build against the 10.4u SDK for i386 (+ ppc if supported).
            env_default = {
                'CFLAGS': arch_string + "-arch i386 -isysroot /Developer/SDKs/MacOSX10.4u.sdk -O2",
                'LDFLAGS': arch_string + "-arch i386 -isysroot /Developer/SDKs/MacOSX10.4u.sdk",
                'MACOSX_DEPLOYMENT_TARGET': "10.3"
            }
        else:
            # 10.6+: fat i386/x86_64 binaries.
            env_default = {
                'CFLAGS': arch_string + "-arch i386 -arch x86_64 -O2",
                'LDFLAGS': arch_string + "-arch i386 -arch x86_64",
                'MACOSX_DEPLOYMENT_TARGET': "10.6"
            }
        env = os.environ.copy()
        # Merge so that variables set in the real environment override the defaults.
        env_default.update(env)
        env_setup['env'] = env_default
378
379
def build_libxml2xslt(download_dir, build_dir,
                      static_include_dirs, static_library_dirs,
                      static_cflags, static_binaries,
                      libxml2_version=None,
                      libxslt_version=None,
                      libiconv_version=None,
                      zlib_version=None,
                      multicore=None):
    """Download, unpack and statically build zlib, libiconv, libxml2 and libxslt.

    All four libraries are installed into a common prefix below
    *build_dir*.  The resulting include/library directories and static
    archives (.a) are appended in-place to the given lists, and the
    paths of the generated xml2-config and xslt-config scripts are
    returned.  Version arguments default to None, meaning "latest
    available upstream".
    NOTE(review): 'static_cflags' is currently unused in this function.
    """
    safe_mkdir(download_dir)
    safe_mkdir(build_dir)
    zlib_dir = unpack_tarball(download_zlib(download_dir, zlib_version), build_dir)
    libiconv_dir = unpack_tarball(download_libiconv(download_dir, libiconv_version), build_dir)
    libxml2_dir  = unpack_tarball(download_libxml2(download_dir, libxml2_version), build_dir)
    libxslt_dir  = unpack_tarball(download_libxslt(download_dir, libxslt_version), build_dir)
    prefix = os.path.join(os.path.abspath(build_dir), 'libxml2')
    safe_mkdir(prefix)

    call_setup = {}
    if sys.platform == 'darwin':
        configure_darwin_env(call_setup)

    # Common configure flags shared by libiconv/libxml2/libxslt.
    configure_cmd = ['./configure',
                     '--disable-dependency-tracking',
                     '--disable-shared',
                     '--prefix=%s' % prefix,
                     ]

    # build zlib (uses its own, minimal configure command)
    zlib_configure_cmd = [
        './configure',
        '--prefix=%s' % prefix,
    ]
    cmmi(zlib_configure_cmd, zlib_dir, multicore, **call_setup)

    # build libiconv
    cmmi(configure_cmd, libiconv_dir, multicore, **call_setup)

    # build libxml2 against the iconv/zlib just installed into the prefix
    libxml2_configure_cmd = configure_cmd + [
        '--without-python',
        '--with-iconv=%s' % prefix,
        '--with-zlib=%s' % prefix,
    ]

    if not libxml2_version:
        # Take the version from the unpacked directory name, e.g. 'libxml2-2.9.5'.
        libxml2_version = os.path.basename(libxml2_dir).split('-', 1)[-1]

    if tuple(map(tryint, libxml2_version.split('-', 1)[0].split('.'))) >= (2, 9, 5):
        libxml2_configure_cmd.append('--without-lzma')  # can't currently build that

    try:
        if tuple(map(tryint, libxml2_version.split('-', 1)[0].split('.'))) >= (2, 7, 3):
            libxml2_configure_cmd.append('--enable-rebuild-docs=no')
    except Exception:
        pass # this isn't required, so ignore any errors
    cmmi(libxml2_configure_cmd, libxml2_dir, multicore, **call_setup)

    # build libxslt against the libxml2 just built
    libxslt_configure_cmd = configure_cmd + [
        '--without-python',
        '--with-libxml-prefix=%s' % prefix,
        '--without-crypto',
    ]
    cmmi(libxslt_configure_cmd, libxslt_dir, multicore, **call_setup)

    # collect build setup for lxml
    xslt_config = os.path.join(prefix, 'bin', 'xslt-config')
    xml2_config = os.path.join(prefix, 'bin', 'xml2-config')

    lib_dir = os.path.join(prefix, 'lib')
    static_include_dirs.extend([
            os.path.join(prefix, 'include'),
            os.path.join(prefix, 'include', 'libxml2'),
            os.path.join(prefix, 'include', 'libxslt'),
            os.path.join(prefix, 'include', 'libexslt')])
    static_library_dirs.append(lib_dir)

    # Pick up the static archives (.a) of the libraries we just built.
    listdir = os.listdir(lib_dir)
    static_binaries += [os.path.join(lib_dir, filename)
        for lib in ['libxml2', 'libexslt', 'libxslt', 'iconv', 'libz']
        for filename in listdir
        if lib in filename and filename.endswith('.a')]

    return xml2_config, xslt_config
462
463     return xml2_config, xslt_config