base/ldif.py

   1 """
   2 ldif - generate and parse LDIF data (see RFC 2849)
   3 written by Michael Stroeder <michael@stroeder.com>
   4
   5 See http://python-ldap.sourceforge.net for details.
   6
   7 $Id: ldif.py,v 1.3 2008/02/11 16:35:43 dwelch Exp $
   8
   9 Python compatibility note:
  10 Tested with Python 2.0+, but should work with Python 1.5.2+.
  11
  12 The python-ldap package is distributed under Python-style license.
  13
  14 Standard disclaimer:
  15    This software is made available by the author(s) to the public for free
  16    and "as is".  All users of this free software are solely and entirely
  17    responsible for their own choice and use of this software for their
  18    own purposes.  By using this software, each user agrees that the
  19    author(s) shall not be liable for damages of any kind in relation to
  20    its use or performance. The author(s) do not warrant that this software
  21    is fit for any purpose.
  22
  23 Note: This file is part of the python-ldap package. For the complete
  24   python-ldap package, please visit: http://sourceforge.net/projects/python-ldap/
  25
  26   It has been modified for use in HPLIP.
  27
  28 """
  29
# Version string of this ldif module.
__version__ = '0.5.5'

# Names exported by "from ldif import *".
__all__ = [
  # constants
  'ldif_pattern',
  # functions (CreateLDIF and ParseLDIF are deprecated compatibility wrappers)
  'AttrTypeandValueLDIF', 'CreateLDIF', 'ParseLDIF',
  # classes
  'LDIFWriter',
  'LDIFParser',
  'LDIFRecordList',
  'LDIFCopy',
]
  43
  44 import urlparse
  45 import urllib # TODO: Replace with urllib2 (urllib is deprecated in Python 3.0)
  46 import base64
  47 import re
  48 import types
  49
  50 try:
  51     from cStringIO import StringIO
  52 except ImportError:
  53     from StringIO import StringIO
  54
  55 from base.g import *
  56
  57 attrtype_pattern = r'[\w;.]+(;[\w_-]+)*'
  58 attrvalue_pattern = r'(([^,]|\\,)+|".*?")'
  59 rdn_pattern = attrtype_pattern + r'[ ]*=[ ]*' + attrvalue_pattern
  60 dn_pattern   = rdn_pattern + r'([ ]*,[ ]*' + rdn_pattern + r')*[ ]*'
  61 dn_regex   = re.compile('^%s$' % dn_pattern)
  62
  63 ldif_pattern = '^((dn(:|::) %(dn_pattern)s)|(%(attrtype_pattern)s(:|::) .*)$)+' % vars()
  64
  65 MOD_OP_INTEGER = {
  66   'add':0, 'delete':1, 'replace':2
  67 }
  68
  69 MOD_OP_STR = {
  70   0:'add', 1:'delete', 2:'replace'
  71 }
  72
  73 CHANGE_TYPES = ['add', 'delete', 'modify', 'modrdn']
  74 valid_changetype_dict = {}
  75 for c in CHANGE_TYPES:
  76     valid_changetype_dict[c]=None
  77
  78
  79 SAFE_STRING_PATTERN = '(^(\000|\n|\r| |:|<)|[\000\n\r\200-\377]+|[ ]+$)'
  80 safe_string_re = re.compile(SAFE_STRING_PATTERN)
  81
  82 def is_dn(s):
  83     """
  84     returns 1 if s is a LDAP DN
  85     """
  86     if s=='':
  87         return 1
  88     rm = dn_regex.match(s)
  89     return rm!=None and rm.group(0)==s
  90
  91
  92 def needs_base64(s):
  93     """
  94     returns 1 if s has to be base-64 encoded because of special chars
  95     """
  96     return not safe_string_re.search(s) is None
  97
  98
  99 def list_dict(l):
 100     """
 101     return a dictionary with all items of l being the keys of the dictionary
 102     """
 103     return dict([(i, None) for i in l])
 104
 105
 106 class LDIFWriter:
 107     """
 108     Write LDIF entry or change records to file object
 109     Copy LDIF input to a file output object containing all data retrieved
 110     via URLs
 111     """
 112
 113     def __init__(self, output_file, base64_attrs=None, cols=76, line_sep='\n'):
 114         """
 115         output_file
 116             file object for output
 117         base64_attrs
 118             list of attribute types to be base64-encoded in any case
 119         cols
 120             Specifies how many columns a line may have before it's
 121             folded into many lines.
 122         line_sep
 123             String used as line separator
 124         """
 125         self._output_file = output_file
 126         self._base64_attrs = list_dict([a.lower() for a in (base64_attrs or [])])
 127         self._cols = cols
 128         self._line_sep = line_sep
 129         self.records_written = 0
 130
 131     def _unfoldLDIFLine(self, line):
 132         """
 133         Write string line as one or more folded lines
 134         """
 135         # Check maximum line length
 136         line_len = len(line)
 137         if line_len<=self._cols:
 138             self._output_file.write(line)
 139             self._output_file.write(self._line_sep)
 140         else:
 141             # Fold line
 142             pos = self._cols
 143             self._output_file.write(line[0:min(line_len, self._cols)])
 144             self._output_file.write(self._line_sep)
 145             while pos<line_len:
 146                 self._output_file.write(' ')
 147                 self._output_file.write(line[pos:min(line_len, pos+self._cols-1)])
 148                 self._output_file.write(self._line_sep)
 149                 pos = pos+self._cols-1
 150         return # _unfoldLDIFLine()
 151
 152     def _unparseAttrTypeandValue(self, attr_type, attr_value):
 153         """
 154         Write a single attribute type/value pair
 155
 156         attr_type
 157               attribute type
 158         attr_value
 159               attribute value
 160         """
 161         if self._base64_attrs.has_key(attr_type.lower()) or \
 162            needs_base64(attr_value):
 163             # Encode with base64
 164             self._unfoldLDIFLine(':: '.join([attr_type, base64.encodestring(attr_value).replace('\n', '')]))
 165         else:
 166             self._unfoldLDIFLine(': '.join([attr_type, attr_value]))
 167         return # _unparseAttrTypeandValue()
 168
 169     def _unparseEntryRecord(self, entry):
 170         """
 171         entry
 172             dictionary holding an entry
 173         """
 174         attr_types = entry.keys()[:]
 175         attr_types.sort()
 176         for attr_type in attr_types:
 177             for attr_value in entry[attr_type]:
 178                 self._unparseAttrTypeandValue(attr_type, attr_value)
 179
 180     def _unparseChangeRecord(self, modlist):
 181         """
 182         modlist
 183             list of additions (2-tuple) or modifications (3-tuple)
 184         """
 185         mod_len = len(modlist[0])
 186         if mod_len==2:
 187             changetype = 'add'
 188         elif mod_len==3:
 189             changetype = 'modify'
 190         else:
 191             raise ValueError, "modlist item of wrong length"
 192         self._unparseAttrTypeandValue('changetype', changetype)
 193         for mod in modlist:
 194             if mod_len==2:
 195                 mod_type, mod_vals = mod
 196             elif mod_len==3:
 197                 mod_op, mod_type, mod_vals = mod
 198                 self._unparseAttrTypeandValue(MOD_OP_STR[mod_op], mod_type)
 199             else:
 200                 raise ValueError, "Subsequent modlist item of wrong length"
 201             if mod_vals:
 202                 for mod_val in mod_vals:
 203                     self._unparseAttrTypeandValue(mod_type, mod_val)
 204             if mod_len==3:
 205                 self._output_file.write('-'+self._line_sep)
 206
 207     def unparse(self, dn, record):
 208         """
 209         dn
 210               string-representation of distinguished name
 211         record
 212               Either a dictionary holding the LDAP entry {attrtype:record}
 213               or a list with a modify list like for LDAPObject.modify().
 214         """
 215         if not record:
 216             # Simply ignore empty records
 217             return
 218         # Start with line containing the distinguished name
 219         self._unparseAttrTypeandValue('dn', dn)
 220         # Dispatch to record type specific writers
 221         if isinstance(record, types.DictType):
 222             self._unparseEntryRecord(record)
 223         elif isinstance(record, types.ListType):
 224             self._unparseChangeRecord(record)
 225         else:
 226             raise ValueError, "Argument record must be dictionary or list"
 227         # Write empty line separating the records
 228         self._output_file.write(self._line_sep)
 229         # Count records written
 230         self.records_written = self.records_written+1
 231         return # unparse()
 232
 233
 234 def CreateLDIF(dn, record, base64_attrs=None, cols=76):
 235     """
 236     Create LDIF single formatted record including trailing empty line.
 237     This is a compability function. Use is deprecated!
 238
 239     dn
 240           string-representation of distinguished name
 241     record
 242           Either a dictionary holding the LDAP entry {attrtype:record}
 243           or a list with a modify list like for LDAPObject.modify().
 244     base64_attrs
 245           list of attribute types to be base64-encoded in any case
 246     cols
 247           Specifies how many columns a line may have before it's
 248           folded into many lines.
 249     """
 250     f = StringIO()
 251     ldif_writer = LDIFWriter(f, base64_attrs, cols, '\n')
 252     ldif_writer.unparse(dn, record)
 253     s = f.getvalue()
 254     f.close()
 255     return s
 256
 257
 258 class LDIFParser:
 259     """
 260     Base class for a LDIF parser. Applications should sub-class this
 261     class and override method handle() to implement something meaningful.
 262
 263     Public class attributes:
 264     records_read
 265           Counter for records processed so far
 266     """
 267
 268     def _stripLineSep(self, s):
 269         """
 270         Strip trailing line separators from s, but no other whitespaces
 271         """
 272         return s.strip('\n').strip('\r')
 273
 274 ##        if s[-2:] == '\r\n':
 275 ##            return s[:-2]
 276 ##        elif s[-1:] == '\n':
 277 ##            return s[:-1]
 278 ##        else:
 279 ##            return s
 280
 281     def __init__(self, input_file, ignored_attr_types=None,
 282       max_entries=0, process_url_schemes=None, line_sep='\n'):
 283         """
 284         Parameters:
 285         input_file
 286             File-object to read the LDIF input from
 287         ignored_attr_types
 288             Attributes with these attribute type names will be ignored.
 289         max_entries
 290             If non-zero specifies the maximum number of entries to be
 291             read from f.
 292         process_url_schemes
 293             List containing strings with URLs schemes to process with urllib.
 294             An empty list turns off all URL processing and the attribute
 295             is ignored completely.
 296         line_sep
 297             String used as line separator
 298         """
 299         self._input_file = input_file
 300         self._max_entries = max_entries
 301         self._process_url_schemes = list_dict([s.lower() for s in (process_url_schemes or [])])
 302         self._ignored_attr_types = list_dict([a.lower() for a in (ignored_attr_types or [])])
 303         self._line_sep = line_sep
 304         self.records_read = 0
 305
 306     def handle(self, dn, entry):
 307         """
 308         Process a single content LDIF record. This method should be
 309         implemented by applications using LDIFParser.
 310         """
 311
 312     def _unfoldLDIFLine(self):
 313         """
 314         Unfold several folded lines with trailing space into one line
 315         """
 316         unfolded_lines = [ self._stripLineSep(self._line) ]
 317         self._line = self._input_file.readline()
 318
 319         while self._line and self._line[0] == ' ':
 320             unfolded_lines.append(self._stripLineSep(self._line[1:]))
 321             self._line = self._input_file.readline()
 322
 323         return ''.join(unfolded_lines)
 324
 325     def _parseAttrTypeandValue(self):
 326         """
 327         Parse a single attribute type and value pair from one or
 328         more lines of LDIF data
 329         """
 330         # Reading new attribute line
 331         unfolded_line = self._unfoldLDIFLine()
 332
 333         # Ignore comments which can also be folded
 334         while unfolded_line and unfolded_line[0] == '#':
 335             unfolded_line = self._unfoldLDIFLine()
 336
 337         if not unfolded_line or unfolded_line == '\n' or unfolded_line == '\r\n':
 338             return None, None
 339
 340         try:
 341             colon_pos = unfolded_line.index(':')
 342         except ValueError:
 343             # Treat malformed lines without colon as non-existent
 344             log.error("Malformed line: %s" % unfolded_line)
 345             #return None, None
 346             return '', ''
 347
 348         attr_type = unfolded_line[0:colon_pos]
 349
 350         # if needed attribute value is BASE64 decoded
 351         value_spec = unfolded_line[colon_pos:colon_pos+2]
 352
 353         if value_spec=='::':
 354             # attribute value needs base64-decoding
 355             attr_value = base64.decodestring(unfolded_line[colon_pos+2:])
 356
 357         elif value_spec==':<':
 358             # fetch attribute value from URL
 359             url = unfolded_line[colon_pos+2:].strip()
 360             attr_value = None
 361             if self._process_url_schemes:
 362                 u = urlparse.urlparse(url)
 363                 if self._process_url_schemes.has_key(u[0]):
 364                     attr_value = urllib.urlopen(url).read()
 365
 366         elif value_spec==':\r\n' or value_spec=='\n':
 367             attr_value = ''
 368
 369         else:
 370             attr_value = unfolded_line[colon_pos+2:].lstrip()
 371
 372         return attr_type, attr_value
 373
 374     def parse(self):
 375         """
 376         Continously read and parse LDIF records
 377         """
 378         self._line = self._input_file.readline()
 379
 380         while self._line and \
 381               (not self._max_entries or self.records_read<self._max_entries):
 382
 383             # Reset record
 384             version = None
 385             dn = None
 386             changetype = None
 387             modop = None
 388             entry = {}
 389
 390             attr_type, attr_value = self._parseAttrTypeandValue()
 391
 392             while attr_type is not None and attr_value is not None:
 393                 attr_type = attr_type.lower()
 394                 log.debug("%s ---> %s" % (repr(attr_type), repr(attr_value)))
 395
 396                 if not attr_type or not attr_value:
 397                     attr_type, attr_value = self._parseAttrTypeandValue()
 398                     continue
 399
 400                 if attr_type == 'dn':
 401
 402                     # attr type and value pair was DN of LDIF record
 403                     if dn is not None:
 404                         raise ValueError, 'Two lines starting with dn: in one record.'
 405
 406                     if not is_dn(attr_value):
 407                         raise ValueError, 'No valid string-representation of distinguished name %s.' % (repr(attr_value))
 408                     dn = attr_value
 409
 410                 elif attr_type == 'version' and dn is None:
 411                     version = 1
 412
 413                 elif attr_type == 'changetype':
 414                     # attr type and value pair was DN of LDIF record
 415                     if dn is None:
 416                         raise ValueError, 'Read changetype: before getting valid dn: line.'
 417
 418                     if changetype is not None:
 419                         raise ValueError, 'Two lines starting with changetype: in one record.'
 420
 421                     if not attr_value in valid_changetype_dict:
 422                         raise ValueError, 'changetype value %s is invalid.' % (repr(attr_value))
 423
 424                     changetype = attr_value
 425
 426                 elif attr_value is not None and \
 427                      not self._ignored_attr_types.has_key(attr_type.lower()):
 428
 429                     # Add the attribute to the entry if not ignored attribute
 430                     if attr_type in entry:
 431                         entry[attr_type].append(attr_value)
 432                     else:
 433                         entry[attr_type]=[attr_value]
 434
 435                 # Read the next line within an entry
 436                 attr_type, attr_value = self._parseAttrTypeandValue()
 437
 438             if entry:
 439                 # append entry to result list
 440                 self.handle(dn, entry)
 441                 self.records_read += 1
 442
 443         return # parse()
 444
 445
 446 class LDIFRecordList(LDIFParser):
 447     """
 448     Collect all records of LDIF input into a single list.
 449     of 2-tuples (dn, entry). It can be a memory hog!
 450     """
 451
 452     def __init__(self, input_file, ignored_attr_types=None,
 453         max_entries=0, process_url_schemes=None):
 454         """
 455         See LDIFParser.__init__()
 456
 457         Additional Parameters:
 458         all_records
 459             List instance for storing parsed records
 460         """
 461         LDIFParser.__init__(self, input_file, ignored_attr_types, max_entries, process_url_schemes)
 462         self.all_records = []
 463
 464     def handle(self, dn, entry):
 465         """
 466         Append single record to dictionary of all records.
 467         """
 468         self.all_records.append((dn, entry))
 469
 470
 471 class LDIFCopy(LDIFParser):
 472     """
 473     Copy LDIF input to LDIF output containing all data retrieved
 474     via URLs
 475     """
 476
 477     def __init__(self, input_file, output_file, ignored_attr_types=None,
 478         max_entries=0, process_url_schemes=None, base64_attrs=None,
 479         cols=76, line_sep='\n'):
 480         """
 481         See LDIFParser.__init__() and LDIFWriter.__init__()
 482         """
 483         LDIFParser.__init__(self, input_file, ignored_attr_types, max_entries, process_url_schemes)
 484         self._output_ldif = LDIFWriter(output_file, base64_attrs, cols, line_sep)
 485
 486     def handle(self, dn, entry):
 487         """
 488         Write single LDIF record to output file.
 489         """
 490         self._output_ldif.unparse(dn, entry)
 491
 492
 493 def ParseLDIF(f, ignore_attrs=None, maxentries=0):
 494     """
 495     Parse LDIF records read from file.
 496     This is a compability function. Use is deprecated!
 497     """
 498     ldif_parser = LDIFRecordList(f, ignored_attr_types=ignore_attrs,
 499         max_entries=maxentries, process_url_schemes=0)
 500     ldif_parser.parse()
 501     return ldif_parser.all_records