2 ldif - generate and parse LDIF data (see RFC 2849)
3 written by Michael Stroeder <michael@stroeder.com>
5 See http://python-ldap.sourceforge.net for details.
7 $Id: ldif.py,v 1.3 2008/02/11 16:35:43 dwelch Exp $
9 Python compatibility note:
10 Tested with Python 2.0+, but should work with Python 1.5.2+.
12 The python-ldap package is distributed under Python-style license.
15 This software is made available by the author(s) to the public for free
16 and "as is". All users of this free software are solely and entirely
17 responsible for their own choice and use of this software for their
18 own purposes. By using this software, each user agrees that the
19 author(s) shall not be liable for damages of any kind in relation to
20 its use or performance. The author(s) do not warrant that this software
21 is fit for any purpose.
23 Note: This file is part of the python-ldap package. For the complete
24 python-ldap package, please visit: http://sourceforge.net/projects/python-ldap/
26 It has been modified for use in HPLIP.
36 'AttrTypeandValueLDIF', 'CreateLDIF', 'ParseLDIF',
45 import urllib # TODO: Replace with urllib2 (urllib is deprecated in Python 3.0)
51 from cStringIO import StringIO
53 from StringIO import StringIO
# Regular-expression building blocks for DN strings and LDIF lines.
attrtype_pattern = r'[\w;.]+(;[\w_-]+)*'
attrvalue_pattern = r'(([^,]|\\,)+|".*?")'
rdn_pattern = attrtype_pattern + r'[ ]*=[ ]*' + attrvalue_pattern
dn_pattern = rdn_pattern + r'([ ]*,[ ]*' + rdn_pattern + r')*[ ]*'
dn_regex = re.compile('^%s$' % dn_pattern)

# vars() at module level is the module namespace, so the %(name)s fields
# are filled from dn_pattern and attrtype_pattern defined just above.
ldif_pattern = '^((dn(:|::) %(dn_pattern)s)|(%(attrtype_pattern)s(:|::) .*)$)+' % vars()

# Modify-operation name -> integer code.
# NOTE(review): the name of this mapping is not visible in the damaged
# listing; MOD_OP_INTEGER is the upstream python-ldap name -- confirm.
MOD_OP_INTEGER = {
    'add': 0, 'delete': 1, 'replace': 2
}

# Integer code -> modify-operation name (used when writing change records).
MOD_OP_STR = {
    0: 'add', 1: 'delete', 2: 'replace'
}

# Accepted values of a "changetype:" line; the dict is used as a set.
CHANGE_TYPES = ['add', 'delete', 'modify', 'modrdn']
valid_changetype_dict = {}
for c in CHANGE_TYPES:
    valid_changetype_dict[c] = None

# Matches values that cannot be written as a plain LDIF string: a leading
# NUL/LF/CR/space/':'/'<', any NUL/LF/CR or character in \200-\377, or
# trailing spaces.
SAFE_STRING_PATTERN = '(^(\000|\n|\r| |:|<)|[\000\n\r\200-\377]+|[ ]+$)'
safe_string_re = re.compile(SAFE_STRING_PATTERN)
def is_dn(s):
    """
    Return a true value if s is a string representation of an LDAP DN.

    s
        string to check

    The empty string is accepted as a DN; any other string must be
    matched in full by dn_regex.
    """
    # NOTE(review): the empty-DN shortcut is restored from the upstream
    # python-ldap module; the damaged listing dropped these lines.
    if s == '':
        return True
    rm = dn_regex.match(s)
    return rm is not None and rm.group(0) == s
def needs_base64(s):
    """
    Return True if s has to be base64-encoded because of special chars.

    s
        attribute value to check against safe_string_re
    """
    return safe_string_re.search(s) is not None
def list_dict(l):
    """
    Return a dictionary with all items of l being the keys of the
    dictionary; every value is None.

    l
        iterable of hashable items
    """
    return dict.fromkeys(l)
108 Write LDIF entry or change records to file object
109 Copy LDIF input to a file output object containing all data retrieved
def __init__(self, output_file, base64_attrs=None, cols=76, line_sep='\n'):
    """
    output_file
        file object for output
    base64_attrs
        list of attribute types to be base64-encoded in any case
    cols
        Specifies how many columns a line may have before it's
        folded into many lines.
    line_sep
        String used as line separator
    """
    self._output_file = output_file
    # Lower-cased so the lookup on attribute type is case-insensitive.
    self._base64_attrs = list_dict([a.lower() for a in (base64_attrs or [])])
    # Read by the line-folding method; this assignment was lost in the listing.
    self._cols = cols
    self._line_sep = line_sep
    # Number of records written so far.
    self.records_written = 0
def _unfoldLDIFLine(self, line):
    """
    Write string line as one or more folded lines.

    line
        complete logical line without line separator

    The first physical line holds up to self._cols characters; every
    continuation line starts with one space followed by up to
    self._cols-1 characters.
    """
    write = self._output_file.write
    sep = self._line_sep
    # A width below 2 gives a continuation step of zero or less and the
    # loop below would never terminate, so clamp it.
    cols = max(self._cols, 2)
    line_len = len(line)
    if line_len <= cols:
        write(line)
        write(sep)
        return
    # Fold line
    write(line[0:cols])
    write(sep)
    step = cols - 1
    pos = cols
    while pos < line_len:
        write(' ')
        write(line[pos:min(line_len, pos + step)])
        write(sep)
        pos = pos + step
def _unparseAttrTypeandValue(self, attr_type, attr_value):
    """
    Write a single attribute type/value pair

    attr_type
        attribute type
    attr_value
        attribute value

    The value is written base64-encoded ("type:: ...") when the type is
    listed in base64_attrs or needs_base64() says so, otherwise as
    plain "type: value".
    """
    if attr_type.lower() in self._base64_attrs or \
            needs_base64(attr_value):
        # Encode with base64. b64encode emits no line breaks, which is
        # what encodestring(...).replace('\n', '') produced.
        raw = attr_value
        if not isinstance(raw, bytes):
            raw = raw.encode('utf-8')
        encoded = base64.b64encode(raw)
        if not isinstance(encoded, str):
            # Python 3 returns bytes; the line is assembled as text.
            encoded = encoded.decode('ascii')
        self._unfoldLDIFLine(':: '.join([attr_type, encoded]))
    else:
        self._unfoldLDIFLine(': '.join([attr_type, attr_value]))
def _unparseEntryRecord(self, entry):
    """
    Write all attributes of an entry, sorted by attribute type.

    entry
        dictionary holding an entry; each value is iterated as the
        sequence of values for that attribute type
    """
    # list() is needed because dict.keys() is a view on Python 3.
    attr_types = list(entry.keys())
    attr_types.sort()
    for attr_type in attr_types:
        for attr_value in entry[attr_type]:
            self._unparseAttrTypeandValue(attr_type, attr_value)
def _unparseChangeRecord(self, modlist):
    """
    Write a change record.

    modlist
        list of additions (2-tuple) or modifications (3-tuple)

    The length of the first item decides the changetype ('add' for
    2-tuples, 'modify' for 3-tuples). Raises ValueError if an item has
    any other length or differs in length from the first item.
    """
    mod_len = len(modlist[0])
    if mod_len == 2:
        changetype = 'add'
    elif mod_len == 3:
        changetype = 'modify'
    else:
        raise ValueError("modlist item of wrong length")
    self._unparseAttrTypeandValue('changetype', changetype)
    for mod in modlist:
        # Checked per item so a mixed list fails with the intended message
        # instead of a tuple-unpacking error.
        if len(mod) != mod_len:
            raise ValueError("Subsequent modlist item of wrong length")
        if mod_len == 2:
            mod_type, mod_vals = mod
        else:
            mod_op, mod_type, mod_vals = mod
            self._unparseAttrTypeandValue(MOD_OP_STR[mod_op], mod_type)
        if mod_vals:
            for mod_val in mod_vals:
                self._unparseAttrTypeandValue(mod_type, mod_val)
        if mod_len == 3:
            # A '-' line terminates each modification.
            self._output_file.write('-' + self._line_sep)
def unparse(self, dn, record):
    """
    Write one LDIF record followed by an empty separator line.

    dn
        string-representation of distinguished name
    record
        Either a dictionary holding the LDAP entry {attrtype:record}
        or a list with a modify list like for LDAPObject.modify().

    Empty records are ignored. Raises ValueError if record is neither a
    dictionary nor a list.
    """
    if not record:
        # Simply ignore empty records
        return
    # Pick the record type specific writer first, so an unsupported
    # record does not leave a dangling "dn:" line in the output.
    if isinstance(record, dict):
        write_record = self._unparseEntryRecord
    elif isinstance(record, list):
        write_record = self._unparseChangeRecord
    else:
        raise ValueError("Argument record must be dictionary or list")
    # Start with line containing the distinguished name
    self._unparseAttrTypeandValue('dn', dn)
    write_record(record)
    # Write empty line separating the records
    self._output_file.write(self._line_sep)
    # Count records written
    self.records_written = self.records_written + 1
def CreateLDIF(dn, record, base64_attrs=None, cols=76):
    """
    Create LDIF single formatted record including trailing empty line.
    This is a compatibility function. Use is deprecated!

    dn
        string-representation of distinguished name
    record
        Either a dictionary holding the LDAP entry {attrtype:record}
        or a list with a modify list like for LDAPObject.modify().
    base64_attrs
        list of attribute types to be base64-encoded in any case
    cols
        Specifies how many columns a line may have before it's
        folded into many lines.

    Returns the LDIF text written by LDIFWriter as a string.
    """
    f = StringIO()
    try:
        ldif_writer = LDIFWriter(f, base64_attrs, cols, '\n')
        ldif_writer.unparse(dn, record)
        return f.getvalue()
    finally:
        # Release the in-memory buffer even if writing fails.
        f.close()
260 Base class for a LDIF parser. Applications should sub-class this
261 class and override method handle() to implement something meaningful.
263 Public class attributes:
265 Counter for records processed so far
def _stripLineSep(self, s):
    """
    Strip trailing line separators from s, but no other whitespaces

    s
        line as read from the input file

    Only the end of the string is touched: trailing LF characters are
    removed first, then trailing CR characters.
    """
    # rstrip, not strip: a leading CR/LF is part of the data, not a
    # line separator.
    return s.rstrip('\n').rstrip('\r')
def __init__(self, input_file, ignored_attr_types=None,
             max_entries=0, process_url_schemes=None, line_sep='\n'):
    """
    Parameters:
    input_file
        File-object to read the LDIF input from
    ignored_attr_types
        Attributes with these attribute type names will be ignored.
    max_entries
        If non-zero specifies the maximum number of entries to be
        read from the input file.
    process_url_schemes
        List containing strings with URLs schemes to process with urllib.
        An empty list turns off all URL processing and the attribute
        is ignored completely.
    line_sep
        String used as line separator
    """
    self._input_file = input_file
    self._max_entries = max_entries
    # Both lists are lower-cased and turned into dicts for membership tests.
    self._process_url_schemes = list_dict([s.lower() for s in (process_url_schemes or [])])
    self._ignored_attr_types = list_dict([a.lower() for a in (ignored_attr_types or [])])
    self._line_sep = line_sep
    # Counter for records processed so far.
    self.records_read = 0
def handle(self, dn, entry):
    """
    Process a single content LDIF record. This method should be
    implemented by applications using LDIFParser.

    dn
        distinguished name of the record
    entry
        dictionary mapping attribute type to a list of values

    The base implementation does nothing.
    """
    pass
def _unfoldLDIFLine(self):
    """
    Unfold several folded lines with leading space into one line.

    Starts from the line already held in self._line and appends every
    following line that begins with a single space (the space itself is
    dropped). On return self._line holds the first line that does not
    belong to the unfolded one (empty string at end of input).
    """
    parts = [self._stripLineSep(self._line)]
    self._line = self._input_file.readline()
    while self._line and self._line[0] == ' ':
        parts.append(self._stripLineSep(self._line[1:]))
        self._line = self._input_file.readline()
    return ''.join(parts)
def _parseAttrTypeandValue(self):
    """
    Parse a single attribute type and value pair from one or
    more lines of LDIF data

    Returns a 2-tuple (attr_type, attr_value):
    - (None, None) for an empty line, end of input, or a malformed
      line without a colon (the latter is logged);
    - (attr_type, None) for a "type:< url" value whose URL scheme is
      not listed in process_url_schemes;
    - otherwise the attribute type and its decoded/fetched/plain value.
    """
    # Reading new attribute line
    unfolded_line = self._unfoldLDIFLine()

    # Ignore comments which can also be folded
    while unfolded_line and unfolded_line[0] == '#':
        unfolded_line = self._unfoldLDIFLine()

    if not unfolded_line or unfolded_line == '\n' or unfolded_line == '\r\n':
        return None, None

    try:
        colon_pos = unfolded_line.index(':')
    except ValueError:
        # Treat malformed lines without colon as non-existent
        log.error("Malformed line: %s" % unfolded_line)
        return None, None

    attr_type = unfolded_line[0:colon_pos]

    # The character after the colon selects the value encoding.
    value_spec = unfolded_line[colon_pos:colon_pos+2]

    if value_spec == '::':
        # attribute value needs base64-decoding
        attr_value = base64.b64decode(unfolded_line[colon_pos+2:])

    elif value_spec == ':<':
        # fetch attribute value from URL, but only for schemes the
        # caller explicitly allowed
        url = unfolded_line[colon_pos+2:].strip()
        attr_value = None
        if self._process_url_schemes:
            u = urlparse.urlparse(url)
            if u[0] in self._process_url_schemes:
                # NOTE(review): urlparse/urllib.urlopen are the Python 2
                # modules imported at file level.
                attr_value = urllib.urlopen(url).read()

    else:
        # Plain value: everything after the colon, minus leading fill.
        # Slicing from colon_pos+1 keeps the first character of
        # "type:value" (no space) and yields '' for a bare "type:".
        attr_value = unfolded_line[colon_pos+1:].lstrip()

    return attr_type, attr_value
# NOTE(review): the def line is missing from the damaged listing; the
# name parse() is taken from the upstream python-ldap module -- confirm.
def parse(self):
    """
    Continuously read and parse LDIF records.

    Reads from the input file until it is exhausted or, if max_entries
    is non-zero, until that many records have been handled. For every
    record that yields at least one attribute, handle(dn, entry) is
    called and records_read is incremented.

    Raises ValueError for a repeated or invalid "dn:" line, and for a
    "changetype:" line that precedes the dn, is repeated, or carries an
    unknown value.
    """
    self._line = self._input_file.readline()

    while self._line and \
            (not self._max_entries or self.records_read < self._max_entries):

        # Reset record
        dn = None
        changetype = None
        entry = {}

        attr_type, attr_value = self._parseAttrTypeandValue()

        # A None type marks the end of the record. A None value with a
        # real type is an unprocessed URL attribute; it is skipped below
        # and must not cut the record short.
        while attr_type is not None:
            attr_type = attr_type.lower()
            log.debug("%s ---> %s" % (repr(attr_type), repr(attr_value)))

            if not attr_type or not attr_value:
                # Nothing usable on this line; move on to the next one.
                attr_type, attr_value = self._parseAttrTypeandValue()
                continue

            if attr_type == 'dn':
                # attr type and value pair was DN of LDIF record
                if dn is not None:
                    raise ValueError('Two lines starting with dn: in one record.')

                if not is_dn(attr_value):
                    raise ValueError('No valid string-representation of distinguished name %s.' % (repr(attr_value)))

                dn = attr_value

            elif attr_type == 'version' and dn is None:
                # A version line ahead of the dn carries no entry data.
                pass

            elif attr_type == 'changetype':
                # attr type and value pair was changetype of LDIF record
                if dn is None:
                    raise ValueError('Read changetype: before getting valid dn: line.')

                if changetype is not None:
                    raise ValueError('Two lines starting with changetype: in one record.')

                if attr_value not in valid_changetype_dict:
                    raise ValueError('changetype value %s is invalid.' % (repr(attr_value)))

                changetype = attr_value

            elif attr_type not in self._ignored_attr_types:
                # Add the attribute to the entry if not ignored attribute
                if attr_type in entry:
                    entry[attr_type].append(attr_value)
                else:
                    entry[attr_type] = [attr_value]

            # Read the next line within an entry
            attr_type, attr_value = self._parseAttrTypeandValue()

        if entry:
            # hand the finished record to the application
            self.handle(dn, entry)
            self.records_read += 1
class LDIFRecordList(LDIFParser):
    """
    Collect all records of LDIF input into a single list
    of 2-tuples (dn, entry). It can be a memory hog!
    """

    def __init__(self, input_file, ignored_attr_types=None,
                 max_entries=0, process_url_schemes=None):
        """
        See LDIFParser.__init__()

        Additional attribute:
        all_records
            List instance for storing parsed records
        """
        LDIFParser.__init__(self, input_file, ignored_attr_types, max_entries, process_url_schemes)
        self.all_records = []

    def handle(self, dn, entry):
        """
        Append single record to the list of all records.
        """
        self.all_records.append((dn, entry))
class LDIFCopy(LDIFParser):
    """
    Copy LDIF input to LDIF output: every parsed record is written
    again through an LDIFWriter.
    """

    def __init__(self, input_file, output_file, ignored_attr_types=None,
                 max_entries=0, process_url_schemes=None, base64_attrs=None,
                 cols=76, line_sep='\n'):
        """
        See LDIFParser.__init__() and LDIFWriter.__init__()

        base64_attrs, cols and line_sep are passed to the writer only.
        """
        LDIFParser.__init__(self, input_file, ignored_attr_types, max_entries, process_url_schemes)
        self._output_ldif = LDIFWriter(output_file, base64_attrs, cols, line_sep)

    def handle(self, dn, entry):
        """
        Write single LDIF record to output file.
        """
        self._output_ldif.unparse(dn, entry)
def ParseLDIF(f, ignore_attrs=None, maxentries=0):
    """
    Parse LDIF records read from file.
    This is a compatibility function. Use is deprecated!

    f
        file object to read the LDIF input from
    ignore_attrs
        attribute types to leave out of the parsed entries
    maxentries
        if non-zero, maximum number of entries to read

    Returns the list of (dn, entry) 2-tuples collected by
    LDIFRecordList. URL processing is switched off.
    """
    ldif_parser = LDIFRecordList(f, ignored_attr_types=ignore_attrs,
                                 max_entries=maxentries, process_url_schemes=0)
    # NOTE(review): this call is missing from the damaged listing; parse()
    # is the upstream python-ldap method name -- confirm.
    ldif_parser.parse()
    return ldif_parser.all_records