1 # -*- test-case-name: twisted.words.test.test_jabberxmppstringprep -*-
3 # Copyright (c) Twisted Matrix Laboratories.
4 # See LICENSE for details.
7 from zope.interface import Interface, implements
9 if sys.version_info < (2,3,2):
13 dots = re.compile(u"[\u002E\u3002\uFF0E\uFF61]")
14 def nameprep(self, label):
21 warnings.warn("Accented and non-Western Jabber IDs will not be properly "
22 "case-folded with this version of Python, resulting in "
23 "incorrect protocol-level behavior. It is strongly "
24 "recommended you upgrade to Python 2.3.2 or newer if you "
25 "intend to use Twisted's Jabber support.")
29 # We require Unicode version 3.2. Python 2.5 and later provide this as
30 # a separate object. Before that the unicodedata module uses 3.2.
32 from unicodedata import ucd_3_2_0 as unicodedata
35 from encodings import idna
41 class ILookupTable(Interface):
42 """ Interface for character lookup classes. """
45 """ Return whether character is in this table. """
47 class IMappingTable(Interface):
48 """ Interface for character mapping classes. """
51 """ Return mapping for character. """
class LookupTableFromFunction:
    """
    Character lookup table whose membership test is a plain callable.

    The callable handed to the constructor is installed unchanged as the
    instance's C{lookup} attribute, so calling C{table.lookup(c)} calls the
    supplied function directly.
    """

    implements(ILookupTable)

    def __init__(self, in_table_function):
        """
        @param in_table_function: Callable taking a single character; it
            becomes this table's C{lookup}.
        """
        self.lookup = in_table_function
62 implements(ILookupTable)
64 def __init__(self, table):
68 return c in self._table
class MappingTableFromFunction:
    """
    Character mapping table whose mapping step is a plain callable.

    The callable handed to the constructor is installed unchanged as the
    instance's C{map} attribute, so calling C{table.map(c)} calls the
    supplied function directly.
    """

    implements(IMappingTable)

    def __init__(self, map_table_function):
        """
        @param map_table_function: Callable taking a single character and
            returning its mapping; it becomes this table's C{map}.
        """
        self.map = map_table_function
77 class EmptyMappingTable:
79 implements(IMappingTable)
81 def __init__(self, in_table_function):
82 self._in_table_function = in_table_function
85 if self._in_table_function(c):
def __init__(self, mappings=None, normalize=True, prohibiteds=None,
             check_unassigneds=True, check_bidi=True):
    """
    Configure a stringprep profile.

    @param mappings: Mapping tables; C{map()} calls C{mapping.map(c)} on
        them for each character. Defaults to a new empty list.
    @param normalize: Flag stored as C{self.normalize}. NOTE(review):
        presumably gates the NFKC normalization done in C{prepare()};
        confirm against the full method.
    @param prohibiteds: Lookup tables iterated by C{check_prohibiteds()},
        which raises C{UnicodeError} for an invalid character. Defaults to
        a new empty list.
    @param check_unassigneds: When true, C{prepare()} runs
        C{check_unassigneds()} on the result.
    @param check_bidi: When true, C{prepare()} runs
        C{check_bidirectionals()} on the result.
    """
    # A literal ``[]`` default is created once, at definition time, and
    # would be stored on every instance built without the argument, so
    # mutating one profile's list would silently change all the others.
    # Use None as the sentinel and give each instance its own list.
    if mappings is None:
        mappings = []
    if prohibiteds is None:
        prohibiteds = []

    self.mappings = mappings
    self.normalize = normalize
    self.prohibiteds = prohibiteds
    self.do_check_unassigneds = check_unassigneds
    self.do_check_bidi = check_bidi
99 def prepare(self, string):
100 result = self.map(string)
102 result = unicodedata.normalize("NFKC", result)
103 self.check_prohibiteds(result)
104 if self.do_check_unassigneds:
105 self.check_unassigneds(result)
106 if self.do_check_bidi:
107 self.check_bidirectionals(result)
110 def map(self, string):
116 for mapping in self.mappings:
117 result_c = mapping.map(c)
121 if result_c is not None:
122 result.append(result_c)
124 return u"".join(result)
126 def check_prohibiteds(self, string):
128 for table in self.prohibiteds:
130 raise UnicodeError, "Invalid character %s" % repr(c)
132 def check_unassigneds(self, string):
134 if stringprep.in_table_a1(c):
135 raise UnicodeError, "Unassigned code point %s" % repr(c)
137 def check_bidirectionals(self, string):
139 found_RandALCat = False
142 if stringprep.in_table_d1(c):
143 found_RandALCat = True
144 if stringprep.in_table_d2(c):
147 if found_LCat and found_RandALCat:
148 raise UnicodeError, "Violation of BIDI Requirement 2"
150 if found_RandALCat and not (stringprep.in_table_d1(string[0]) and
151 stringprep.in_table_d1(string[-1])):
152 raise UnicodeError, "Violation of BIDI Requirement 3"
156 """ Implements preparation of internationalized domain names.
158 This class implements preparing internationalized domain names using the
159 rules defined in RFC 3490, section 4 (Conversion operations).
161 We do not perform step 4 since we deal with unicode representations of
162 domain names and do not convert from or to ASCII representations using
163 punycode encoding. When such a conversion is needed, the C{idna} standard
164 library provides the C{ToUnicode()} and C{ToASCII()} functions. Note that
165 C{idna} itself assumes UseSTD3ASCIIRules to be false.
167 The following steps are performed by C{prepare()}:
169 - Split the domain name into labels at the dots (RFC 3490, 3.1)
170 - Apply nameprep proper on each label (RFC 3491)
171 - Enforce the restrictions on ASCII characters in host names by
172 assuming STD3ASCIIRules to be true. (STD 3)
173 - Rejoin the labels using the label separator U+002E (full stop).
177 # Prohibited characters: every ASCII code point except letters, digits and hyphen-minus (U+002D).
178 prohibiteds = [unichr(n) for n in range(0x00, 0x2c + 1) +
179 range(0x2e, 0x2f + 1) +
180 range(0x3a, 0x40 + 1) +
181 range(0x5b, 0x60 + 1) +
182 range(0x7b, 0x7f + 1) ]
184 def prepare(self, string):
187 labels = idna.dots.split(string)
189 if labels and len(labels[-1]) == 0:
196 result.append(self.nameprep(label))
198 return ".".join(result) + trailing_dot
200 def check_prohibiteds(self, string):
202 if c in self.prohibiteds:
203 raise UnicodeError, "Invalid character %s" % repr(c)
205 def nameprep(self, label):
206 label = idna.nameprep(label)
207 self.check_prohibiteds(label)
209 raise UnicodeError, "Invalid leading hyphen-minus"
211 raise UnicodeError, "Invalid trailing hyphen-minus"
215 case_map = MappingTableFromFunction(lambda c: c.lower())
216 nodeprep = Profile(mappings=[case_map],
218 prohibiteds=[LookupTable([u' ', u'"', u'&', u"'", u'/',
219 u':', u'<', u'>', u'@'])],
220 check_unassigneds=False,
223 resourceprep = Profile(normalize=False,
224 check_unassigneds=False,
# Mapping tables, wrapping the stdlib stringprep functions.
B_1 = EmptyMappingTable(stringprep.in_table_b1)          # B.1: commonly mapped to nothing
B_2 = MappingTableFromFunction(stringprep.map_table_b2)  # B.2: case folding used with NFKC

# Prohibited-output tables, wrapping the stdlib stringprep predicates.
C_11 = LookupTableFromFunction(stringprep.in_table_c11)  # ASCII space characters
C_12 = LookupTableFromFunction(stringprep.in_table_c12)  # non-ASCII space characters
C_21 = LookupTableFromFunction(stringprep.in_table_c21)  # ASCII control characters
C_22 = LookupTableFromFunction(stringprep.in_table_c22)  # non-ASCII control characters
C_3 = LookupTableFromFunction(stringprep.in_table_c3)    # private use
C_4 = LookupTableFromFunction(stringprep.in_table_c4)    # non-character code points
C_5 = LookupTableFromFunction(stringprep.in_table_c5)    # surrogate codes
C_6 = LookupTableFromFunction(stringprep.in_table_c6)    # inappropriate for plain text
C_7 = LookupTableFromFunction(stringprep.in_table_c7)    # inappropriate for canonical representation
C_8 = LookupTableFromFunction(stringprep.in_table_c8)    # change display properties / deprecated
C_9 = LookupTableFromFunction(stringprep.in_table_c9)    # tagging characters

# Node identifiers: B.1 and B.2 mappings, every C table above, plus an
# explicit list of characters that may not appear.
nodeprep = Profile(
    mappings=[B_1, B_2],
    prohibiteds=[
        C_11, C_12,
        C_21, C_22,
        C_3, C_4, C_5, C_6, C_7, C_8, C_9,
        LookupTable([u'"', u'&', u"'", u'/', u':', u'<', u'>', u'@']),
    ])

# Resource identifiers: only the B.1 mapping (no B.2 case folding), and the
# same C tables except C_11, so ASCII space is not prohibited.
resourceprep = Profile(
    mappings=[B_1],
    prohibiteds=[
        C_12,
        C_21, C_22,
        C_3, C_4, C_5, C_6, C_7, C_8, C_9,
    ])

# Module-level NamePrep instance.
nameprep = NamePrep()