Imported Upstream version 4.8.1
[platform/upstream/gcc48.git] / libgo / go / strings / replace_test.go
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package strings_test
6
7 import (
8         "bytes"
9         "fmt"
10         . "strings"
11         "testing"
12 )
13
14 var htmlEscaper = NewReplacer(
15         "&", "&",
16         "<", "&lt;",
17         ">", "&gt;",
18         `"`, "&quot;",
19         "'", "&apos;",
20 )
21
22 var htmlUnescaper = NewReplacer(
23         "&amp;", "&",
24         "&lt;", "<",
25         "&gt;", ">",
26         "&quot;", `"`,
27         "&apos;", "'",
28 )
29
30 // The http package's old HTML escaping function.
31 func oldHTMLEscape(s string) string {
32         s = Replace(s, "&", "&amp;", -1)
33         s = Replace(s, "<", "&lt;", -1)
34         s = Replace(s, ">", "&gt;", -1)
35         s = Replace(s, `"`, "&quot;", -1)
36         s = Replace(s, "'", "&apos;", -1)
37         return s
38 }
39
40 var capitalLetters = NewReplacer("a", "A", "b", "B")
41
42 // TestReplacer tests the replacer implementations.
43 func TestReplacer(t *testing.T) {
44         type testCase struct {
45                 r       *Replacer
46                 in, out string
47         }
48         var testCases []testCase
49
50         // str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8.
51         str := func(b byte) string {
52                 return string([]byte{b})
53         }
54         var s []string
55
56         // inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00".
57         s = nil
58         for i := 0; i < 256; i++ {
59                 s = append(s, str(byte(i)), str(byte(i+1)))
60         }
61         inc := NewReplacer(s...)
62
63         // Test cases with 1-byte old strings, 1-byte new strings.
64         testCases = append(testCases,
65                 testCase{capitalLetters, "brad", "BrAd"},
66                 testCase{capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)},
67                 testCase{capitalLetters, "", ""},
68
69                 testCase{inc, "brad", "csbe"},
70                 testCase{inc, "\x00\xff", "\x01\x00"},
71                 testCase{inc, "", ""},
72
73                 testCase{NewReplacer("a", "1", "a", "2"), "brad", "br1d"},
74         )
75
76         // repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ...
77         s = nil
78         for i := 0; i < 256; i++ {
79                 n := i + 1 - 'a'
80                 if n < 1 {
81                         n = 1
82                 }
83                 s = append(s, str(byte(i)), Repeat(str(byte(i)), n))
84         }
85         repeat := NewReplacer(s...)
86
87         // Test cases with 1-byte old strings, variable length new strings.
88         testCases = append(testCases,
89                 testCase{htmlEscaper, "No changes", "No changes"},
90                 testCase{htmlEscaper, "I <3 escaping & stuff", "I &lt;3 escaping &amp; stuff"},
91                 testCase{htmlEscaper, "&&&", "&amp;&amp;&amp;"},
92                 testCase{htmlEscaper, "", ""},
93
94                 testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"},
95                 testCase{repeat, "abba", "abbbba"},
96                 testCase{repeat, "", ""},
97
98                 testCase{NewReplacer("a", "11", "a", "22"), "brad", "br11d"},
99         )
100
101         // The remaining test cases have variable length old strings.
102
103         testCases = append(testCases,
104                 testCase{htmlUnescaper, "&amp;amp;", "&amp;"},
105                 testCase{htmlUnescaper, "&lt;b&gt;HTML&apos;s neat&lt;/b&gt;", "<b>HTML's neat</b>"},
106                 testCase{htmlUnescaper, "", ""},
107
108                 testCase{NewReplacer("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"},
109
110                 testCase{NewReplacer("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"},
111
112                 testCase{NewReplacer("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"},
113         )
114
115         // gen1 has multiple old strings of variable length. There is no
116         // overall non-empty common prefix, but some pairwise common prefixes.
117         gen1 := NewReplacer(
118                 "aaa", "3[aaa]",
119                 "aa", "2[aa]",
120                 "a", "1[a]",
121                 "i", "i",
122                 "longerst", "most long",
123                 "longer", "medium",
124                 "long", "short",
125                 "xx", "xx",
126                 "x", "X",
127                 "X", "Y",
128                 "Y", "Z",
129         )
130         testCases = append(testCases,
131                 testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"},
132                 testCase{gen1, "long, longerst, longer", "short, most long, medium"},
133                 testCase{gen1, "xxxxx", "xxxxX"},
134                 testCase{gen1, "XiX", "YiY"},
135                 testCase{gen1, "", ""},
136         )
137
138         // gen2 has multiple old strings with no pairwise common prefix.
139         gen2 := NewReplacer(
140                 "roses", "red",
141                 "violets", "blue",
142                 "sugar", "sweet",
143         )
144         testCases = append(testCases,
145                 testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."},
146                 testCase{gen2, "", ""},
147         )
148
149         // gen3 has multiple old strings with an overall common prefix.
150         gen3 := NewReplacer(
151                 "abracadabra", "poof",
152                 "abracadabrakazam", "splat",
153                 "abraham", "lincoln",
154                 "abrasion", "scrape",
155                 "abraham", "isaac",
156         )
157         testCases = append(testCases,
158                 testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"},
159                 testCase{gen3, "abrasion abracad", "scrape abracad"},
160                 testCase{gen3, "abba abram abrasive", "abba abram abrasive"},
161                 testCase{gen3, "", ""},
162         )
163
164         // foo{1,2,3,4} have multiple old strings with an overall common prefix
165         // and 1- or 2- byte extensions from the common prefix.
166         foo1 := NewReplacer(
167                 "foo1", "A",
168                 "foo2", "B",
169                 "foo3", "C",
170         )
171         foo2 := NewReplacer(
172                 "foo1", "A",
173                 "foo2", "B",
174                 "foo31", "C",
175                 "foo32", "D",
176         )
177         foo3 := NewReplacer(
178                 "foo11", "A",
179                 "foo12", "B",
180                 "foo31", "C",
181                 "foo32", "D",
182         )
183         foo4 := NewReplacer(
184                 "foo12", "B",
185                 "foo32", "D",
186         )
187         testCases = append(testCases,
188                 testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"},
189                 testCase{foo1, "", ""},
190
191                 testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"},
192                 testCase{foo2, "", ""},
193
194                 testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"},
195                 testCase{foo3, "", ""},
196
197                 testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"},
198                 testCase{foo4, "", ""},
199         )
200
201         // genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things.
202         allBytes := make([]byte, 256)
203         for i := range allBytes {
204                 allBytes[i] = byte(i)
205         }
206         allString := string(allBytes)
207         genAll := NewReplacer(
208                 allString, "[all]",
209                 "\xff", "[ff]",
210                 "\x00", "[00]",
211         )
212         testCases = append(testCases,
213                 testCase{genAll, allString, "[all]"},
214                 testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"},
215                 testCase{genAll, "", ""},
216         )
217
218         // Test cases with empty old strings.
219
220         blankToX1 := NewReplacer("", "X")
221         blankToX2 := NewReplacer("", "X", "", "")
222         blankHighPriority := NewReplacer("", "X", "o", "O")
223         blankLowPriority := NewReplacer("o", "O", "", "X")
224         blankNoOp1 := NewReplacer("", "")
225         blankNoOp2 := NewReplacer("", "", "", "A")
226         blankFoo := NewReplacer("", "X", "foobar", "R", "foobaz", "Z")
227         testCases = append(testCases,
228                 testCase{blankToX1, "foo", "XfXoXoX"},
229                 testCase{blankToX1, "", "X"},
230
231                 testCase{blankToX2, "foo", "XfXoXoX"},
232                 testCase{blankToX2, "", "X"},
233
234                 testCase{blankHighPriority, "oo", "XOXOX"},
235                 testCase{blankHighPriority, "ii", "XiXiX"},
236                 testCase{blankHighPriority, "oiio", "XOXiXiXOX"},
237                 testCase{blankHighPriority, "iooi", "XiXOXOXiX"},
238                 testCase{blankHighPriority, "", "X"},
239
240                 testCase{blankLowPriority, "oo", "OOX"},
241                 testCase{blankLowPriority, "ii", "XiXiX"},
242                 testCase{blankLowPriority, "oiio", "OXiXiOX"},
243                 testCase{blankLowPriority, "iooi", "XiOOXiX"},
244                 testCase{blankLowPriority, "", "X"},
245
246                 testCase{blankNoOp1, "foo", "foo"},
247                 testCase{blankNoOp1, "", ""},
248
249                 testCase{blankNoOp2, "foo", "foo"},
250                 testCase{blankNoOp2, "", ""},
251
252                 testCase{blankFoo, "foobarfoobaz", "XRXZX"},
253                 testCase{blankFoo, "foobar-foobaz", "XRX-XZX"},
254                 testCase{blankFoo, "", "X"},
255         )
256
257         // single string replacer
258
259         abcMatcher := NewReplacer("abc", "[match]")
260
261         testCases = append(testCases,
262                 testCase{abcMatcher, "", ""},
263                 testCase{abcMatcher, "ab", "ab"},
264                 testCase{abcMatcher, "abcd", "[match]d"},
265                 testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"},
266         )
267
268         // No-arg test cases.
269
270         nop := NewReplacer()
271         testCases = append(testCases,
272                 testCase{nop, "abc", "abc"},
273                 testCase{nop, "", ""},
274         )
275
276         // Run the test cases.
277
278         for i, tc := range testCases {
279                 if s := tc.r.Replace(tc.in); s != tc.out {
280                         t.Errorf("%d. Replace(%q) = %q, want %q", i, tc.in, s, tc.out)
281                 }
282                 var buf bytes.Buffer
283                 n, err := tc.r.WriteString(&buf, tc.in)
284                 if err != nil {
285                         t.Errorf("%d. WriteString: %v", i, err)
286                         continue
287                 }
288                 got := buf.String()
289                 if got != tc.out {
290                         t.Errorf("%d. WriteString(%q) wrote %q, want %q", i, tc.in, got, tc.out)
291                         continue
292                 }
293                 if n != len(tc.out) {
294                         t.Errorf("%d. WriteString(%q) wrote correct string but reported %d bytes; want %d (%q)",
295                                 i, tc.in, n, len(tc.out), tc.out)
296                 }
297         }
298 }
299
300 // TestPickAlgorithm tests that NewReplacer picks the correct algorithm.
301 func TestPickAlgorithm(t *testing.T) {
302         testCases := []struct {
303                 r    *Replacer
304                 want string
305         }{
306                 {capitalLetters, "*strings.byteReplacer"},
307                 {htmlEscaper, "*strings.byteStringReplacer"},
308                 {NewReplacer("12", "123"), "*strings.singleStringReplacer"},
309                 {NewReplacer("1", "12"), "*strings.byteStringReplacer"},
310                 {NewReplacer("", "X"), "*strings.genericReplacer"},
311                 {NewReplacer("a", "1", "b", "12", "cde", "123"), "*strings.genericReplacer"},
312         }
313         for i, tc := range testCases {
314                 got := fmt.Sprintf("%T", tc.r.Replacer())
315                 if got != tc.want {
316                         t.Errorf("%d. algorithm = %s, want %s", i, got, tc.want)
317                 }
318         }
319 }
320
321 // TestGenericTrieBuilding verifies the structure of the generated trie. There
322 // is one node per line, and the key ending with the current line is in the
323 // trie if it ends with a "+".
324 func TestGenericTrieBuilding(t *testing.T) {
325         testCases := []struct{ in, out string }{
326                 {"abc;abdef;abdefgh;xx;xy;z", `-
327                         a-
328                         .b-
329                         ..c+
330                         ..d-
331                         ...ef+
332                         .....gh+
333                         x-
334                         .x+
335                         .y+
336                         z+
337                         `},
338                 {"abracadabra;abracadabrakazam;abraham;abrasion", `-
339                         a-
340                         .bra-
341                         ....c-
342                         .....adabra+
343                         ...........kazam+
344                         ....h-
345                         .....am+
346                         ....s-
347                         .....ion+
348                         `},
349                 {"aaa;aa;a;i;longerst;longer;long;xx;x;X;Y", `-
350                         X+
351                         Y+
352                         a+
353                         .a+
354                         ..a+
355                         i+
356                         l-
357                         .ong+
358                         ....er+
359                         ......st+
360                         x+
361                         .x+
362                         `},
363                 {"foo;;foo;foo1", `+
364                         f-
365                         .oo+
366                         ...1+
367                         `},
368         }
369
370         for _, tc := range testCases {
371                 keys := Split(tc.in, ";")
372                 args := make([]string, len(keys)*2)
373                 for i, key := range keys {
374                         args[i*2] = key
375                 }
376
377                 got := NewReplacer(args...).PrintTrie()
378                 // Remove tabs from tc.out
379                 wantbuf := make([]byte, 0, len(tc.out))
380                 for i := 0; i < len(tc.out); i++ {
381                         if tc.out[i] != '\t' {
382                                 wantbuf = append(wantbuf, tc.out[i])
383                         }
384                 }
385                 want := string(wantbuf)
386
387                 if got != want {
388                         t.Errorf("PrintTrie(%q)\ngot\n%swant\n%s", tc.in, got, want)
389                 }
390         }
391 }
392
393 func BenchmarkGenericNoMatch(b *testing.B) {
394         str := Repeat("A", 100) + Repeat("B", 100)
395         generic := NewReplacer("a", "A", "b", "B", "12", "123") // varying lengths forces generic
396         for i := 0; i < b.N; i++ {
397                 generic.Replace(str)
398         }
399 }
400
401 func BenchmarkGenericMatch1(b *testing.B) {
402         str := Repeat("a", 100) + Repeat("b", 100)
403         generic := NewReplacer("a", "A", "b", "B", "12", "123")
404         for i := 0; i < b.N; i++ {
405                 generic.Replace(str)
406         }
407 }
408
409 func BenchmarkGenericMatch2(b *testing.B) {
410         str := Repeat("It&apos;s &lt;b&gt;HTML&lt;/b&gt;!", 100)
411         for i := 0; i < b.N; i++ {
412                 htmlUnescaper.Replace(str)
413         }
414 }
415
416 func benchmarkSingleString(b *testing.B, pattern, text string) {
417         r := NewReplacer(pattern, "[match]")
418         b.SetBytes(int64(len(text)))
419         b.ResetTimer()
420         for i := 0; i < b.N; i++ {
421                 r.Replace(text)
422         }
423 }
424
425 func BenchmarkSingleMaxSkipping(b *testing.B) {
426         benchmarkSingleString(b, Repeat("b", 25), Repeat("a", 10000))
427 }
428
429 func BenchmarkSingleLongSuffixFail(b *testing.B) {
430         benchmarkSingleString(b, "b"+Repeat("a", 500), Repeat("a", 1002))
431 }
432
433 func BenchmarkSingleMatch(b *testing.B) {
434         benchmarkSingleString(b, "abcdef", Repeat("abcdefghijklmno", 1000))
435 }
436
437 func BenchmarkByteByteNoMatch(b *testing.B) {
438         str := Repeat("A", 100) + Repeat("B", 100)
439         for i := 0; i < b.N; i++ {
440                 capitalLetters.Replace(str)
441         }
442 }
443
444 func BenchmarkByteByteMatch(b *testing.B) {
445         str := Repeat("a", 100) + Repeat("b", 100)
446         for i := 0; i < b.N; i++ {
447                 capitalLetters.Replace(str)
448         }
449 }
450
451 func BenchmarkByteStringMatch(b *testing.B) {
452         str := "<" + Repeat("a", 99) + Repeat("b", 99) + ">"
453         for i := 0; i < b.N; i++ {
454                 htmlEscaper.Replace(str)
455         }
456 }
457
458 func BenchmarkHTMLEscapeNew(b *testing.B) {
459         str := "I <3 to escape HTML & other text too."
460         for i := 0; i < b.N; i++ {
461                 htmlEscaper.Replace(str)
462         }
463 }
464
465 func BenchmarkHTMLEscapeOld(b *testing.B) {
466         str := "I <3 to escape HTML & other text too."
467         for i := 0; i < b.N; i++ {
468                 oldHTMLEscape(str)
469         }
470 }
471
472 // BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces.
473 func BenchmarkByteByteReplaces(b *testing.B) {
474         str := Repeat("a", 100) + Repeat("b", 100)
475         for i := 0; i < b.N; i++ {
476                 Replace(Replace(str, "a", "A", -1), "b", "B", -1)
477         }
478 }
479
480 // BenchmarkByteByteMap compares byteByteImpl against Map.
481 func BenchmarkByteByteMap(b *testing.B) {
482         str := Repeat("a", 100) + Repeat("b", 100)
483         fn := func(r rune) rune {
484                 switch r {
485                 case 'a':
486                         return 'A'
487                 case 'b':
488                         return 'B'
489                 }
490                 return r
491         }
492         for i := 0; i < b.N; i++ {
493                 Map(fn, str)
494         }
495 }