"testing"
)
-var iterBufSizes = []int{
- MaxSegmentSize,
- 1.5 * MaxSegmentSize,
- 2 * MaxSegmentSize,
- 3 * MaxSegmentSize,
- 100 * MaxSegmentSize,
-}
-
-func doIterNorm(f Form, buf []byte, s string) []byte {
+func doIterNorm(f Form, s string) []byte {
acc := []byte{}
i := Iter{}
- i.SetInputString(f, s)
+ i.InitString(f, s)
for !i.Done() {
- n := i.Next(buf)
- acc = append(acc, buf[:n]...)
+ acc = append(acc, i.Next()...)
}
return acc
}
if norm {
gold = string(f.AppendString(nil, test.out))
}
- for _, sz := range iterBufSizes {
- buf := make([]byte, sz)
- out := string(doIterNorm(f, buf, in))
- if len(out) != len(gold) {
- const msg = "%s:%d:%d: length is %d; want %d"
- t.Errorf(msg, name, i, sz, len(out), len(gold))
- }
- if out != gold {
- // Find first rune that differs and show context.
- ir := []rune(out)
- ig := []rune(gold)
- for j := 0; j < len(ir) && j < len(ig); j++ {
- if ir[j] == ig[j] {
- continue
- }
- if j -= 3; j < 0 {
- j = 0
- }
- for e := j + 7; j < e && j < len(ir) && j < len(ig); j++ {
- const msg = "%s:%d:%d: runeAt(%d) = %U; want %U"
- t.Errorf(msg, name, i, sz, j, ir[j], ig[j])
- }
- break
+ out := string(doIterNorm(f, in))
+ if len(out) != len(gold) {
+ const msg = "%s:%d: length is %d; want %d"
+ t.Errorf(msg, name, i, len(out), len(gold))
+ }
+ if out != gold {
+ // Find first rune that differs and show context.
+ ir := []rune(out)
+ ig := []rune(gold)
+ t.Errorf("\n%X != \n%X", ir, ig)
+ for j := 0; j < len(ir) && j < len(ig); j++ {
+ if ir[j] == ig[j] {
+ continue
+ }
+ if j -= 3; j < 0 {
+ j = 0
}
+ for e := j + 7; j < e && j < len(ir) && j < len(ig); j++ {
+ const msg = "%s:%d: runeAt(%d) = %U; want %U"
+ t.Errorf(msg, name, i, j, ir[j], ig[j])
+ }
+ break
}
}
}
return strings.Repeat(string(r), n)
}
+const segSize = maxByteBufferSize
+
var iterTests = []AppendTest{
{"", ascii, ascii},
{"", txt_all, txt_all},
- {"", "a" + rep(0x0300, MaxSegmentSize/2), "a" + rep(0x0300, MaxSegmentSize/2)},
+ {"", "a" + rep(0x0300, segSize/2), "a" + rep(0x0300, segSize/2)},
}
var iterTestsD = []AppendTest{
{ // segment overflow on unchanged character
"",
- "a" + rep(0x0300, MaxSegmentSize/2) + "\u0316",
- "a" + rep(0x0300, MaxSegmentSize/2-1) + "\u0316\u0300",
+ "a" + rep(0x0300, segSize/2) + "\u0316",
+ "a" + rep(0x0300, segSize/2-1) + "\u0316\u0300",
},
{ // segment overflow on unchanged character + start value
"",
- "a" + rep(0x0300, MaxSegmentSize/2+maxCombiningChars+4) + "\u0316",
- "a" + rep(0x0300, MaxSegmentSize/2+maxCombiningChars) + "\u0316" + rep(0x300, 4),
+ "a" + rep(0x0300, segSize/2+maxCombiningChars+4) + "\u0316",
+ "a" + rep(0x0300, segSize/2+maxCombiningChars) + "\u0316" + rep(0x300, 4),
},
{ // segment overflow on decomposition
"",
- "a" + rep(0x0300, MaxSegmentSize/2-1) + "\u0340",
- "a" + rep(0x0300, MaxSegmentSize/2),
+ "a" + rep(0x0300, segSize/2-1) + "\u0340",
+ "a" + rep(0x0300, segSize/2),
},
{ // segment overflow on decomposition + start value
"",
- "a" + rep(0x0300, MaxSegmentSize/2-1) + "\u0340" + rep(0x300, maxCombiningChars+4) + "\u0320",
- "a" + rep(0x0300, MaxSegmentSize/2-1) + rep(0x300, maxCombiningChars+1) + "\u0320" + rep(0x300, 4),
+ "a" + rep(0x0300, segSize/2-1) + "\u0340" + rep(0x300, maxCombiningChars+4) + "\u0320",
+ "a" + rep(0x0300, segSize/2-1) + rep(0x300, maxCombiningChars+1) + "\u0320" + rep(0x300, 4),
},
{ // start value after ASCII overflow
"",
- rep('a', MaxSegmentSize) + rep(0x300, maxCombiningChars+2) + "\u0320",
- rep('a', MaxSegmentSize) + rep(0x300, maxCombiningChars) + "\u0320\u0300\u0300",
+ rep('a', segSize) + rep(0x300, maxCombiningChars+2) + "\u0320",
+ rep('a', segSize) + rep(0x300, maxCombiningChars) + "\u0320\u0300\u0300",
},
{ // start value after Hangul overflow
"",
- rep(0xAC00, MaxSegmentSize/6) + rep(0x300, maxCombiningChars+2) + "\u0320",
- strings.Repeat("\u1100\u1161", MaxSegmentSize/6) + rep(0x300, maxCombiningChars-1) + "\u0320" + rep(0x300, 3),
+ rep(0xAC00, segSize/6) + rep(0x300, maxCombiningChars+2) + "\u0320",
+ strings.Repeat("\u1100\u1161", segSize/6) + rep(0x300, maxCombiningChars+1) + "\u0320" + rep(0x300, 1),
},
{ // start value after cc=0
"",
},
{ // segment overflow
"",
- "a" + rep(0x0305, MaxSegmentSize/2+4) + "\u0316",
- "a" + rep(0x0305, MaxSegmentSize/2-1) + "\u0316" + rep(0x305, 5),
+ "a" + rep(0x0305, segSize/2+4) + "\u0316",
+ "a" + rep(0x0305, segSize/2-1) + "\u0316" + rep(0x305, 5),
},
}
}
var segmentTests = []SegmentTest{
- {rep('a', MaxSegmentSize), []string{rep('a', MaxSegmentSize), ""}},
- {rep('a', MaxSegmentSize+2), []string{rep('a', MaxSegmentSize-1), "aaa", ""}},
- {rep('a', MaxSegmentSize) + "\u0300aa", []string{rep('a', MaxSegmentSize-1), "a\u0300", "aa", ""}},
+ {"\u1E0A\u0323a", []string{"\x44\u0323\u0307", "a", ""}},
+ {rep('a', segSize), append(strings.Split(rep('a', segSize), ""), "")},
+ {rep('a', segSize+2), append(strings.Split(rep('a', segSize+2), ""), "")},
+ {rep('a', segSize) + "\u0300aa",
+ append(strings.Split(rep('a', segSize-1), ""), "a\u0300", "a", "a", "")},
+}
+
+var segmentTestsK = []SegmentTest{
+ {"\u3332", []string{"\u30D5", "\u30A1", "\u30E9", "\u30C3", "\u30C8\u3099", ""}},
+ // last segment of multi-segment decomposition needs normalization
+ {"\u3332\u093C", []string{"\u30D5", "\u30A1", "\u30E9", "\u30C3", "\u30C8\u093C\u3099", ""}},
+ // Hangul and Jamo are grouped togeter.
+ {"\uAC00", []string{"\u1100\u1161", ""}},
+ {"\uAC01", []string{"\u1100\u1161\u11A8", ""}},
+ {"\u1100\u1161", []string{"\u1100\u1161", ""}},
}
// Note that, by design, segmentation is equal for composing and decomposing forms.
func TestIterSegmentation(t *testing.T) {
segmentTest(t, "SegmentTestD", NFD, segmentTests)
segmentTest(t, "SegmentTestC", NFC, segmentTests)
+ segmentTest(t, "SegmentTestD", NFKD, segmentTestsK)
+ segmentTest(t, "SegmentTestC", NFKC, segmentTestsK)
}
func segmentTest(t *testing.T, name string, f Form, tests []SegmentTest) {
iter := Iter{}
- for i, tt := range segmentTests {
- buf := make([]byte, MaxSegmentSize)
- iter.SetInputString(f, tt.in)
+ for i, tt := range tests {
+ iter.InitString(f, tt.in)
for j, seg := range tt.out {
if seg == "" {
if !iter.Done() {
- n := iter.Next(buf)
- res := string(buf[:n])
+ res := string(iter.Next())
t.Errorf(`%s:%d:%d: expected Done()==true, found segment "%s"`, name, i, j, res)
}
continue
if iter.Done() {
t.Errorf("%s:%d:%d: Done()==true, want false", name, i, j)
}
- n := iter.Next(buf)
seg = f.String(seg)
- if res := string(buf[:n]); res != seg {
- t.Errorf(`%s:%d:%d" segment was "%s" (%d); want "%s" (%d)`, name, i, j, res, len(res), seg, len(seg))
+ if res := string(iter.Next()); res != seg {
+ t.Errorf(`%s:%d:%d" segment was "%s" (%d); want "%s" (%d) %X %X`, name, i, j, res, len(res), seg, len(seg), []rune(res), []rune(seg))
}
}
}