1 // Copyright 2012 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
24 doNorm = flag.Bool("norm", false, "normalize input strings")
25 cases = flag.Bool("case", false, "generate case variants")
26 verbose = flag.Bool("verbose", false, "print results")
27 debug = flag.Bool("debug", false, "output debug information")
28 locale = flag.String("locale", "en_US", "the locale to use. May be a comma-separated list for some commands.")
29 col = flag.String("col", "go", "collator to test")
30 gold = flag.String("gold", "go", "collator used as the gold standard")
31 usecmp = flag.Bool("usecmp", false,
32 `use comparison instead of sort keys when sorting. Must be "test", "gold" or "both"`)
33 cpuprofile = flag.String("cpuprofile", "", "write cpu profile to file")
34 exclude = flag.String("exclude", "", "exclude errors that contain any of the characters")
35 limit = flag.Int("limit", 5000000, "maximum number of samples to generate for one run")
38 func failOnError(err error) {
44 // Test holds test data for testing a locale-collator pair.
45 // Test also provides functionality that is commonly used by the various commands.
56 Duration time.Duration
63 func (t *Test) clear() {
69 msgGeneratingInput = "generating input"
70 msgGeneratingKeys = "generating keys"
71 msgSorting = "sorting"
76 func (t *Test) SetStatus(msg string) {
77 if *debug || *verbose {
78 fmt.Printf("%s: %s...\n", t.Name, msg)
79 } else if t.ctxt.out != nil {
80 fmt.Fprint(t.ctxt.out, strings.Repeat(" ", lastLen))
81 fmt.Fprint(t.ctxt.out, strings.Repeat("\b", lastLen))
82 fmt.Fprint(t.ctxt.out, msg, "...")
83 lastLen = len(msg) + 3
84 fmt.Fprint(t.ctxt.out, strings.Repeat("\b", lastLen))
88 // Start is used by commands to signal the start of an operation.
89 func (t *Test) Start(msg string) {
96 // Stop is used by commands to signal the end of an operation.
97 func (t *Test) Stop() (time.Duration, int) {
98 d := time.Now().Sub(t.start)
100 if *debug || *verbose {
101 fmt.Printf("%s: %s done. (%.3fs /%dK ops)\n", t.Name, t.msg, d.Seconds(), t.count/1000)
106 // generateKeys generates sort keys for all the inputs.
107 func (t *Test) generateKeys() {
108 for i, s := range t.Input {
112 fmt.Printf("%s (%X): %X\n", string(s.UTF8), s.UTF16, b)
117 // Sort sorts the inputs. It generates sort keys if this is required by the
118 // chosen sort method.
119 func (t *Test) Sort() (tkey, tsort time.Duration, nkey, nsort int) {
120 if *cpuprofile != "" {
121 f, err := os.Create(*cpuprofile)
123 pprof.StartCPUProfile(f)
124 defer pprof.StopCPUProfile()
126 if t.UseCompare || t.Col.Key(t.Input[0]) == nil {
128 sort.Sort(&testCompare{*t})
129 tsort, nsort = t.Stop()
131 t.Start(msgGeneratingKeys)
133 t.count = len(t.Input)
134 tkey, nkey = t.Stop()
137 tsort, nsort = t.Stop()
142 func (t *Test) Swap(a, b int) {
143 t.Input[a], t.Input[b] = t.Input[b], t.Input[a]
146 func (t *Test) Less(a, b int) bool {
148 return bytes.Compare(t.Input[a].key, t.Input[b].key) == -1
151 func (t Test) Len() int {
155 type testCompare struct {
159 func (t *testCompare) Less(a, b int) bool {
161 return t.Col.Compare(t.Input[a], t.Input[b]) == -1
164 type testRestore struct {
168 func (t *testRestore) Less(a, b int) bool {
169 return t.Input[a].index < t.Input[b].index
172 // GenerateInput generates input phrases for the locale tested by t.
173 func (t *Test) GenerateInput() {
175 if t.ctxt.lastLocale != t.Locale {
176 gen := phraseGenerator{}
178 t.SetStatus(msgGeneratingInput)
179 t.ctxt.lastInput = nil // allow the previous value to be garbage collected.
180 t.Input = gen.generate(*doNorm)
181 t.ctxt.lastInput = t.Input
182 t.ctxt.lastLocale = t.Locale
184 t.Input = t.ctxt.lastInput
185 for i := range t.Input {
188 sort.Sort(&testRestore{*t})
192 // Context holds all tests and settings translated from command line options.
193 type Context struct {
203 func (ts *Context) Printf(format string, a ...interface{}) {
205 fmt.Fprintf(ts.out, format, a...)
208 func (ts *Context) Print(a ...interface{}) {
210 fmt.Fprint(ts.out, a...)
213 // assertBuf sets up an io.Writer for output, if it doesn't already exist.
214 // In debug and verbose mode, output is buffered so that the regular output
215 // will not interfere with the additional output. Otherwise, output is
216 // written directly to stdout for a more responsive feel.
217 func (ts *Context) assertBuf() {
221 if *debug || *verbose {
222 ts.out = &bytes.Buffer{}
228 // flush flushes the contents of ts.out to stdout, if it is not stdout already.
229 func (ts *Context) flush() {
231 if _, ok := ts.out.(io.ReadCloser); !ok {
232 io.Copy(os.Stdout, ts.out.(io.Reader))
237 // parseTests creates all tests from the command-line flags and returns
238 // a Context to hold them.
239 func parseTests() *Context {
241 colls := strings.Split(*col, ",")
242 for _, loc := range strings.Split(*locale, ",") {
243 loc = strings.TrimSpace(loc)
244 for _, name := range colls {
245 name = strings.TrimSpace(name)
246 col := getCollator(name, loc)
247 ctxt.test = append(ctxt.test, &Test{
259 func (c *Context) Len() int {
263 func (c *Context) Test(i int) *Test {
271 func parseInput(args []string) []Input {
273 for _, s := range args {
277 r, _, s, _ = strconv.UnquoteChar(s, '\'')
282 s = norm.NFC.String(s)
284 input = append(input, makeInputString(s))
289 // A Command is an implementation of a colcmp command.
290 type Command struct {
291 Run func(cmd *Context, args []string)
297 func (cmd Command) Name() string {
298 return strings.SplitN(cmd.Usage, " ", 2)[0]
301 var commands = []*Command{
308 Sort sorts a given list of strings. Strings are separated by whitespace.
311 var cmdSort = &Command{
313 Usage: "sort <string>*",
314 Short: "sort a given list of strings",
318 func runSort(ctxt *Context, args []string) {
319 input := parseInput(args)
321 log.Fatalf("Nothing to sort.")
324 ctxt.Print("COLL LOCALE RESULT\n")
326 for i := 0; i < ctxt.Len(); i++ {
328 t.Input = append(t.Input, input...)
331 ctxt.Printf("%-5s %-5s ", t.ColName, t.Locale)
333 for _, s := range t.Input {
334 ctxt.Print(string(s.UTF8), " ")
341 Bench runs a benchmark for the given list of collator implementations.
342 If no collator implementations are given, the go collator will be used.
345 var cmdBench = &Command{
348 Short: "benchmark a given list of collator implementations",
352 func runBench(ctxt *Context, args []string) {
353 ctxt.Printf("%-7s %-5s %-6s %-24s %-24s %-5s %s\n", "LOCALE", "COLL", "N", "KEYS", "SORT", "AVGLN", "TOTAL")
354 for i := 0; i < ctxt.Len(); i++ {
356 ctxt.Printf("%-7s %-5s ", t.Locale, t.ColName)
358 ctxt.Printf("%-6s ", fmt.Sprintf("%dK", t.Len()/1000))
359 tkey, tsort, nkey, nsort := t.Sort()
360 p := func(dur time.Duration, n int) {
363 s = fmt.Sprintf("%6.3fs ", dur.Seconds())
365 s += fmt.Sprintf("%15s", fmt.Sprintf("(%4.2f ns/op)", float64(dur)/float64(n)))
368 ctxt.Printf("%-24s ", s)
374 for _, s := range t.Input {
377 ctxt.Printf("%-5d ", total/t.Len())
378 ctxt.Printf("%6.3fs\n", t.Duration.Seconds())
380 for _, s := range t.Input {
381 fmt.Print(string(s.UTF8), " ")
388 const regressHelp = `
389 Regress runs a monkey test by comparing the results of randomly generated tests
390 between two implementations of a collator. The user may optionally pass a list
391 of strings to regress against instead of the default test set.
394 var cmdRegress = &Command{
396 Usage: "regress -gold=<col> -test=<col> [string]*",
397 Short: "run a monkey test between two collators",
401 const failedKeyCompare = `
402 %s:%d: incorrect comparison result for input:
407 Compare(a, b) = %d; want %d.
414 const failedCompare = `
415 %s:%d: incorrect comparison result for input:
418 Compare(a, b) = %d; want %d.
421 func keyStr(b []byte) string {
422 buf := &bytes.Buffer{}
423 for _, v := range b {
424 fmt.Fprintf(buf, "%.2X ", v)
429 func runRegress(ctxt *Context, args []string) {
430 input := parseInput(args)
431 for i := 0; i < ctxt.Len(); i++ {
434 t.Input = append(t.Input, input...)
440 gold := getCollator(*gold, t.Locale)
441 for i := 1; i < len(t.Input); i++ {
444 if bytes.IndexAny(ib.UTF8, *exclude) != -1 {
448 if bytes.IndexAny(ia.UTF8, *exclude) != -1 {
451 goldCmp := gold.Compare(ia, ib)
452 if cmp := bytes.Compare(ia.key, ib.key); cmp != goldCmp {
456 fmt.Printf(failedKeyCompare, t.Locale, i-1, a, []rune(a), keyStr(ia.key), b, []rune(b), keyStr(ib.key), cmp, goldCmp, keyStr(gold.Key(ia)), keyStr(gold.Key(ib)))
457 } else if cmp := t.Col.Compare(ia, ib); cmp != goldCmp {
461 fmt.Printf(failedCompare, t.Locale, i-1, a, []rune(a), b, []rune(b), cmp, goldCmp)
465 ctxt.Printf("Found %d inconsistencies in %d entries.\n", count, t.Len()-1)
470 const helpTemplate = `
471 colcmp is a tool for testing and benchmarking collation
473 Usage: colcmp command [arguments]
477 {{.Name | printf "%-11s"}} {{.Short}}{{end}}
479 Use "col help [topic]" for more information about that topic.
482 const detailedHelpTemplate = `
483 Usage: colcmp {{.Usage}}
488 func runHelp(args []string) {
489 t := template.New("help")
490 t.Funcs(template.FuncMap{"trim": strings.TrimSpace})
492 template.Must(t.Parse(helpTemplate))
493 failOnError(t.Execute(os.Stderr, &commands))
495 for _, cmd := range commands {
496 if cmd.Name() == args[0] {
497 template.Must(t.Parse(detailedHelpTemplate))
498 failOnError(t.Execute(os.Stderr, cmd))
502 log.Fatalf("Unknown command %q. Run 'colcmp help'.", args[0])
516 args := flag.Args()[1:]
517 if flag.Arg(0) == "help" {
520 for _, cmd := range commands {
521 if cmd.Name() == flag.Arg(0) {