...

Source file src/golang.org/x/text/runes/runes_test.go

Documentation: golang.org/x/text/runes

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package runes
     6  
     7  import (
     8  	"strings"
     9  	"testing"
    10  	"unicode/utf8"
    11  
    12  	"golang.org/x/text/internal/testtext"
    13  	"golang.org/x/text/transform"
    14  )
    15  
    16  type transformTest struct {
    17  	desc    string
    18  	szDst   int
    19  	atEOF   bool
    20  	repl    string
    21  	in      string
    22  	out     string // result string of first call to Transform
    23  	outFull string // transform of entire input string
    24  	err     error
    25  	errSpan error
    26  	nSpan   int
    27  
    28  	t transform.SpanningTransformer
    29  }
    30  
    31  const large = 10240
    32  
    33  func (tt *transformTest) check(t *testing.T, i int) {
    34  	if tt.t == nil {
    35  		return
    36  	}
    37  	dst := make([]byte, tt.szDst)
    38  	src := []byte(tt.in)
    39  	nDst, nSrc, err := tt.t.Transform(dst, src, tt.atEOF)
    40  	if err != tt.err {
    41  		t.Errorf("%d:%s:error: got %v; want %v", i, tt.desc, err, tt.err)
    42  	}
    43  	if got := string(dst[:nDst]); got != tt.out {
    44  		t.Errorf("%d:%s:out: got %q; want %q", i, tt.desc, got, tt.out)
    45  	}
    46  
    47  	// Calls tt.t.Transform for the remainder of the input. We use this to test
    48  	// the nSrc return value.
    49  	out := make([]byte, large)
    50  	n := copy(out, dst[:nDst])
    51  	nDst, _, _ = tt.t.Transform(out[n:], src[nSrc:], true)
    52  	if got, want := string(out[:n+nDst]), tt.outFull; got != want {
    53  		t.Errorf("%d:%s:outFull: got %q; want %q", i, tt.desc, got, want)
    54  	}
    55  
    56  	tt.t.Reset()
    57  	p := 0
    58  	for ; p < len(tt.in) && p < len(tt.outFull) && tt.in[p] == tt.outFull[p]; p++ {
    59  	}
    60  	if tt.nSpan != 0 {
    61  		p = tt.nSpan
    62  	}
    63  	if n, err = tt.t.Span([]byte(tt.in), tt.atEOF); n != p || err != tt.errSpan {
    64  		t.Errorf("%d:%s:span: got %d, %v; want %d, %v", i, tt.desc, n, err, p, tt.errSpan)
    65  	}
    66  }
    67  
    68  func idem(r rune) rune { return r }
    69  
    70  func TestMap(t *testing.T) {
    71  	runes := []rune{'a', 'ç', '中', '\U00012345', 'a'}
    72  	// Default mapper used for this test.
    73  	rotate := Map(func(r rune) rune {
    74  		for i, m := range runes {
    75  			if m == r {
    76  				return runes[i+1]
    77  			}
    78  		}
    79  		return r
    80  	})
    81  
    82  	for i, tt := range []transformTest{{
    83  		desc:    "empty",
    84  		szDst:   large,
    85  		atEOF:   true,
    86  		in:      "",
    87  		out:     "",
    88  		outFull: "",
    89  		t:       rotate,
    90  	}, {
    91  		desc:    "no change",
    92  		szDst:   1,
    93  		atEOF:   true,
    94  		in:      "b",
    95  		out:     "b",
    96  		outFull: "b",
    97  		t:       rotate,
    98  	}, {
    99  		desc:    "short dst",
   100  		szDst:   2,
   101  		atEOF:   true,
   102  		in:      "aaaa",
   103  		out:     "ç",
   104  		outFull: "çççç",
   105  		err:     transform.ErrShortDst,
   106  		errSpan: transform.ErrEndOfSpan,
   107  		t:       rotate,
   108  	}, {
   109  		desc:    "short dst ascii, no change",
   110  		szDst:   2,
   111  		atEOF:   true,
   112  		in:      "bbb",
   113  		out:     "bb",
   114  		outFull: "bbb",
   115  		err:     transform.ErrShortDst,
   116  		t:       rotate,
   117  	}, {
   118  		desc:    "short dst writing error",
   119  		szDst:   2,
   120  		atEOF:   false,
   121  		in:      "a\x80",
   122  		out:     "ç",
   123  		outFull: "ç\ufffd",
   124  		err:     transform.ErrShortDst,
   125  		errSpan: transform.ErrEndOfSpan,
   126  		t:       rotate,
   127  	}, {
   128  		desc:    "short dst writing incomplete rune",
   129  		szDst:   2,
   130  		atEOF:   true,
   131  		in:      "a\xc0",
   132  		out:     "ç",
   133  		outFull: "ç\ufffd",
   134  		err:     transform.ErrShortDst,
   135  		errSpan: transform.ErrEndOfSpan,
   136  		t:       rotate,
   137  	}, {
   138  		desc:    "short dst, longer",
   139  		szDst:   5,
   140  		atEOF:   true,
   141  		in:      "Hellø",
   142  		out:     "Hell",
   143  		outFull: "Hellø",
   144  		err:     transform.ErrShortDst,
   145  		t:       rotate,
   146  	}, {
   147  		desc:    "short dst, single",
   148  		szDst:   1,
   149  		atEOF:   false,
   150  		in:      "ø",
   151  		out:     "",
   152  		outFull: "ø",
   153  		err:     transform.ErrShortDst,
   154  		t:       Map(idem),
   155  	}, {
   156  		desc:    "short dst, longer, writing error",
   157  		szDst:   8,
   158  		atEOF:   false,
   159  		in:      "\x80Hello\x80",
   160  		out:     "\ufffdHello",
   161  		outFull: "\ufffdHello\ufffd",
   162  		err:     transform.ErrShortDst,
   163  		errSpan: transform.ErrEndOfSpan,
   164  		t:       rotate,
   165  	}, {
   166  		desc:    "short src",
   167  		szDst:   2,
   168  		atEOF:   false,
   169  		in:      "a\xc2",
   170  		out:     "ç",
   171  		outFull: "ç\ufffd",
   172  		err:     transform.ErrShortSrc,
   173  		errSpan: transform.ErrEndOfSpan,
   174  		t:       rotate,
   175  	}, {
   176  		desc:    "invalid input, atEOF",
   177  		szDst:   large,
   178  		atEOF:   true,
   179  		in:      "\x80",
   180  		out:     "\ufffd",
   181  		outFull: "\ufffd",
   182  		errSpan: transform.ErrEndOfSpan,
   183  		t:       rotate,
   184  	}, {
   185  		desc:    "invalid input, !atEOF",
   186  		szDst:   large,
   187  		atEOF:   false,
   188  		in:      "\x80",
   189  		out:     "\ufffd",
   190  		outFull: "\ufffd",
   191  		errSpan: transform.ErrEndOfSpan,
   192  		t:       rotate,
   193  	}, {
   194  		desc:    "incomplete rune !atEOF",
   195  		szDst:   large,
   196  		atEOF:   false,
   197  		in:      "\xc2",
   198  		out:     "",
   199  		outFull: "\ufffd",
   200  		err:     transform.ErrShortSrc,
   201  		errSpan: transform.ErrShortSrc,
   202  		t:       rotate,
   203  	}, {
   204  		desc:    "invalid input, incomplete rune atEOF",
   205  		szDst:   large,
   206  		atEOF:   true,
   207  		in:      "\xc2",
   208  		out:     "\ufffd",
   209  		outFull: "\ufffd",
   210  		errSpan: transform.ErrEndOfSpan,
   211  		t:       rotate,
   212  	}, {
   213  		desc:    "misc correct",
   214  		szDst:   large,
   215  		atEOF:   true,
   216  		in:      "a\U00012345 ç!",
   217  		out:     "ça 中!",
   218  		outFull: "ça 中!",
   219  		errSpan: transform.ErrEndOfSpan,
   220  		t:       rotate,
   221  	}, {
   222  		desc:    "misc correct and invalid",
   223  		szDst:   large,
   224  		atEOF:   true,
   225  		in:      "Hello\x80 w\x80orl\xc0d!\xc0",
   226  		out:     "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
   227  		outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
   228  		errSpan: transform.ErrEndOfSpan,
   229  		t:       rotate,
   230  	}, {
   231  		desc:    "misc correct and invalid, short src",
   232  		szDst:   large,
   233  		atEOF:   false,
   234  		in:      "Hello\x80 w\x80orl\xc0d!\xc2",
   235  		out:     "Hello\ufffd w\ufffdorl\ufffdd!",
   236  		outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
   237  		err:     transform.ErrShortSrc,
   238  		errSpan: transform.ErrEndOfSpan,
   239  		t:       rotate,
   240  	}, {
   241  		desc:    "misc correct and invalid, short src, replacing RuneError",
   242  		szDst:   large,
   243  		atEOF:   false,
   244  		in:      "Hel\ufffdlo\x80 w\x80orl\xc0d!\xc2",
   245  		out:     "Hel?lo? w?orl?d!",
   246  		outFull: "Hel?lo? w?orl?d!?",
   247  		errSpan: transform.ErrEndOfSpan,
   248  		err:     transform.ErrShortSrc,
   249  		t: Map(func(r rune) rune {
   250  			if r == utf8.RuneError {
   251  				return '?'
   252  			}
   253  			return r
   254  		}),
   255  	}} {
   256  		tt.check(t, i)
   257  	}
   258  }
   259  
   260  func TestRemove(t *testing.T) {
   261  	remove := Remove(Predicate(func(r rune) bool {
   262  		return strings.ContainsRune("aeiou\u0300\uFF24\U00012345", r)
   263  	}))
   264  
   265  	for i, tt := range []transformTest{
   266  		0: {
   267  			szDst:   large,
   268  			atEOF:   true,
   269  			in:      "",
   270  			out:     "",
   271  			outFull: "",
   272  			t:       remove,
   273  		},
   274  		1: {
   275  			szDst:   0,
   276  			atEOF:   true,
   277  			in:      "aaaa",
   278  			out:     "",
   279  			outFull: "",
   280  			errSpan: transform.ErrEndOfSpan,
   281  			t:       remove,
   282  		},
   283  		2: {
   284  			szDst:   1,
   285  			atEOF:   true,
   286  			in:      "aaaa",
   287  			out:     "",
   288  			outFull: "",
   289  			errSpan: transform.ErrEndOfSpan,
   290  			t:       remove,
   291  		},
   292  		3: {
   293  			szDst:   1,
   294  			atEOF:   true,
   295  			in:      "baaaa",
   296  			out:     "b",
   297  			outFull: "b",
   298  			errSpan: transform.ErrEndOfSpan,
   299  			t:       remove,
   300  		},
   301  		4: {
   302  			szDst:   2,
   303  			atEOF:   true,
   304  			in:      "açaaa",
   305  			out:     "ç",
   306  			outFull: "ç",
   307  			errSpan: transform.ErrEndOfSpan,
   308  			t:       remove,
   309  		},
   310  		5: {
   311  			szDst:   2,
   312  			atEOF:   true,
   313  			in:      "aaaç",
   314  			out:     "ç",
   315  			outFull: "ç",
   316  			errSpan: transform.ErrEndOfSpan,
   317  			t:       remove,
   318  		},
   319  		6: {
   320  			szDst:   2,
   321  			atEOF:   false,
   322  			in:      "a\x80",
   323  			out:     "",
   324  			outFull: "\ufffd",
   325  			err:     transform.ErrShortDst,
   326  			errSpan: transform.ErrEndOfSpan,
   327  			t:       remove,
   328  		},
   329  		7: {
   330  			szDst:   1,
   331  			atEOF:   true,
   332  			in:      "a\xc0",
   333  			out:     "",
   334  			outFull: "\ufffd",
   335  			err:     transform.ErrShortDst,
   336  			errSpan: transform.ErrEndOfSpan,
   337  			t:       remove,
   338  		},
   339  		8: {
   340  			szDst:   1,
   341  			atEOF:   false,
   342  			in:      "a\xc2",
   343  			out:     "",
   344  			outFull: "\ufffd",
   345  			err:     transform.ErrShortSrc,
   346  			errSpan: transform.ErrEndOfSpan,
   347  			t:       remove,
   348  		},
   349  		9: {
   350  			szDst:   large,
   351  			atEOF:   true,
   352  			in:      "\x80",
   353  			out:     "\ufffd",
   354  			outFull: "\ufffd",
   355  			errSpan: transform.ErrEndOfSpan,
   356  			t:       remove,
   357  		},
   358  		10: {
   359  			szDst:   large,
   360  			atEOF:   false,
   361  			in:      "\x80",
   362  			out:     "\ufffd",
   363  			outFull: "\ufffd",
   364  			errSpan: transform.ErrEndOfSpan,
   365  			t:       remove,
   366  		},
   367  		11: {
   368  			szDst:   large,
   369  			atEOF:   true,
   370  			in:      "\xc2",
   371  			out:     "\ufffd",
   372  			outFull: "\ufffd",
   373  			errSpan: transform.ErrEndOfSpan,
   374  			t:       remove,
   375  		},
   376  		12: {
   377  			szDst:   large,
   378  			atEOF:   false,
   379  			in:      "\xc2",
   380  			out:     "",
   381  			outFull: "\ufffd",
   382  			err:     transform.ErrShortSrc,
   383  			errSpan: transform.ErrShortSrc,
   384  			t:       remove,
   385  		},
   386  		13: {
   387  			szDst:   large,
   388  			atEOF:   true,
   389  			in:      "Hello \U00012345world!",
   390  			out:     "Hll wrld!",
   391  			outFull: "Hll wrld!",
   392  			errSpan: transform.ErrEndOfSpan,
   393  			t:       remove,
   394  		},
   395  		14: {
   396  			szDst:   large,
   397  			atEOF:   true,
   398  			in:      "Hello\x80 w\x80orl\xc0d!\xc0",
   399  			out:     "Hll\ufffd w\ufffdrl\ufffdd!\ufffd",
   400  			outFull: "Hll\ufffd w\ufffdrl\ufffdd!\ufffd",
   401  			errSpan: transform.ErrEndOfSpan,
   402  			t:       remove,
   403  		},
   404  		15: {
   405  			szDst:   large,
   406  			atEOF:   false,
   407  			in:      "Hello\x80 w\x80orl\xc0d!\xc2",
   408  			out:     "Hll\ufffd w\ufffdrl\ufffdd!",
   409  			outFull: "Hll\ufffd w\ufffdrl\ufffdd!\ufffd",
   410  			err:     transform.ErrShortSrc,
   411  			errSpan: transform.ErrEndOfSpan,
   412  			t:       remove,
   413  		},
   414  		16: {
   415  			szDst:   large,
   416  			atEOF:   false,
   417  			in:      "Hel\ufffdlo\x80 w\x80orl\xc0d!\xc2",
   418  			out:     "Hello world!",
   419  			outFull: "Hello world!",
   420  			err:     transform.ErrShortSrc,
   421  			errSpan: transform.ErrEndOfSpan,
   422  			t:       Remove(Predicate(func(r rune) bool { return r == utf8.RuneError })),
   423  		},
   424  		17: {
   425  			szDst:   4,
   426  			atEOF:   true,
   427  			in:      "Hellø",
   428  			out:     "Hll",
   429  			outFull: "Hllø",
   430  			err:     transform.ErrShortDst,
   431  			errSpan: transform.ErrEndOfSpan,
   432  			t:       remove,
   433  		},
   434  		18: {
   435  			szDst:   4,
   436  			atEOF:   false,
   437  			in:      "Hellø",
   438  			out:     "Hll",
   439  			outFull: "Hllø",
   440  			err:     transform.ErrShortDst,
   441  			errSpan: transform.ErrEndOfSpan,
   442  			t:       remove,
   443  		},
   444  		19: {
   445  			szDst:   8,
   446  			atEOF:   false,
   447  			in:      "\x80Hello\uFF24\x80",
   448  			out:     "\ufffdHll",
   449  			outFull: "\ufffdHll\ufffd",
   450  			err:     transform.ErrShortDst,
   451  			errSpan: transform.ErrEndOfSpan,
   452  			t:       remove,
   453  		},
   454  		20: {
   455  			szDst:   8,
   456  			atEOF:   false,
   457  			in:      "Hllll",
   458  			out:     "Hllll",
   459  			outFull: "Hllll",
   460  			t:       remove,
   461  		}} {
   462  		tt.check(t, i)
   463  	}
   464  }
   465  
   466  func TestReplaceIllFormed(t *testing.T) {
   467  	replace := ReplaceIllFormed()
   468  
   469  	for i, tt := range []transformTest{
   470  		0: {
   471  			szDst:   large,
   472  			atEOF:   true,
   473  			in:      "",
   474  			out:     "",
   475  			outFull: "",
   476  			t:       replace,
   477  		},
   478  		1: {
   479  			szDst:   1,
   480  			atEOF:   true,
   481  			in:      "aa",
   482  			out:     "a",
   483  			outFull: "aa",
   484  			err:     transform.ErrShortDst,
   485  			t:       replace,
   486  		},
   487  		2: {
   488  			szDst:   1,
   489  			atEOF:   true,
   490  			in:      "a\x80",
   491  			out:     "a",
   492  			outFull: "a\ufffd",
   493  			err:     transform.ErrShortDst,
   494  			errSpan: transform.ErrEndOfSpan,
   495  			t:       replace,
   496  		},
   497  		3: {
   498  			szDst:   1,
   499  			atEOF:   true,
   500  			in:      "a\xc2",
   501  			out:     "a",
   502  			outFull: "a\ufffd",
   503  			err:     transform.ErrShortDst,
   504  			errSpan: transform.ErrEndOfSpan,
   505  			t:       replace,
   506  		},
   507  		4: {
   508  			szDst:   large,
   509  			atEOF:   true,
   510  			in:      "\x80",
   511  			out:     "\ufffd",
   512  			outFull: "\ufffd",
   513  			errSpan: transform.ErrEndOfSpan,
   514  			t:       replace,
   515  		},
   516  		5: {
   517  			szDst:   large,
   518  			atEOF:   false,
   519  			in:      "\x80",
   520  			out:     "\ufffd",
   521  			outFull: "\ufffd",
   522  			errSpan: transform.ErrEndOfSpan,
   523  			t:       replace,
   524  		},
   525  		6: {
   526  			szDst:   large,
   527  			atEOF:   true,
   528  			in:      "\xc2",
   529  			out:     "\ufffd",
   530  			outFull: "\ufffd",
   531  			errSpan: transform.ErrEndOfSpan,
   532  			t:       replace,
   533  		},
   534  		7: {
   535  			szDst:   large,
   536  			atEOF:   false,
   537  			in:      "\xc2",
   538  			out:     "",
   539  			outFull: "\ufffd",
   540  			err:     transform.ErrShortSrc,
   541  			errSpan: transform.ErrShortSrc,
   542  			t:       replace,
   543  		},
   544  		8: {
   545  			szDst:   large,
   546  			atEOF:   true,
   547  			in:      "Hello world!",
   548  			out:     "Hello world!",
   549  			outFull: "Hello world!",
   550  			t:       replace,
   551  		},
   552  		9: {
   553  			szDst:   large,
   554  			atEOF:   true,
   555  			in:      "Hello\x80 w\x80orl\xc2d!\xc2",
   556  			out:     "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
   557  			outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
   558  			errSpan: transform.ErrEndOfSpan,
   559  			t:       replace,
   560  		},
   561  		10: {
   562  			szDst:   large,
   563  			atEOF:   false,
   564  			in:      "Hello\x80 w\x80orl\xc2d!\xc2",
   565  			out:     "Hello\ufffd w\ufffdorl\ufffdd!",
   566  			outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
   567  			err:     transform.ErrShortSrc,
   568  			errSpan: transform.ErrEndOfSpan,
   569  			t:       replace,
   570  		},
   571  		16: {
   572  			szDst:   10,
   573  			atEOF:   false,
   574  			in:      "\x80Hello\x80",
   575  			out:     "\ufffdHello",
   576  			outFull: "\ufffdHello\ufffd",
   577  			err:     transform.ErrShortDst,
   578  			errSpan: transform.ErrEndOfSpan,
   579  			t:       replace,
   580  		},
   581  		17: {
   582  			szDst:   10,
   583  			atEOF:   false,
   584  			in:      "\ufffdHello\ufffd",
   585  			out:     "\ufffdHello",
   586  			outFull: "\ufffdHello\ufffd",
   587  			err:     transform.ErrShortDst,
   588  			t:       replace,
   589  		},
   590  	} {
   591  		tt.check(t, i)
   592  	}
   593  }
   594  
   595  func TestMapAlloc(t *testing.T) {
   596  	if n := testtext.AllocsPerRun(3, func() {
   597  		Map(idem).Transform(nil, nil, false)
   598  	}); n > 0 {
   599  		t.Errorf("got %f; want 0", n)
   600  	}
   601  }
   602  
   603  func rmNop(r rune) bool { return false }
   604  
   605  func TestRemoveAlloc(t *testing.T) {
   606  	if n := testtext.AllocsPerRun(3, func() {
   607  		Remove(Predicate(rmNop)).Transform(nil, nil, false)
   608  	}); n > 0 {
   609  		t.Errorf("got %f; want 0", n)
   610  	}
   611  }
   612  
   613  func TestReplaceIllFormedAlloc(t *testing.T) {
   614  	if n := testtext.AllocsPerRun(3, func() {
   615  		ReplaceIllFormed().Transform(nil, nil, false)
   616  	}); n > 0 {
   617  		t.Errorf("got %f; want 0", n)
   618  	}
   619  }
   620  
   621  func doBench(b *testing.B, t Transformer) {
   622  	for _, bc := range []struct{ name, data string }{
   623  		{"ascii", testtext.ASCII},
   624  		{"3byte", testtext.ThreeByteUTF8},
   625  	} {
   626  		dst := make([]byte, 2*len(bc.data))
   627  		src := []byte(bc.data)
   628  
   629  		testtext.Bench(b, bc.name+"/transform", func(b *testing.B) {
   630  			b.SetBytes(int64(len(src)))
   631  			for i := 0; i < b.N; i++ {
   632  				t.Transform(dst, src, true)
   633  			}
   634  		})
   635  		src = t.Bytes(src)
   636  		t.Reset()
   637  		testtext.Bench(b, bc.name+"/span", func(b *testing.B) {
   638  			b.SetBytes(int64(len(src)))
   639  			for i := 0; i < b.N; i++ {
   640  				t.Span(src, true)
   641  			}
   642  		})
   643  	}
   644  }
   645  
   646  func BenchmarkRemove(b *testing.B) {
   647  	doBench(b, Remove(Predicate(func(r rune) bool { return r == 'e' })))
   648  }
   649  
   650  func BenchmarkMapAll(b *testing.B) {
   651  	doBench(b, Map(func(r rune) rune { return 'a' }))
   652  }
   653  
   654  func BenchmarkMapNone(b *testing.B) {
   655  	doBench(b, Map(func(r rune) rune { return r }))
   656  }
   657  
   658  func BenchmarkReplaceIllFormed(b *testing.B) {
   659  	doBench(b, ReplaceIllFormed())
   660  }
   661  
   662  var (
   663  	input = strings.Repeat("Thé qüick brøwn føx jumps øver the lazy døg. ", 100)
   664  )
   665  

View as plain text