...

Source file src/golang.org/x/text/encoding/internal/enctest/enctest.go

Documentation: golang.org/x/text/encoding/internal/enctest

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package enctest
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"os"
    12  	"strings"
    13  	"testing"
    14  
    15  	"golang.org/x/text/encoding"
    16  	"golang.org/x/text/encoding/internal/identifier"
    17  	"golang.org/x/text/transform"
    18  )
    19  
    20  // Encoder or Decoder
    21  type Transcoder interface {
    22  	transform.Transformer
    23  	Bytes([]byte) ([]byte, error)
    24  	String(string) (string, error)
    25  }
    26  
    27  func TestEncoding(t *testing.T, e encoding.Encoding, encoded, utf8, prefix, suffix string) {
    28  	for _, direction := range []string{"Decode", "Encode"} {
    29  		t.Run(fmt.Sprintf("%v/%s", e, direction), func(t *testing.T) {
    30  
    31  			var coder Transcoder
    32  			var want, src, wPrefix, sPrefix, wSuffix, sSuffix string
    33  			if direction == "Decode" {
    34  				coder, want, src = e.NewDecoder(), utf8, encoded
    35  				wPrefix, sPrefix, wSuffix, sSuffix = "", prefix, "", suffix
    36  			} else {
    37  				coder, want, src = e.NewEncoder(), encoded, utf8
    38  				wPrefix, sPrefix, wSuffix, sSuffix = prefix, "", suffix, ""
    39  			}
    40  
    41  			dst := make([]byte, len(wPrefix)+len(want)+len(wSuffix))
    42  			nDst, nSrc, err := coder.Transform(dst, []byte(sPrefix+src+sSuffix), true)
    43  			if err != nil {
    44  				t.Fatal(err)
    45  			}
    46  			if nDst != len(wPrefix)+len(want)+len(wSuffix) {
    47  				t.Fatalf("nDst got %d, want %d",
    48  					nDst, len(wPrefix)+len(want)+len(wSuffix))
    49  			}
    50  			if nSrc != len(sPrefix)+len(src)+len(sSuffix) {
    51  				t.Fatalf("nSrc got %d, want %d",
    52  					nSrc, len(sPrefix)+len(src)+len(sSuffix))
    53  			}
    54  			if got := string(dst); got != wPrefix+want+wSuffix {
    55  				t.Fatalf("\ngot  %q\nwant %q", got, wPrefix+want+wSuffix)
    56  			}
    57  
    58  			for _, n := range []int{0, 1, 2, 10, 123, 4567} {
    59  				input := sPrefix + strings.Repeat(src, n) + sSuffix
    60  				g, err := coder.String(input)
    61  				if err != nil {
    62  					t.Fatalf("Bytes: n=%d: %v", n, err)
    63  				}
    64  				if len(g) == 0 && len(input) == 0 {
    65  					// If the input is empty then the output can be empty,
    66  					// regardless of whatever wPrefix is.
    67  					continue
    68  				}
    69  				got1, want1 := g, wPrefix+strings.Repeat(want, n)+wSuffix
    70  				if got1 != want1 {
    71  					t.Fatalf("ReadAll: n=%d\ngot  %q\nwant %q",
    72  						n, trim(got1), trim(want1))
    73  				}
    74  			}
    75  		})
    76  	}
    77  }
    78  
    79  func TestFile(t *testing.T, e encoding.Encoding) {
    80  	for _, dir := range []string{"Decode", "Encode"} {
    81  		t.Run(fmt.Sprintf("%s/%s", e, dir), func(t *testing.T) {
    82  			dst, src, transformer, err := load(dir, e)
    83  			if err != nil {
    84  				t.Fatalf("load: %v", err)
    85  			}
    86  			buf, err := transformer.Bytes(src)
    87  			if err != nil {
    88  				t.Fatalf("transform: %v", err)
    89  			}
    90  			if !bytes.Equal(buf, dst) {
    91  				t.Error("transformed bytes did not match golden file")
    92  			}
    93  		})
    94  	}
    95  }
    96  
    97  func Benchmark(b *testing.B, enc encoding.Encoding) {
    98  	for _, direction := range []string{"Decode", "Encode"} {
    99  		b.Run(fmt.Sprintf("%s/%s", enc, direction), func(b *testing.B) {
   100  			_, src, transformer, err := load(direction, enc)
   101  			if err != nil {
   102  				b.Fatal(err)
   103  			}
   104  			b.SetBytes(int64(len(src)))
   105  			b.ResetTimer()
   106  			for i := 0; i < b.N; i++ {
   107  				r := transform.NewReader(bytes.NewReader(src), transformer)
   108  				io.Copy(io.Discard, r)
   109  			}
   110  		})
   111  	}
   112  }
   113  
   114  // testdataFiles are files in testdata/*.txt.
   115  var testdataFiles = []struct {
   116  	mib           identifier.MIB
   117  	basename, ext string
   118  }{
   119  	{identifier.Windows1252, "candide", "windows-1252"},
   120  	{identifier.EUCPkdFmtJapanese, "rashomon", "euc-jp"},
   121  	{identifier.ISO2022JP, "rashomon", "iso-2022-jp"},
   122  	{identifier.ShiftJIS, "rashomon", "shift-jis"},
   123  	{identifier.EUCKR, "unsu-joh-eun-nal", "euc-kr"},
   124  	{identifier.GBK, "sunzi-bingfa-simplified", "gbk"},
   125  	{identifier.HZGB2312, "sunzi-bingfa-gb-levels-1-and-2", "hz-gb2312"},
   126  	{identifier.Big5, "sunzi-bingfa-traditional", "big5"},
   127  	{identifier.UTF16LE, "candide", "utf-16le"},
   128  	{identifier.UTF8, "candide", "utf-8"},
   129  	{identifier.UTF32BE, "candide", "utf-32be"},
   130  
   131  	// GB18030 is a superset of GBK and is nominally a Simplified Chinese
   132  	// encoding, but it can also represent the entire Basic Multilingual
   133  	// Plane, including codepoints like 'รข' that aren't encodable by GBK.
   134  	// GB18030 on Simplified Chinese should perform similarly to GBK on
   135  	// Simplified Chinese. GB18030 on "candide" is more interesting.
   136  	{identifier.GB18030, "candide", "gb18030"},
   137  }
   138  
   139  func load(direction string, enc encoding.Encoding) ([]byte, []byte, Transcoder, error) {
   140  	basename, ext, count := "", "", 0
   141  	for _, tf := range testdataFiles {
   142  		if mib, _ := enc.(identifier.Interface).ID(); tf.mib == mib {
   143  			basename, ext = tf.basename, tf.ext
   144  			count++
   145  		}
   146  	}
   147  	if count != 1 {
   148  		if count == 0 {
   149  			return nil, nil, nil, fmt.Errorf("no testdataFiles for %s", enc)
   150  		}
   151  		return nil, nil, nil, fmt.Errorf("too many testdataFiles for %s", enc)
   152  	}
   153  	dstFile := fmt.Sprintf("../testdata/%s-%s.txt", basename, ext)
   154  	srcFile := fmt.Sprintf("../testdata/%s-utf-8.txt", basename)
   155  	var coder Transcoder = encoding.ReplaceUnsupported(enc.NewEncoder())
   156  	if direction == "Decode" {
   157  		dstFile, srcFile = srcFile, dstFile
   158  		coder = enc.NewDecoder()
   159  	}
   160  	dst, err := os.ReadFile(dstFile)
   161  	if err != nil {
   162  		if dst, err = os.ReadFile("../" + dstFile); err != nil {
   163  			return nil, nil, nil, err
   164  		}
   165  	}
   166  	src, err := os.ReadFile(srcFile)
   167  	if err != nil {
   168  		if src, err = os.ReadFile("../" + srcFile); err != nil {
   169  			return nil, nil, nil, err
   170  		}
   171  	}
   172  	return dst, src, coder, nil
   173  }
   174  
   175  func trim(s string) string {
   176  	if len(s) < 120 {
   177  		return s
   178  	}
   179  	return s[:50] + "..." + s[len(s)-50:]
   180  }
   181  

View as plain text