...

Source file src/golang.org/x/text/encoding/japanese/shiftjis.go

Documentation: golang.org/x/text/encoding/japanese

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package japanese
     6  
     7  import (
     8  	"unicode/utf8"
     9  
    10  	"golang.org/x/text/encoding"
    11  	"golang.org/x/text/encoding/internal"
    12  	"golang.org/x/text/encoding/internal/identifier"
    13  	"golang.org/x/text/transform"
    14  )
    15  
// ShiftJIS is the Shift JIS encoding, also known as Code Page 932 and
// Windows-31J.
//
// It is exposed as an encoding.Encoding and points at the package-level
// shiftJIS value.
var ShiftJIS encoding.Encoding = &shiftJIS
    19  
// shiftJIS is the concrete value behind ShiftJIS. It wraps an
// internal.SimpleEncoding built from shiftJISDecoder and shiftJISEncoder, and
// carries the name "Shift JIS" together with identifier.ShiftJIS.
var shiftJIS = internal.Encoding{
	&internal.SimpleEncoding{shiftJISDecoder{}, shiftJISEncoder{}},
	"Shift JIS",
	identifier.ShiftJIS,
}
    25  
// shiftJISDecoder converts Shift JIS bytes to UTF-8 through its Transform
// method. It declares no fields of its own; it only embeds
// transform.NopResetter.
type shiftJISDecoder struct{ transform.NopResetter }
    27  
    28  func (shiftJISDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
    29  	r, size := rune(0), 0
    30  loop:
    31  	for ; nSrc < len(src); nSrc += size {
    32  		switch c0 := src[nSrc]; {
    33  		case c0 < utf8.RuneSelf:
    34  			r, size = rune(c0), 1
    35  
    36  		case 0xa1 <= c0 && c0 < 0xe0:
    37  			r, size = rune(c0)+(0xff61-0xa1), 1
    38  
    39  		case (0x81 <= c0 && c0 < 0xa0) || (0xe0 <= c0 && c0 < 0xfd):
    40  			if c0 <= 0x9f {
    41  				c0 -= 0x70
    42  			} else {
    43  				c0 -= 0xb0
    44  			}
    45  			c0 = 2*c0 - 0x21
    46  
    47  			if nSrc+1 >= len(src) {
    48  				if !atEOF {
    49  					err = transform.ErrShortSrc
    50  					break loop
    51  				}
    52  				r, size = '\ufffd', 1
    53  				goto write
    54  			}
    55  			c1 := src[nSrc+1]
    56  			switch {
    57  			case c1 < 0x40:
    58  				r, size = '\ufffd', 1 // c1 is ASCII so output on next round
    59  				goto write
    60  			case c1 < 0x7f:
    61  				c0--
    62  				c1 -= 0x40
    63  			case c1 == 0x7f:
    64  				r, size = '\ufffd', 1 // c1 is ASCII so output on next round
    65  				goto write
    66  			case c1 < 0x9f:
    67  				c0--
    68  				c1 -= 0x41
    69  			case c1 < 0xfd:
    70  				c1 -= 0x9f
    71  			default:
    72  				r, size = '\ufffd', 2
    73  				goto write
    74  			}
    75  			r, size = '\ufffd', 2
    76  			if i := int(c0)*94 + int(c1); i < len(jis0208Decode) {
    77  				r = rune(jis0208Decode[i])
    78  				if r == 0 {
    79  					r = '\ufffd'
    80  				}
    81  			}
    82  
    83  		case c0 == 0x80:
    84  			r, size = 0x80, 1
    85  
    86  		default:
    87  			r, size = '\ufffd', 1
    88  		}
    89  	write:
    90  		if nDst+utf8.RuneLen(r) > len(dst) {
    91  			err = transform.ErrShortDst
    92  			break loop
    93  		}
    94  		nDst += utf8.EncodeRune(dst[nDst:], r)
    95  	}
    96  	return nDst, nSrc, err
    97  }
    98  
// shiftJISEncoder converts UTF-8 to Shift JIS bytes through its Transform
// method. It declares no fields of its own; it only embeds
// transform.NopResetter.
type shiftJISEncoder struct{ transform.NopResetter }
   100  
   101  func (shiftJISEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
   102  	r, size := rune(0), 0
   103  loop:
   104  	for ; nSrc < len(src); nSrc += size {
   105  		r = rune(src[nSrc])
   106  
   107  		// Decode a 1-byte rune.
   108  		if r < utf8.RuneSelf {
   109  			size = 1
   110  
   111  		} else {
   112  			// Decode a multi-byte rune.
   113  			r, size = utf8.DecodeRune(src[nSrc:])
   114  			if size == 1 {
   115  				// All valid runes of size 1 (those below utf8.RuneSelf) were
   116  				// handled above. We have invalid UTF-8 or we haven't seen the
   117  				// full character yet.
   118  				if !atEOF && !utf8.FullRune(src[nSrc:]) {
   119  					err = transform.ErrShortSrc
   120  					break loop
   121  				}
   122  			}
   123  
   124  			// func init checks that the switch covers all tables.
   125  			switch {
   126  			case encode0Low <= r && r < encode0High:
   127  				if r = rune(encode0[r-encode0Low]); r>>tableShift == jis0208 {
   128  					goto write2
   129  				}
   130  			case encode1Low <= r && r < encode1High:
   131  				if r = rune(encode1[r-encode1Low]); r>>tableShift == jis0208 {
   132  					goto write2
   133  				}
   134  			case encode2Low <= r && r < encode2High:
   135  				if r = rune(encode2[r-encode2Low]); r>>tableShift == jis0208 {
   136  					goto write2
   137  				}
   138  			case encode3Low <= r && r < encode3High:
   139  				if r = rune(encode3[r-encode3Low]); r>>tableShift == jis0208 {
   140  					goto write2
   141  				}
   142  			case encode4Low <= r && r < encode4High:
   143  				if r = rune(encode4[r-encode4Low]); r>>tableShift == jis0208 {
   144  					goto write2
   145  				}
   146  			case encode5Low <= r && r < encode5High:
   147  				if 0xff61 <= r && r < 0xffa0 {
   148  					r -= 0xff61 - 0xa1
   149  					goto write1
   150  				}
   151  				if r = rune(encode5[r-encode5Low]); r>>tableShift == jis0208 {
   152  					goto write2
   153  				}
   154  			}
   155  			err = internal.ErrASCIIReplacement
   156  			break
   157  		}
   158  
   159  	write1:
   160  		if nDst >= len(dst) {
   161  			err = transform.ErrShortDst
   162  			break
   163  		}
   164  		dst[nDst] = uint8(r)
   165  		nDst++
   166  		continue
   167  
   168  	write2:
   169  		j1 := uint8(r>>codeShift) & codeMask
   170  		j2 := uint8(r) & codeMask
   171  		if nDst+2 > len(dst) {
   172  			err = transform.ErrShortDst
   173  			break loop
   174  		}
   175  		if j1 <= 61 {
   176  			dst[nDst+0] = 129 + j1/2
   177  		} else {
   178  			dst[nDst+0] = 193 + j1/2
   179  		}
   180  		if j1&1 == 0 {
   181  			dst[nDst+1] = j2 + j2/63 + 64
   182  		} else {
   183  			dst[nDst+1] = j2 + 159
   184  		}
   185  		nDst += 2
   186  		continue
   187  	}
   188  	return nDst, nSrc, err
   189  }
   190  

View as plain text