...

Source file src/gopkg.in/alexcesaro/quotedprintable.v3/encodedword.go

Documentation: gopkg.in/alexcesaro/quotedprintable.v3

     1  package quotedprintable
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/base64"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"strings"
    10  	"unicode"
    11  	"unicode/utf8"
    12  )
    13  
    14  // A WordEncoder is a RFC 2047 encoded-word encoder.
    15  type WordEncoder byte
    16  
    17  const (
    18  	// BEncoding represents Base64 encoding scheme as defined by RFC 2045.
    19  	BEncoding = WordEncoder('b')
    20  	// QEncoding represents the Q-encoding scheme as defined by RFC 2047.
    21  	QEncoding = WordEncoder('q')
    22  )
    23  
    24  var (
    25  	errInvalidWord = errors.New("mime: invalid RFC 2047 encoded-word")
    26  )
    27  
    28  // Encode returns the encoded-word form of s. If s is ASCII without special
    29  // characters, it is returned unchanged. The provided charset is the IANA
    30  // charset name of s. It is case insensitive.
    31  func (e WordEncoder) Encode(charset, s string) string {
    32  	if !needsEncoding(s) {
    33  		return s
    34  	}
    35  	return e.encodeWord(charset, s)
    36  }
    37  
    38  func needsEncoding(s string) bool {
    39  	for _, b := range s {
    40  		if (b < ' ' || b > '~') && b != '\t' {
    41  			return true
    42  		}
    43  	}
    44  	return false
    45  }
    46  
    47  // encodeWord encodes a string into an encoded-word.
    48  func (e WordEncoder) encodeWord(charset, s string) string {
    49  	buf := getBuffer()
    50  	defer putBuffer(buf)
    51  
    52  	buf.WriteString("=?")
    53  	buf.WriteString(charset)
    54  	buf.WriteByte('?')
    55  	buf.WriteByte(byte(e))
    56  	buf.WriteByte('?')
    57  
    58  	if e == BEncoding {
    59  		w := base64.NewEncoder(base64.StdEncoding, buf)
    60  		io.WriteString(w, s)
    61  		w.Close()
    62  	} else {
    63  		enc := make([]byte, 3)
    64  		for i := 0; i < len(s); i++ {
    65  			b := s[i]
    66  			switch {
    67  			case b == ' ':
    68  				buf.WriteByte('_')
    69  			case b <= '~' && b >= '!' && b != '=' && b != '?' && b != '_':
    70  				buf.WriteByte(b)
    71  			default:
    72  				enc[0] = '='
    73  				enc[1] = upperhex[b>>4]
    74  				enc[2] = upperhex[b&0x0f]
    75  				buf.Write(enc)
    76  			}
    77  		}
    78  	}
    79  	buf.WriteString("?=")
    80  	return buf.String()
    81  }
    82  
    83  const upperhex = "0123456789ABCDEF"
    84  
    85  // A WordDecoder decodes MIME headers containing RFC 2047 encoded-words.
    86  type WordDecoder struct {
    87  	// CharsetReader, if non-nil, defines a function to generate
    88  	// charset-conversion readers, converting from the provided
    89  	// charset into UTF-8.
    90  	// Charsets are always lower-case. utf-8, iso-8859-1 and us-ascii charsets
    91  	// are handled by default.
    92  	// One of the the CharsetReader's result values must be non-nil.
    93  	CharsetReader func(charset string, input io.Reader) (io.Reader, error)
    94  }
    95  
    96  // Decode decodes an encoded-word. If word is not a valid RFC 2047 encoded-word,
    97  // word is returned unchanged.
    98  func (d *WordDecoder) Decode(word string) (string, error) {
    99  	fields := strings.Split(word, "?") // TODO: remove allocation?
   100  	if len(fields) != 5 || fields[0] != "=" || fields[4] != "=" || len(fields[2]) != 1 {
   101  		return "", errInvalidWord
   102  	}
   103  
   104  	content, err := decode(fields[2][0], fields[3])
   105  	if err != nil {
   106  		return "", err
   107  	}
   108  
   109  	buf := getBuffer()
   110  	defer putBuffer(buf)
   111  
   112  	if err := d.convert(buf, fields[1], content); err != nil {
   113  		return "", err
   114  	}
   115  
   116  	return buf.String(), nil
   117  }
   118  
   119  // DecodeHeader decodes all encoded-words of the given string. It returns an
   120  // error if and only if CharsetReader of d returns an error.
   121  func (d *WordDecoder) DecodeHeader(header string) (string, error) {
   122  	// If there is no encoded-word, returns before creating a buffer.
   123  	i := strings.Index(header, "=?")
   124  	if i == -1 {
   125  		return header, nil
   126  	}
   127  
   128  	buf := getBuffer()
   129  	defer putBuffer(buf)
   130  
   131  	buf.WriteString(header[:i])
   132  	header = header[i:]
   133  
   134  	betweenWords := false
   135  	for {
   136  		start := strings.Index(header, "=?")
   137  		if start == -1 {
   138  			break
   139  		}
   140  		cur := start + len("=?")
   141  
   142  		i := strings.Index(header[cur:], "?")
   143  		if i == -1 {
   144  			break
   145  		}
   146  		charset := header[cur : cur+i]
   147  		cur += i + len("?")
   148  
   149  		if len(header) < cur+len("Q??=") {
   150  			break
   151  		}
   152  		encoding := header[cur]
   153  		cur++
   154  
   155  		if header[cur] != '?' {
   156  			break
   157  		}
   158  		cur++
   159  
   160  		j := strings.Index(header[cur:], "?=")
   161  		if j == -1 {
   162  			break
   163  		}
   164  		text := header[cur : cur+j]
   165  		end := cur + j + len("?=")
   166  
   167  		content, err := decode(encoding, text)
   168  		if err != nil {
   169  			betweenWords = false
   170  			buf.WriteString(header[:start+2])
   171  			header = header[start+2:]
   172  			continue
   173  		}
   174  
   175  		// Write characters before the encoded-word. White-space and newline
   176  		// characters separating two encoded-words must be deleted.
   177  		if start > 0 && (!betweenWords || hasNonWhitespace(header[:start])) {
   178  			buf.WriteString(header[:start])
   179  		}
   180  
   181  		if err := d.convert(buf, charset, content); err != nil {
   182  			return "", err
   183  		}
   184  
   185  		header = header[end:]
   186  		betweenWords = true
   187  	}
   188  
   189  	if len(header) > 0 {
   190  		buf.WriteString(header)
   191  	}
   192  
   193  	return buf.String(), nil
   194  }
   195  
   196  func decode(encoding byte, text string) ([]byte, error) {
   197  	switch encoding {
   198  	case 'B', 'b':
   199  		return base64.StdEncoding.DecodeString(text)
   200  	case 'Q', 'q':
   201  		return qDecode(text)
   202  	}
   203  	return nil, errInvalidWord
   204  }
   205  
   206  func (d *WordDecoder) convert(buf *bytes.Buffer, charset string, content []byte) error {
   207  	switch {
   208  	case strings.EqualFold("utf-8", charset):
   209  		buf.Write(content)
   210  	case strings.EqualFold("iso-8859-1", charset):
   211  		for _, c := range content {
   212  			buf.WriteRune(rune(c))
   213  		}
   214  	case strings.EqualFold("us-ascii", charset):
   215  		for _, c := range content {
   216  			if c >= utf8.RuneSelf {
   217  				buf.WriteRune(unicode.ReplacementChar)
   218  			} else {
   219  				buf.WriteByte(c)
   220  			}
   221  		}
   222  	default:
   223  		if d.CharsetReader == nil {
   224  			return fmt.Errorf("mime: unhandled charset %q", charset)
   225  		}
   226  		r, err := d.CharsetReader(strings.ToLower(charset), bytes.NewReader(content))
   227  		if err != nil {
   228  			return err
   229  		}
   230  		if _, err = buf.ReadFrom(r); err != nil {
   231  			return err
   232  		}
   233  	}
   234  	return nil
   235  }
   236  
   237  // hasNonWhitespace reports whether s (assumed to be ASCII) contains at least
   238  // one byte of non-whitespace.
   239  func hasNonWhitespace(s string) bool {
   240  	for _, b := range s {
   241  		switch b {
   242  		// Encoded-words can only be separated by linear white spaces which does
   243  		// not include vertical tabs (\v).
   244  		case ' ', '\t', '\n', '\r':
   245  		default:
   246  			return true
   247  		}
   248  	}
   249  	return false
   250  }
   251  
   252  // qDecode decodes a Q encoded string.
   253  func qDecode(s string) ([]byte, error) {
   254  	dec := make([]byte, len(s))
   255  	n := 0
   256  	for i := 0; i < len(s); i++ {
   257  		switch c := s[i]; {
   258  		case c == '_':
   259  			dec[n] = ' '
   260  		case c == '=':
   261  			if i+2 >= len(s) {
   262  				return nil, errInvalidWord
   263  			}
   264  			b, err := readHexByte(s[i+1], s[i+2])
   265  			if err != nil {
   266  				return nil, err
   267  			}
   268  			dec[n] = b
   269  			i += 2
   270  		case (c <= '~' && c >= ' ') || c == '\n' || c == '\r' || c == '\t':
   271  			dec[n] = c
   272  		default:
   273  			return nil, errInvalidWord
   274  		}
   275  		n++
   276  	}
   277  
   278  	return dec[:n], nil
   279  }
   280  

View as plain text