...

Source file src/golang.org/x/text/runes/cond.go

Documentation: golang.org/x/text/runes

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package runes
     6  
     7  import (
     8  	"unicode/utf8"
     9  
    10  	"golang.org/x/text/transform"
    11  )
    12  
    13  // Note: below we pass invalid UTF-8 to the tIn and tNotIn transformers as is.
    14  // This is done for various reasons:
    15  // - To retain the semantics of the Nop transformer: if input is passed to a Nop
    16  //   one would expect it to be unchanged.
    17  // - It would be very expensive to pass a converted RuneError to a transformer:
    18  //   a transformer might need more source bytes after RuneError, meaning that
    19  //   the only way to pass it safely is to create a new buffer and manage the
    20  //   intermingling of RuneErrors and normal input.
    21  // - Many transformers leave ill-formed UTF-8 as is, so this is not
    22  //   inconsistent. Generally ill-formed UTF-8 is only replaced if it is a
    23  //   logical consequence of the operation (as for Map) or if it otherwise would
    24  //   pose security concerns (as for Remove).
    25  // - An alternative would be to return an error on ill-formed UTF-8, but this
    26  //   would be inconsistent with other operations.
    27  
    28  // If returns a transformer that applies tIn to consecutive runes for which
    29  // s.Contains(r) and tNotIn to consecutive runes for which !s.Contains(r). Reset
    30  // is called on tIn and tNotIn at the start of each run. A Nop transformer will
    31  // substitute a nil value passed to tIn or tNotIn. Invalid UTF-8 is translated
    32  // to RuneError to determine which transformer to apply, but is passed as is to
    33  // the respective transformer.
    34  func If(s Set, tIn, tNotIn transform.Transformer) Transformer {
    35  	if tIn == nil && tNotIn == nil {
    36  		return Transformer{transform.Nop}
    37  	}
    38  	if tIn == nil {
    39  		tIn = transform.Nop
    40  	}
    41  	if tNotIn == nil {
    42  		tNotIn = transform.Nop
    43  	}
    44  	sIn, ok := tIn.(transform.SpanningTransformer)
    45  	if !ok {
    46  		sIn = dummySpan{tIn}
    47  	}
    48  	sNotIn, ok := tNotIn.(transform.SpanningTransformer)
    49  	if !ok {
    50  		sNotIn = dummySpan{tNotIn}
    51  	}
    52  
    53  	a := &cond{
    54  		tIn:    sIn,
    55  		tNotIn: sNotIn,
    56  		f:      s.Contains,
    57  	}
    58  	a.Reset()
    59  	return Transformer{a}
    60  }
    61  
    62  type dummySpan struct{ transform.Transformer }
    63  
    64  func (d dummySpan) Span(src []byte, atEOF bool) (n int, err error) {
    65  	return 0, transform.ErrEndOfSpan
    66  }
    67  
    68  type cond struct {
    69  	tIn, tNotIn transform.SpanningTransformer
    70  	f           func(rune) bool
    71  	check       func(rune) bool               // current check to perform
    72  	t           transform.SpanningTransformer // current transformer to use
    73  }
    74  
    75  // Reset implements transform.Transformer.
    76  func (t *cond) Reset() {
    77  	t.check = t.is
    78  	t.t = t.tIn
    79  	t.t.Reset() // notIn will be reset on first usage.
    80  }
    81  
    82  func (t *cond) is(r rune) bool {
    83  	if t.f(r) {
    84  		return true
    85  	}
    86  	t.check = t.isNot
    87  	t.t = t.tNotIn
    88  	t.tNotIn.Reset()
    89  	return false
    90  }
    91  
    92  func (t *cond) isNot(r rune) bool {
    93  	if !t.f(r) {
    94  		return true
    95  	}
    96  	t.check = t.is
    97  	t.t = t.tIn
    98  	t.tIn.Reset()
    99  	return false
   100  }
   101  
   102  // This implementation of Span doesn't help all too much, but it needs to be
   103  // there to satisfy this package's Transformer interface.
   104  // TODO: there are certainly room for improvements, though. For example, if
   105  // t.t == transform.Nop (which will a common occurrence) it will save a bundle
   106  // to special-case that loop.
   107  func (t *cond) Span(src []byte, atEOF bool) (n int, err error) {
   108  	p := 0
   109  	for n < len(src) && err == nil {
   110  		// Don't process too much at a time as the Spanner that will be
   111  		// called on this block may terminate early.
   112  		const maxChunk = 4096
   113  		max := len(src)
   114  		if v := n + maxChunk; v < max {
   115  			max = v
   116  		}
   117  		atEnd := false
   118  		size := 0
   119  		current := t.t
   120  		for ; p < max; p += size {
   121  			r := rune(src[p])
   122  			if r < utf8.RuneSelf {
   123  				size = 1
   124  			} else if r, size = utf8.DecodeRune(src[p:]); size == 1 {
   125  				if !atEOF && !utf8.FullRune(src[p:]) {
   126  					err = transform.ErrShortSrc
   127  					break
   128  				}
   129  			}
   130  			if !t.check(r) {
   131  				// The next rune will be the start of a new run.
   132  				atEnd = true
   133  				break
   134  			}
   135  		}
   136  		n2, err2 := current.Span(src[n:p], atEnd || (atEOF && p == len(src)))
   137  		n += n2
   138  		if err2 != nil {
   139  			return n, err2
   140  		}
   141  		// At this point either err != nil or t.check will pass for the rune at p.
   142  		p = n + size
   143  	}
   144  	return n, err
   145  }
   146  
   147  func (t *cond) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
   148  	p := 0
   149  	for nSrc < len(src) && err == nil {
   150  		// Don't process too much at a time, as the work might be wasted if the
   151  		// destination buffer isn't large enough to hold the result or a
   152  		// transform returns an error early.
   153  		const maxChunk = 4096
   154  		max := len(src)
   155  		if n := nSrc + maxChunk; n < len(src) {
   156  			max = n
   157  		}
   158  		atEnd := false
   159  		size := 0
   160  		current := t.t
   161  		for ; p < max; p += size {
   162  			r := rune(src[p])
   163  			if r < utf8.RuneSelf {
   164  				size = 1
   165  			} else if r, size = utf8.DecodeRune(src[p:]); size == 1 {
   166  				if !atEOF && !utf8.FullRune(src[p:]) {
   167  					err = transform.ErrShortSrc
   168  					break
   169  				}
   170  			}
   171  			if !t.check(r) {
   172  				// The next rune will be the start of a new run.
   173  				atEnd = true
   174  				break
   175  			}
   176  		}
   177  		nDst2, nSrc2, err2 := current.Transform(dst[nDst:], src[nSrc:p], atEnd || (atEOF && p == len(src)))
   178  		nDst += nDst2
   179  		nSrc += nSrc2
   180  		if err2 != nil {
   181  			return nDst, nSrc, err2
   182  		}
   183  		// At this point either err != nil or t.check will pass for the rune at p.
   184  		p = nSrc + size
   185  	}
   186  	return nDst, nSrc, err
   187  }
   188  

View as plain text