pattern.go

Documentation: golang.org/x/text/search

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package search
     6  
     7  import (
     8  	"golang.org/x/text/internal/colltab"
     9  )
    10  
    11  // TODO: handle variable primary weights?
    12  
    13  func (p *Pattern) deleteEmptyElements() {
    14  	k := 0
    15  	for _, e := range p.ce {
    16  		if !isIgnorable(p.m, e) {
    17  			p.ce[k] = e
    18  			k++
    19  		}
    20  	}
    21  	p.ce = p.ce[:k]
    22  }
    23  
    24  func isIgnorable(m *Matcher, e colltab.Elem) bool {
    25  	if e.Primary() > 0 {
    26  		return false
    27  	}
    28  	if e.Secondary() > 0 {
    29  		if !m.ignoreDiacritics {
    30  			return false
    31  		}
    32  		// Primary value is 0 and ignoreDiacritics is true. In this case we
    33  		// ignore the tertiary element, as it only pertains to the modifier.
    34  		return true
    35  	}
    36  	// TODO: further distinguish once we have the new implementation.
    37  	if !(m.ignoreWidth || m.ignoreCase) && e.Tertiary() > 0 {
    38  		return false
    39  	}
    40  	// TODO: we ignore the Quaternary level for now.
    41  	return true
    42  }
    43  
    44  // TODO: Use a Boyer-Moore-like algorithm (probably Sunday) for searching.
    45  
    46  func (p *Pattern) forwardSearch(it *colltab.Iter) (start, end int) {
    47  	for start := 0; it.Next(); it.Reset(start) {
    48  		nextStart := it.End()
    49  		if end := p.searchOnce(it); end != -1 {
    50  			return start, end
    51  		}
    52  		start = nextStart
    53  	}
    54  	return -1, -1
    55  }
    56  
    57  func (p *Pattern) anchoredForwardSearch(it *colltab.Iter) (start, end int) {
    58  	if it.Next() {
    59  		if end := p.searchOnce(it); end != -1 {
    60  			return 0, end
    61  		}
    62  	}
    63  	return -1, -1
    64  }
    65  
    66  // next advances to the next weight in a pattern. f must return one of the
    67  // weights of a collation element. next will advance to the first non-zero
    68  // weight and return this weight and true if it exists, or 0, false otherwise.
    69  func (p *Pattern) next(i *int, f func(colltab.Elem) int) (weight int, ok bool) {
    70  	for *i < len(p.ce) {
    71  		v := f(p.ce[*i])
    72  		*i++
    73  		if v != 0 {
    74  			// Skip successive ignorable values.
    75  			for ; *i < len(p.ce) && f(p.ce[*i]) == 0; *i++ {
    76  			}
    77  			return v, true
    78  		}
    79  	}
    80  	return 0, false
    81  }
    82  
    83  // TODO: remove this function once Elem is internal and Tertiary returns int.
    84  func tertiary(e colltab.Elem) int {
    85  	return int(e.Tertiary())
    86  }
    87  
    88  // searchOnce tries to match the pattern s.p at the text position i. s.buf needs
    89  // to be filled with collation elements of the first segment, where n is the
    90  // number of source bytes consumed for this segment. It will return the end
    91  // position of the match or -1.
    92  func (p *Pattern) searchOnce(it *colltab.Iter) (end int) {
    93  	var pLevel [4]int
    94  
    95  	m := p.m
    96  	for {
    97  		k := 0
    98  		for ; k < it.N; k++ {
    99  			if v := it.Elems[k].Primary(); v > 0 {
   100  				if w, ok := p.next(&pLevel[0], colltab.Elem.Primary); !ok || v != w {
   101  					return -1
   102  				}
   103  			}
   104  
   105  			if !m.ignoreDiacritics {
   106  				if v := it.Elems[k].Secondary(); v > 0 {
   107  					if w, ok := p.next(&pLevel[1], colltab.Elem.Secondary); !ok || v != w {
   108  						return -1
   109  					}
   110  				}
   111  			} else if it.Elems[k].Primary() == 0 {
   112  				// We ignore tertiary values of collation elements of the
   113  				// secondary level.
   114  				continue
   115  			}
   116  
   117  			// TODO: distinguish between case and width. This will be easier to
   118  			// implement after we moved to the new collation implementation.
   119  			if !m.ignoreWidth && !m.ignoreCase {
   120  				if v := it.Elems[k].Tertiary(); v > 0 {
   121  					if w, ok := p.next(&pLevel[2], tertiary); !ok || int(v) != w {
   122  						return -1
   123  					}
   124  				}
   125  			}
   126  			// TODO: check quaternary weight
   127  		}
   128  		it.Discard() // Remove the current segment from the buffer.
   129  
   130  		// Check for completion.
   131  		switch {
   132  		// If any of these cases match, we are not at the end.
   133  		case pLevel[0] < len(p.ce):
   134  		case !m.ignoreDiacritics && pLevel[1] < len(p.ce):
   135  		case !(m.ignoreWidth || m.ignoreCase) && pLevel[2] < len(p.ce):
   136  		default:
   137  			// At this point, both the segment and pattern has matched fully.
   138  			// However, the segment may still be have trailing modifiers.
   139  			// This can be verified by another call to next.
   140  			end = it.End()
   141  			if it.Next() && it.Elems[0].Primary() == 0 {
   142  				if !m.ignoreDiacritics {
   143  					return -1
   144  				}
   145  				end = it.End()
   146  			}
   147  			return end
   148  		}
   149  
   150  		// Fill the buffer with the next batch of collation elements.
   151  		if !it.Next() {
   152  			return -1
   153  		}
   154  	}
   155  }
   156
View as plain text