...

Source file src/golang.org/x/text/unicode/rangetable/merge.go

Documentation: golang.org/x/text/unicode/rangetable

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package rangetable
     6  
     7  import (
     8  	"unicode"
     9  )
    10  
    11  // atEnd is used to mark a completed iteration.
    12  const atEnd = unicode.MaxRune + 1
    13  
    14  // Merge returns a new RangeTable that is the union of the given tables.
    15  // It can also be used to compact user-created RangeTables. The entries in
    16  // R16 and R32 for any given RangeTable should be sorted and non-overlapping.
    17  //
    18  // A lookup in the resulting table can be several times faster than using In
    19  // directly on the ranges. Merge is an expensive operation, however, and only
    20  // makes sense if one intends to use the result for more than a couple of
    21  // hundred lookups.
    22  func Merge(ranges ...*unicode.RangeTable) *unicode.RangeTable {
    23  	rt := &unicode.RangeTable{}
    24  	if len(ranges) == 0 {
    25  		return rt
    26  	}
    27  
    28  	iter := tablesIter(make([]tableIndex, len(ranges)))
    29  
    30  	for i, t := range ranges {
    31  		iter[i] = tableIndex{t, 0, atEnd}
    32  		if len(t.R16) > 0 {
    33  			iter[i].next = rune(t.R16[0].Lo)
    34  		}
    35  	}
    36  
    37  	if r0 := iter.next16(); r0.Stride != 0 {
    38  		for {
    39  			r1 := iter.next16()
    40  			if r1.Stride == 0 {
    41  				rt.R16 = append(rt.R16, r0)
    42  				break
    43  			}
    44  			stride := r1.Lo - r0.Hi
    45  			if (r1.Lo == r1.Hi || stride == r1.Stride) && (r0.Lo == r0.Hi || stride == r0.Stride) {
    46  				// Fully merge the next range into the previous one.
    47  				r0.Hi, r0.Stride = r1.Hi, stride
    48  				continue
    49  			} else if stride == r0.Stride {
    50  				// Move the first element of r1 to r0. This may eliminate an
    51  				// entry.
    52  				r0.Hi = r1.Lo
    53  				r0.Stride = stride
    54  				r1.Lo = r1.Lo + r1.Stride
    55  				if r1.Lo > r1.Hi {
    56  					continue
    57  				}
    58  			}
    59  			rt.R16 = append(rt.R16, r0)
    60  			r0 = r1
    61  		}
    62  	}
    63  
    64  	for i, t := range ranges {
    65  		iter[i] = tableIndex{t, 0, atEnd}
    66  		if len(t.R32) > 0 {
    67  			iter[i].next = rune(t.R32[0].Lo)
    68  		}
    69  	}
    70  
    71  	if r0 := iter.next32(); r0.Stride != 0 {
    72  		for {
    73  			r1 := iter.next32()
    74  			if r1.Stride == 0 {
    75  				rt.R32 = append(rt.R32, r0)
    76  				break
    77  			}
    78  			stride := r1.Lo - r0.Hi
    79  			if (r1.Lo == r1.Hi || stride == r1.Stride) && (r0.Lo == r0.Hi || stride == r0.Stride) {
    80  				// Fully merge the next range into the previous one.
    81  				r0.Hi, r0.Stride = r1.Hi, stride
    82  				continue
    83  			} else if stride == r0.Stride {
    84  				// Move the first element of r1 to r0. This may eliminate an
    85  				// entry.
    86  				r0.Hi = r1.Lo
    87  				r1.Lo = r1.Lo + r1.Stride
    88  				if r1.Lo > r1.Hi {
    89  					continue
    90  				}
    91  			}
    92  			rt.R32 = append(rt.R32, r0)
    93  			r0 = r1
    94  		}
    95  	}
    96  
    97  	for i := 0; i < len(rt.R16) && rt.R16[i].Hi <= unicode.MaxLatin1; i++ {
    98  		rt.LatinOffset = i + 1
    99  	}
   100  
   101  	return rt
   102  }
   103  
   104  type tableIndex struct {
   105  	t    *unicode.RangeTable
   106  	p    uint32
   107  	next rune
   108  }
   109  
   110  type tablesIter []tableIndex
   111  
   112  // sortIter does an insertion sort using the next field of tableIndex. Insertion
   113  // sort is a good sorting algorithm for this case.
   114  func sortIter(t []tableIndex) {
   115  	for i := range t {
   116  		for j := i; j > 0 && t[j-1].next > t[j].next; j-- {
   117  			t[j], t[j-1] = t[j-1], t[j]
   118  		}
   119  	}
   120  }
   121  
   122  // next16 finds the ranged to be added to the table. If ranges overlap between
   123  // multiple tables it clips the result to a non-overlapping range if the
   124  // elements are not fully subsumed. It returns a zero range if there are no more
   125  // ranges.
   126  func (ti tablesIter) next16() unicode.Range16 {
   127  	sortIter(ti)
   128  
   129  	t0 := ti[0]
   130  	if t0.next == atEnd {
   131  		return unicode.Range16{}
   132  	}
   133  	r0 := t0.t.R16[t0.p]
   134  	r0.Lo = uint16(t0.next)
   135  
   136  	// We restrict the Hi of the current range if it overlaps with another range.
   137  	for i := range ti {
   138  		tn := ti[i]
   139  		// Since our tableIndices are sorted by next, we can break if the there
   140  		// is no overlap. The first value of a next range can always be merged
   141  		// into the current one, so we can break in case of equality as well.
   142  		if rune(r0.Hi) <= tn.next {
   143  			break
   144  		}
   145  		rn := tn.t.R16[tn.p]
   146  		rn.Lo = uint16(tn.next)
   147  
   148  		// Limit r0.Hi based on next ranges in list, but allow it to overlap
   149  		// with ranges as long as it subsumes it.
   150  		m := (rn.Lo - r0.Lo) % r0.Stride
   151  		if m == 0 && (rn.Stride == r0.Stride || rn.Lo == rn.Hi) {
   152  			// Overlap, take the min of the two Hi values: for simplicity's sake
   153  			// we only process one range at a time.
   154  			if r0.Hi > rn.Hi {
   155  				r0.Hi = rn.Hi
   156  			}
   157  		} else {
   158  			// Not a compatible stride. Set to the last possible value before
   159  			// rn.Lo, but ensure there is at least one value.
   160  			if x := rn.Lo - m; r0.Lo <= x {
   161  				r0.Hi = x
   162  			}
   163  			break
   164  		}
   165  	}
   166  
   167  	// Update the next values for each table.
   168  	for i := range ti {
   169  		tn := &ti[i]
   170  		if rune(r0.Hi) < tn.next {
   171  			break
   172  		}
   173  		rn := tn.t.R16[tn.p]
   174  		stride := rune(rn.Stride)
   175  		tn.next += stride * (1 + ((rune(r0.Hi) - tn.next) / stride))
   176  		if rune(rn.Hi) < tn.next {
   177  			if tn.p++; int(tn.p) == len(tn.t.R16) {
   178  				tn.next = atEnd
   179  			} else {
   180  				tn.next = rune(tn.t.R16[tn.p].Lo)
   181  			}
   182  		}
   183  	}
   184  
   185  	if r0.Lo == r0.Hi {
   186  		r0.Stride = 1
   187  	}
   188  
   189  	return r0
   190  }
   191  
   192  // next32 finds the ranged to be added to the table. If ranges overlap between
   193  // multiple tables it clips the result to a non-overlapping range if the
   194  // elements are not fully subsumed. It returns a zero range if there are no more
   195  // ranges.
   196  func (ti tablesIter) next32() unicode.Range32 {
   197  	sortIter(ti)
   198  
   199  	t0 := ti[0]
   200  	if t0.next == atEnd {
   201  		return unicode.Range32{}
   202  	}
   203  	r0 := t0.t.R32[t0.p]
   204  	r0.Lo = uint32(t0.next)
   205  
   206  	// We restrict the Hi of the current range if it overlaps with another range.
   207  	for i := range ti {
   208  		tn := ti[i]
   209  		// Since our tableIndices are sorted by next, we can break if the there
   210  		// is no overlap. The first value of a next range can always be merged
   211  		// into the current one, so we can break in case of equality as well.
   212  		if rune(r0.Hi) <= tn.next {
   213  			break
   214  		}
   215  		rn := tn.t.R32[tn.p]
   216  		rn.Lo = uint32(tn.next)
   217  
   218  		// Limit r0.Hi based on next ranges in list, but allow it to overlap
   219  		// with ranges as long as it subsumes it.
   220  		m := (rn.Lo - r0.Lo) % r0.Stride
   221  		if m == 0 && (rn.Stride == r0.Stride || rn.Lo == rn.Hi) {
   222  			// Overlap, take the min of the two Hi values: for simplicity's sake
   223  			// we only process one range at a time.
   224  			if r0.Hi > rn.Hi {
   225  				r0.Hi = rn.Hi
   226  			}
   227  		} else {
   228  			// Not a compatible stride. Set to the last possible value before
   229  			// rn.Lo, but ensure there is at least one value.
   230  			if x := rn.Lo - m; r0.Lo <= x {
   231  				r0.Hi = x
   232  			}
   233  			break
   234  		}
   235  	}
   236  
   237  	// Update the next values for each table.
   238  	for i := range ti {
   239  		tn := &ti[i]
   240  		if rune(r0.Hi) < tn.next {
   241  			break
   242  		}
   243  		rn := tn.t.R32[tn.p]
   244  		stride := rune(rn.Stride)
   245  		tn.next += stride * (1 + ((rune(r0.Hi) - tn.next) / stride))
   246  		if rune(rn.Hi) < tn.next {
   247  			if tn.p++; int(tn.p) == len(tn.t.R32) {
   248  				tn.next = atEnd
   249  			} else {
   250  				tn.next = rune(tn.t.R32[tn.p].Lo)
   251  			}
   252  		}
   253  	}
   254  
   255  	if r0.Lo == r0.Hi {
   256  		r0.Stride = 1
   257  	}
   258  
   259  	return r0
   260  }
   261  

View as plain text