...

Source file src/golang.org/x/text/unicode/bidi/bidi.go

Documentation: golang.org/x/text/unicode/bidi

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:generate go run gen.go gen_trieval.go gen_ranges.go
     6  
     7  // Package bidi contains functionality for bidirectional text support.
     8  //
     9  // See https://www.unicode.org/reports/tr9.
    10  //
    11  // NOTE: UNDER CONSTRUCTION. This API may change in backwards incompatible ways
    12  // and without notice.
    13  package bidi // import "golang.org/x/text/unicode/bidi"
    14  
    15  // TODO
    16  // - Transformer for reordering?
    17  // - Transformer (validator, really) for Bidi Rule.
    18  
    19  import (
    20  	"bytes"
    21  )
    22  
    23  // This API tries to avoid dealing with embedding levels for now. Under the hood
    24  // these will be computed, but the question is to which extent the user should
    25  // know they exist. We should at some point allow the user to specify an
    26  // embedding hierarchy, though.
    27  
// A Direction indicates the overall flow of text.
type Direction int

const (
	// LeftToRight indicates the text contains no right-to-left characters and
	// that either there are some left-to-right characters or the option
	// DefaultDirection(LeftToRight) was passed. It is the zero value of
	// Direction.
	LeftToRight Direction = iota

	// RightToLeft indicates the text contains no left-to-right characters and
	// that either there are some right-to-left characters or the option
	// DefaultDirection(RightToLeft) was passed.
	RightToLeft

	// Mixed indicates text contains both left-to-right and right-to-left
	// characters.
	Mixed

	// Neutral means that text contains neither left-to-right nor right-to-left
	// characters and that no default direction has been set.
	Neutral
)
    50  
// options collects the settings that Option functions can modify.
type options struct {
	// defaultDirection is the value stored by DefaultDirection. Its zero
	// value is LeftToRight.
	defaultDirection Direction
}

// An Option is an option for Bidi processing. It is applied to an options
// value to record one setting.
type Option func(*options)
    57  
    58  // ICU allows the user to define embedding levels. This may be used, for example,
    59  // to use hierarchical structure of markup languages to define embeddings.
    60  // The following option may be a way to expose this functionality in this API.
    61  // // LevelFunc sets a function that associates nesting levels with the given text.
    62  // // The levels function will be called with monotonically increasing values for p.
    63  // func LevelFunc(levels func(p int) int) Option {
    64  // 	panic("unimplemented")
    65  // }
    66  
    67  // DefaultDirection sets the default direction for a Paragraph. The direction is
    68  // overridden if the text contains directional characters.
    69  func DefaultDirection(d Direction) Option {
    70  	return func(opts *options) {
    71  		opts.defaultDirection = d
    72  	}
    73  }
    74  
// A Paragraph holds a single Paragraph for Bidi processing.
//
// The fields are used as follows:
//   - p is the text handed to SetBytes or SetString.
//   - o is the Ordering stored by the most recent successful call to Order.
//   - opts are the options passed to the most recent SetBytes or SetString.
//   - runes, types, pairTypes and pairValues are built by prepareInput and
//     hold, per rune of the text, the rune itself, its bidi class, its bracket
//     type and the bracket rune (0 for runes that are not brackets).
//   - options is the result of applying opts; it is populated by Order.
type Paragraph struct {
	p          []byte
	o          Ordering
	opts       []Option
	types      []Class
	pairTypes  []bracketType
	pairValues []rune
	runes      []rune
	options    options
}
    86  
    87  // Initialize the p.pairTypes, p.pairValues and p.types from the input previously
    88  // set by p.SetBytes() or p.SetString(). Also limit the input up to (and including) a paragraph
    89  // separator (bidi class B).
    90  //
    91  // The function p.Order() needs these values to be set, so this preparation could be postponed.
    92  // But since the SetBytes and SetStrings functions return the length of the input up to the paragraph
    93  // separator, the whole input needs to be processed anyway and should not be done twice.
    94  //
    95  // The function has the same return values as SetBytes() / SetString()
    96  func (p *Paragraph) prepareInput() (n int, err error) {
    97  	p.runes = bytes.Runes(p.p)
    98  	bytecount := 0
    99  	// clear slices from previous SetString or SetBytes
   100  	p.pairTypes = nil
   101  	p.pairValues = nil
   102  	p.types = nil
   103  
   104  	for _, r := range p.runes {
   105  		props, i := LookupRune(r)
   106  		bytecount += i
   107  		cls := props.Class()
   108  		if cls == B {
   109  			return bytecount, nil
   110  		}
   111  		p.types = append(p.types, cls)
   112  		if props.IsOpeningBracket() {
   113  			p.pairTypes = append(p.pairTypes, bpOpen)
   114  			p.pairValues = append(p.pairValues, r)
   115  		} else if props.IsBracket() {
   116  			// this must be a closing bracket,
   117  			// since IsOpeningBracket is not true
   118  			p.pairTypes = append(p.pairTypes, bpClose)
   119  			p.pairValues = append(p.pairValues, r)
   120  		} else {
   121  			p.pairTypes = append(p.pairTypes, bpNone)
   122  			p.pairValues = append(p.pairValues, 0)
   123  		}
   124  	}
   125  	return bytecount, nil
   126  }
   127  
// SetBytes configures p for the given paragraph text. It replaces text
// previously set by SetBytes or SetString. If b contains a paragraph separator
// it will only process the first paragraph and report the number of bytes
// consumed from b including this separator. Error may be non-nil if options are
// given.
//
// b is stored without being copied. The options are only recorded here; they
// are applied when Order is called.
func (p *Paragraph) SetBytes(b []byte, opts ...Option) (n int, err error) {
	p.p = b
	p.opts = opts
	return p.prepareInput()
}
   138  
// SetString configures p for the given paragraph text. It replaces text
// previously set by SetBytes or SetString. If s contains a paragraph separator
// it will only process the first paragraph and report the number of bytes
// consumed from s including this separator. Error may be non-nil if options are
// given.
//
// The options are only recorded here; they are applied when Order is called.
func (p *Paragraph) SetString(s string, opts ...Option) (n int, err error) {
	p.p = []byte(s)
	p.opts = opts
	return p.prepareInput()
}
   149  
// IsLeftToRight reports whether the principal direction of rendering for this
// paragraph is left-to-right. If this returns false, the principal direction
// of rendering is right-to-left.
//
// The answer is derived from the Ordering stored by Order, so it is only
// meaningful after Order has been called.
func (p *Paragraph) IsLeftToRight() bool {
	return p.Direction() == LeftToRight
}
   156  
// Direction returns the direction of the text of this paragraph.
//
// The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
//
// The value is taken from the Ordering stored by the most recent successful
// call to Order, so it is only meaningful after Order has been called.
func (p *Paragraph) Direction() Direction {
	return p.o.Direction()
}
   163  
   164  // TODO: what happens if the position is > len(input)? This should return an error.
   165  
   166  // RunAt reports the Run at the given position of the input text.
   167  //
   168  // This method can be used for computing line breaks on paragraphs.
   169  func (p *Paragraph) RunAt(pos int) Run {
   170  	c := 0
   171  	runNumber := 0
   172  	for i, r := range p.o.runes {
   173  		c += len(r)
   174  		if pos < c {
   175  			runNumber = i
   176  		}
   177  	}
   178  	return p.o.Run(runNumber)
   179  }
   180  
   181  func calculateOrdering(levels []level, runes []rune) Ordering {
   182  	var curDir Direction
   183  
   184  	prevDir := Neutral
   185  	prevI := 0
   186  
   187  	o := Ordering{}
   188  	// lvl = 0,2,4,...: left to right
   189  	// lvl = 1,3,5,...: right to left
   190  	for i, lvl := range levels {
   191  		if lvl%2 == 0 {
   192  			curDir = LeftToRight
   193  		} else {
   194  			curDir = RightToLeft
   195  		}
   196  		if curDir != prevDir {
   197  			if i > 0 {
   198  				o.runes = append(o.runes, runes[prevI:i])
   199  				o.directions = append(o.directions, prevDir)
   200  				o.startpos = append(o.startpos, prevI)
   201  			}
   202  			prevI = i
   203  			prevDir = curDir
   204  		}
   205  	}
   206  	o.runes = append(o.runes, runes[prevI:])
   207  	o.directions = append(o.directions, prevDir)
   208  	o.startpos = append(o.startpos, prevI)
   209  	return o
   210  }
   211  
   212  // Order computes the visual ordering of all the runs in a Paragraph.
   213  func (p *Paragraph) Order() (Ordering, error) {
   214  	if len(p.types) == 0 {
   215  		return Ordering{}, nil
   216  	}
   217  
   218  	for _, fn := range p.opts {
   219  		fn(&p.options)
   220  	}
   221  	lvl := level(-1)
   222  	if p.options.defaultDirection == RightToLeft {
   223  		lvl = 1
   224  	}
   225  	para, err := newParagraph(p.types, p.pairTypes, p.pairValues, lvl)
   226  	if err != nil {
   227  		return Ordering{}, err
   228  	}
   229  
   230  	levels := para.getLevels([]int{len(p.types)})
   231  
   232  	p.o = calculateOrdering(levels, p.runes)
   233  	return p.o, nil
   234  }
   235  
   236  // Line computes the visual ordering of runs for a single line starting and
   237  // ending at the given positions in the original text.
   238  func (p *Paragraph) Line(start, end int) (Ordering, error) {
   239  	lineTypes := p.types[start:end]
   240  	para, err := newParagraph(lineTypes, p.pairTypes[start:end], p.pairValues[start:end], -1)
   241  	if err != nil {
   242  		return Ordering{}, err
   243  	}
   244  	levels := para.getLevels([]int{len(lineTypes)})
   245  	o := calculateOrdering(levels, p.runes[start:end])
   246  	return o, nil
   247  }
   248  
// An Ordering holds the computed visual order of runs of a Paragraph. Calling
// SetBytes or SetString on the originating Paragraph invalidates an Ordering.
// The methods of an Ordering should only be called by one goroutine at a time.
//
// The three slices are parallel: entry i of each describes run i. runes holds
// the runes of the run, directions its direction and startpos the index of its
// first rune within the rune sequence the ordering was computed from.
type Ordering struct {
	runes      [][]rune
	directions []Direction
	startpos   []int
}
   257  
   258  // Direction reports the directionality of the runs.
   259  //
   260  // The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
   261  func (o *Ordering) Direction() Direction {
   262  	return o.directions[0]
   263  }
   264  
// NumRuns returns the number of runs. Valid arguments to Run are in the range
// [0, NumRuns()).
func (o *Ordering) NumRuns() int {
	return len(o.runes)
}
   269  
   270  // Run returns the ith run within the ordering.
   271  func (o *Ordering) Run(i int) Run {
   272  	r := Run{
   273  		runes:     o.runes[i],
   274  		direction: o.directions[i],
   275  		startpos:  o.startpos[i],
   276  	}
   277  	return r
   278  }
   279  
   280  // TODO: perhaps with options.
   281  // // Reorder creates a reader that reads the runes in visual order per character.
   282  // // Modifiers remain after the runes they modify.
   283  // func (l *Runs) Reorder() io.Reader {
   284  // 	panic("unimplemented")
   285  // }
   286  
// A Run is a continuous sequence of characters of a single direction.
//
// runes holds the characters of the run in their original order, direction is
// the direction of the run and startpos is the rune index of the first
// character of the run within the sequence the Ordering was computed from.
type Run struct {
	runes     []rune
	direction Direction
	startpos  int
}
   293  
// String returns the text of the run in its original order, encoded as UTF-8.
func (r *Run) String() string {
	return string(r.runes)
}
   298  
   299  // Bytes returns the text of the run in its original order.
   300  func (r *Run) Bytes() []byte {
   301  	return []byte(r.String())
   302  }
   303  
   304  // TODO: methods for
   305  // - Display order
   306  // - headers and footers
   307  // - bracket replacement.
   308  
// Direction reports the direction of the run, as recorded when the Ordering
// the run belongs to was computed.
func (r *Run) Direction() Direction {
	return r.direction
}
   313  
   314  // Pos returns the position of the Run within the text passed to SetBytes or SetString of the
   315  // originating Paragraph value.
   316  func (r *Run) Pos() (start, end int) {
   317  	return r.startpos, r.startpos + len(r.runes) - 1
   318  }
   319  
   320  // AppendReverse reverses the order of characters of in, appends them to out,
   321  // and returns the result. Modifiers will still follow the runes they modify.
   322  // Brackets are replaced with their counterparts.
   323  func AppendReverse(out, in []byte) []byte {
   324  	ret := make([]byte, len(in)+len(out))
   325  	copy(ret, out)
   326  	inRunes := bytes.Runes(in)
   327  
   328  	for i, r := range inRunes {
   329  		prop, _ := LookupRune(r)
   330  		if prop.IsBracket() {
   331  			inRunes[i] = prop.reverseBracket(r)
   332  		}
   333  	}
   334  
   335  	for i, j := 0, len(inRunes)-1; i < j; i, j = i+1, j-1 {
   336  		inRunes[i], inRunes[j] = inRunes[j], inRunes[i]
   337  	}
   338  	copy(ret[len(out):], string(inRunes))
   339  
   340  	return ret
   341  }
   342  
   343  // ReverseString reverses the order of characters in s and returns a new string.
   344  // Modifiers will still follow the runes they modify. Brackets are replaced with
   345  // their counterparts.
   346  func ReverseString(s string) string {
   347  	input := []rune(s)
   348  	li := len(input)
   349  	ret := make([]rune, li)
   350  	for i, r := range input {
   351  		prop, _ := LookupRune(r)
   352  		if prop.IsBracket() {
   353  			ret[li-i-1] = prop.reverseBracket(r)
   354  		} else {
   355  			ret[li-i-1] = r
   356  		}
   357  	}
   358  	return string(ret)
   359  }
   360  

View as plain text