...

Source file src/unicode/graphic.go

Documentation: unicode

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package unicode
     6  
     7  // Bit masks for each code point under U+0100, for fast lookup.
     8  const (
     9  	pC     = 1 << iota // a control character.
    10  	pP                 // a punctuation character.
    11  	pN                 // a numeral.
    12  	pS                 // a symbolic character.
    13  	pZ                 // a spacing character.
    14  	pLu                // an upper-case letter.
    15  	pLl                // a lower-case letter.
    16  	pp                 // a printable character according to Go's definition.
    17  	pg     = pp | pZ   // a graphical character according to the Unicode definition.
    18  	pLo    = pLl | pLu // a letter that is neither upper nor lower case.
    19  	pLmask = pLo
    20  )
    21  
    22  // GraphicRanges defines the set of graphic characters according to Unicode.
    23  var GraphicRanges = []*RangeTable{
    24  	L, M, N, P, S, Zs,
    25  }
    26  
    27  // PrintRanges defines the set of printable characters according to Go.
    28  // ASCII space, U+0020, is handled separately.
    29  var PrintRanges = []*RangeTable{
    30  	L, M, N, P, S,
    31  }
    32  
    33  // IsGraphic reports whether the rune is defined as a Graphic by Unicode.
    34  // Such characters include letters, marks, numbers, punctuation, symbols, and
    35  // spaces, from categories [L], [M], [N], [P], [S], [Zs].
    36  func IsGraphic(r rune) bool {
    37  	// We convert to uint32 to avoid the extra test for negative,
    38  	// and in the index we convert to uint8 to avoid the range check.
    39  	if uint32(r) <= MaxLatin1 {
    40  		return properties[uint8(r)]&pg != 0
    41  	}
    42  	return In(r, GraphicRanges...)
    43  }
    44  
    45  // IsPrint reports whether the rune is defined as printable by Go. Such
    46  // characters include letters, marks, numbers, punctuation, symbols, and the
    47  // ASCII space character, from categories [L], [M], [N], [P], [S] and the ASCII space
    48  // character. This categorization is the same as [IsGraphic] except that the
    49  // only spacing character is ASCII space, U+0020.
    50  func IsPrint(r rune) bool {
    51  	if uint32(r) <= MaxLatin1 {
    52  		return properties[uint8(r)]&pp != 0
    53  	}
    54  	return In(r, PrintRanges...)
    55  }
    56  
    57  // IsOneOf reports whether the rune is a member of one of the ranges.
    58  // The function "In" provides a nicer signature and should be used in preference to IsOneOf.
    59  func IsOneOf(ranges []*RangeTable, r rune) bool {
    60  	for _, inside := range ranges {
    61  		if Is(inside, r) {
    62  			return true
    63  		}
    64  	}
    65  	return false
    66  }
    67  
    68  // In reports whether the rune is a member of one of the ranges.
    69  func In(r rune, ranges ...*RangeTable) bool {
    70  	for _, inside := range ranges {
    71  		if Is(inside, r) {
    72  			return true
    73  		}
    74  	}
    75  	return false
    76  }
    77  
    78  // IsControl reports whether the rune is a control character.
    79  // The [C] ([Other]) Unicode category includes more code points
    80  // such as surrogates; use [Is](C, r) to test for them.
    81  func IsControl(r rune) bool {
    82  	if uint32(r) <= MaxLatin1 {
    83  		return properties[uint8(r)]&pC != 0
    84  	}
    85  	// All control characters are < MaxLatin1.
    86  	return false
    87  }
    88  
    89  // IsLetter reports whether the rune is a letter (category [L]).
    90  func IsLetter(r rune) bool {
    91  	if uint32(r) <= MaxLatin1 {
    92  		return properties[uint8(r)]&(pLmask) != 0
    93  	}
    94  	return isExcludingLatin(Letter, r)
    95  }
    96  
    97  // IsMark reports whether the rune is a mark character (category [M]).
    98  func IsMark(r rune) bool {
    99  	// There are no mark characters in Latin-1.
   100  	return isExcludingLatin(Mark, r)
   101  }
   102  
   103  // IsNumber reports whether the rune is a number (category [N]).
   104  func IsNumber(r rune) bool {
   105  	if uint32(r) <= MaxLatin1 {
   106  		return properties[uint8(r)]&pN != 0
   107  	}
   108  	return isExcludingLatin(Number, r)
   109  }
   110  
   111  // IsPunct reports whether the rune is a Unicode punctuation character
   112  // (category [P]).
   113  func IsPunct(r rune) bool {
   114  	if uint32(r) <= MaxLatin1 {
   115  		return properties[uint8(r)]&pP != 0
   116  	}
   117  	return Is(Punct, r)
   118  }
   119  
   120  // IsSpace reports whether the rune is a space character as defined
   121  // by Unicode's White Space property; in the Latin-1 space
   122  // this is
   123  //
   124  //	'\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP).
   125  //
   126  // Other definitions of spacing characters are set by category
   127  // Z and property [Pattern_White_Space].
   128  func IsSpace(r rune) bool {
   129  	// This property isn't the same as Z; special-case it.
   130  	if uint32(r) <= MaxLatin1 {
   131  		switch r {
   132  		case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
   133  			return true
   134  		}
   135  		return false
   136  	}
   137  	return isExcludingLatin(White_Space, r)
   138  }
   139  
   140  // IsSymbol reports whether the rune is a symbolic character.
   141  func IsSymbol(r rune) bool {
   142  	if uint32(r) <= MaxLatin1 {
   143  		return properties[uint8(r)]&pS != 0
   144  	}
   145  	return isExcludingLatin(Symbol, r)
   146  }
   147  

View as plain text