...
Run Format

Source file src/unicode/graphic.go

Documentation: unicode

  // Copyright 2011 The Go Authors. All rights reserved.
  // Use of this source code is governed by a BSD-style
  // license that can be found in the LICENSE file.
  
  package unicode
  
  // Bit masks for each code point under U+0100, for fast lookup.
  const (
  	pC     = 1 << iota // a control character.
  	pP                 // a punctuation character.
  	pN                 // a numeral.
  	pS                 // a symbolic character.
  	pZ                 // a spacing character.
  	pLu                // an upper-case letter.
  	pLl                // a lower-case letter.
  	pp                 // a printable character according to Go's definition.
  	pg     = pp | pZ   // a graphical character according to the Unicode definition.
  	pLo    = pLl | pLu // a letter that is neither upper nor lower case.
  	pLmask = pLo
  )
  
  // GraphicRanges defines the set of graphic characters according to Unicode.
  var GraphicRanges = []*RangeTable{
  	L, M, N, P, S, Zs,
  }
  
  // PrintRanges defines the set of printable characters according to Go.
  // ASCII space, U+0020, is handled separately.
  var PrintRanges = []*RangeTable{
  	L, M, N, P, S,
  }
  
  // IsGraphic reports whether the rune is defined as a Graphic by Unicode.
  // Such characters include letters, marks, numbers, punctuation, symbols, and
  // spaces, from categories L, M, N, P, S, Zs.
  func IsGraphic(r rune) bool {
  	// We convert to uint32 to avoid the extra test for negative,
  	// and in the index we convert to uint8 to avoid the range check.
  	if uint32(r) <= MaxLatin1 {
  		return properties[uint8(r)]&pg != 0
  	}
  	return In(r, GraphicRanges...)
  }
  
  // IsPrint reports whether the rune is defined as printable by Go. Such
  // characters include letters, marks, numbers, punctuation, symbols, and the
  // ASCII space character, from categories L, M, N, P, S and the ASCII space
  // character. This categorization is the same as IsGraphic except that the
  // only spacing character is ASCII space, U+0020.
  func IsPrint(r rune) bool {
  	if uint32(r) <= MaxLatin1 {
  		return properties[uint8(r)]&pp != 0
  	}
  	return In(r, PrintRanges...)
  }
  
  // IsOneOf reports whether the rune is a member of one of the ranges.
  // The function "In" provides a nicer signature and should be used in preference to IsOneOf.
  func IsOneOf(ranges []*RangeTable, r rune) bool {
  	for _, inside := range ranges {
  		if Is(inside, r) {
  			return true
  		}
  	}
  	return false
  }
  
  // In reports whether the rune is a member of one of the ranges.
  func In(r rune, ranges ...*RangeTable) bool {
  	for _, inside := range ranges {
  		if Is(inside, r) {
  			return true
  		}
  	}
  	return false
  }
  
  // IsControl reports whether the rune is a control character.
  // The C (Other) Unicode category includes more code points
  // such as surrogates; use Is(C, r) to test for them.
  func IsControl(r rune) bool {
  	if uint32(r) <= MaxLatin1 {
  		return properties[uint8(r)]&pC != 0
  	}
  	// All control characters are < MaxLatin1.
  	return false
  }
  
  // IsLetter reports whether the rune is a letter (category L).
  func IsLetter(r rune) bool {
  	if uint32(r) <= MaxLatin1 {
  		return properties[uint8(r)]&(pLmask) != 0
  	}
  	return isExcludingLatin(Letter, r)
  }
  
  // IsMark reports whether the rune is a mark character (category M).
  // Unlike the other classifiers in this file, it performs no lookup in the
  // Latin-1 properties table.
  func IsMark(r rune) bool {
  	// There are no mark characters in Latin-1, so no fast path is needed.
  	// NOTE(review): isExcludingLatin presumably skips the Latin-1 portion of
  	// the table; confirm against its definition.
  	return isExcludingLatin(Mark, r)
  }
  
  // IsNumber reports whether the rune is a number (category N).
  func IsNumber(r rune) bool {
  	if uint32(r) <= MaxLatin1 {
  		return properties[uint8(r)]&pN != 0
  	}
  	return isExcludingLatin(Number, r)
  }
  
  // IsPunct reports whether the rune is a Unicode punctuation character
  // (category P).
  func IsPunct(r rune) bool {
  	if uint32(r) <= MaxLatin1 {
  		return properties[uint8(r)]&pP != 0
  	}
  	return Is(Punct, r)
  }
  
  // IsSpace reports whether the rune is a space character as defined
  // by Unicode's White Space property; in the Latin-1 space
  // this is
  //	'\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP).
  // Other definitions of spacing characters are set by category
  // Z and property Pattern_White_Space.
  func IsSpace(r rune) bool {
  	// This property isn't the same as Z; special-case it.
  	if uint32(r) <= MaxLatin1 {
  		switch r {
  		case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
  			return true
  		}
  		return false
  	}
  	return isExcludingLatin(White_Space, r)
  }
  
  // IsSymbol reports whether the rune is a symbolic character.
  func IsSymbol(r rune) bool {
  	if uint32(r) <= MaxLatin1 {
  		return properties[uint8(r)]&pS != 0
  	}
  	return isExcludingLatin(Symbol, r)
  }
  

View as plain text