...
Run Format

Source file src/fmt/scan.go

     1	// Copyright 2010 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package fmt
     6	
     7	import (
     8		"errors"
     9		"io"
    10		"math"
    11		"os"
    12		"reflect"
    13		"strconv"
    14		"sync"
    15		"unicode/utf8"
    16	)
    17	
    18	// runeUnreader is the interface to something that can unread runes.
    19	// If the object provided to Scan does not satisfy this interface,
    20	// a local buffer will be used to back up the input, but its contents
    21	// will be lost when Scan returns.
    22	type runeUnreader interface {
    23		UnreadRune() error
    24	}
    25	
    26	// ScanState represents the scanner state passed to custom scanners.
    27	// Scanners may do rune-at-a-time scanning or ask the ScanState
    28	// to discover the next space-delimited token.
    29	type ScanState interface {
    30		// ReadRune reads the next rune (Unicode code point) from the input.
    31		// If invoked during Scanln, Fscanln, or Sscanln, ReadRune() will
    32		// return EOF after returning the first '\n' or when reading beyond
    33		// the specified width.
    34		ReadRune() (r rune, size int, err error)
    35		// UnreadRune causes the next call to ReadRune to return the same rune.
    36		UnreadRune() error
    37		// SkipSpace skips space in the input. Newlines are treated appropriately
    38		// for the operation being performed; see the package documentation
    39		// for more information.
    40		SkipSpace()
    41		// Token skips space in the input if skipSpace is true, then returns the
    42		// run of Unicode code points c satisfying f(c).  If f is nil,
    43		// !unicode.IsSpace(c) is used; that is, the token will hold non-space
    44		// characters.  Newlines are treated appropriately for the operation being
    45		// performed; see the package documentation for more information.
    46		// The returned slice points to shared data that may be overwritten
    47		// by the next call to Token, a call to a Scan function using the ScanState
    48		// as input, or when the calling Scan method returns.
    49		Token(skipSpace bool, f func(rune) bool) (token []byte, err error)
    50		// Width returns the value of the width option and whether it has been set.
    51		// The unit is Unicode code points.
    52		Width() (wid int, ok bool)
    53		// Because ReadRune is implemented by the interface, Read should never be
    54		// called by the scanning routines and a valid implementation of
    55		// ScanState may choose always to return an error from Read.
    56		Read(buf []byte) (n int, err error)
    57	}
    58	
    59	// Scanner is implemented by any value that has a Scan method, which scans
    60	// the input for the representation of a value and stores the result in the
    61	// receiver, which must be a pointer to be useful.  The Scan method is called
    62	// for any argument to Scan, Scanf, or Scanln that implements it.
    63	type Scanner interface {
    64		Scan(state ScanState, verb rune) error
    65	}
    66	
    67	// Scan scans text read from standard input, storing successive
    68	// space-separated values into successive arguments.  Newlines count
    69	// as space.  It returns the number of items successfully scanned.
    70	// If that is less than the number of arguments, err will report why.
    71	func Scan(a ...interface{}) (n int, err error) {
    72		return Fscan(os.Stdin, a...)
    73	}
    74	
    75	// Scanln is similar to Scan, but stops scanning at a newline and
    76	// after the final item there must be a newline or EOF.
    77	func Scanln(a ...interface{}) (n int, err error) {
    78		return Fscanln(os.Stdin, a...)
    79	}
    80	
    81	// Scanf scans text read from standard input, storing successive
    82	// space-separated values into successive arguments as determined by
    83	// the format.  It returns the number of items successfully scanned.
    84	// If that is less than the number of arguments, err will report why.
    85	// Newlines in the input must match newlines in the format.
    86	func Scanf(format string, a ...interface{}) (n int, err error) {
    87		return Fscanf(os.Stdin, format, a...)
    88	}
    89	
    90	type stringReader string
    91	
    92	func (r *stringReader) Read(b []byte) (n int, err error) {
    93		n = copy(b, *r)
    94		*r = (*r)[n:]
    95		if n == 0 {
    96			err = io.EOF
    97		}
    98		return
    99	}
   100	
   101	// Sscan scans the argument string, storing successive space-separated
   102	// values into successive arguments.  Newlines count as space.  It
   103	// returns the number of items successfully scanned.  If that is less
   104	// than the number of arguments, err will report why.
   105	func Sscan(str string, a ...interface{}) (n int, err error) {
   106		return Fscan((*stringReader)(&str), a...)
   107	}
   108	
   109	// Sscanln is similar to Sscan, but stops scanning at a newline and
   110	// after the final item there must be a newline or EOF.
   111	func Sscanln(str string, a ...interface{}) (n int, err error) {
   112		return Fscanln((*stringReader)(&str), a...)
   113	}
   114	
   115	// Sscanf scans the argument string, storing successive space-separated
   116	// values into successive arguments as determined by the format.  It
   117	// returns the number of items successfully parsed.
   118	// Newlines in the input must match newlines in the format.
   119	func Sscanf(str string, format string, a ...interface{}) (n int, err error) {
   120		return Fscanf((*stringReader)(&str), format, a...)
   121	}
   122	
   123	// Fscan scans text read from r, storing successive space-separated
   124	// values into successive arguments.  Newlines count as space.  It
   125	// returns the number of items successfully scanned.  If that is less
   126	// than the number of arguments, err will report why.
   127	func Fscan(r io.Reader, a ...interface{}) (n int, err error) {
   128		s, old := newScanState(r, true, false)
   129		n, err = s.doScan(a)
   130		s.free(old)
   131		return
   132	}
   133	
   134	// Fscanln is similar to Fscan, but stops scanning at a newline and
   135	// after the final item there must be a newline or EOF.
   136	func Fscanln(r io.Reader, a ...interface{}) (n int, err error) {
   137		s, old := newScanState(r, false, true)
   138		n, err = s.doScan(a)
   139		s.free(old)
   140		return
   141	}
   142	
   143	// Fscanf scans text read from r, storing successive space-separated
   144	// values into successive arguments as determined by the format.  It
   145	// returns the number of items successfully parsed.
   146	// Newlines in the input must match newlines in the format.
   147	func Fscanf(r io.Reader, format string, a ...interface{}) (n int, err error) {
   148		s, old := newScanState(r, false, false)
   149		n, err = s.doScanf(format, a)
   150		s.free(old)
   151		return
   152	}
   153	
   154	// scanError represents an error generated by the scanning software.
   155	// It's used as a unique signature to identify such errors when recovering.
   156	type scanError struct {
   157		err error
   158	}
   159	
   160	const eof = -1
   161	
   162	// ss is the internal implementation of ScanState.
   163	type ss struct {
   164		rr       io.RuneReader // where to read input
   165		buf      buffer        // token accumulator
   166		peekRune rune          // one-rune lookahead
   167		prevRune rune          // last rune returned by ReadRune
   168		count    int           // runes consumed so far.
   169		atEOF    bool          // already read EOF
   170		ssave
   171	}
   172	
   173	// ssave holds the parts of ss that need to be
   174	// saved and restored on recursive scans.
   175	type ssave struct {
   176		validSave bool // is or was a part of an actual ss.
   177		nlIsEnd   bool // whether newline terminates scan
   178		nlIsSpace bool // whether newline counts as white space
   179		argLimit  int  // max value of ss.count for this arg; argLimit <= limit
   180		limit     int  // max value of ss.count.
   181		maxWid    int  // width of this arg.
   182	}
   183	
   184	// The Read method is only in ScanState so that ScanState
   185	// satisfies io.Reader. It will never be called when used as
   186	// intended, so there is no need to make it actually work.
   187	func (s *ss) Read(buf []byte) (n int, err error) {
   188		return 0, errors.New("ScanState's Read should not be called. Use ReadRune")
   189	}
   190	
   191	func (s *ss) ReadRune() (r rune, size int, err error) {
   192		if s.peekRune >= 0 {
   193			s.count++
   194			r = s.peekRune
   195			size = utf8.RuneLen(r)
   196			s.prevRune = r
   197			s.peekRune = -1
   198			return
   199		}
   200		if s.atEOF || s.nlIsEnd && s.prevRune == '\n' || s.count >= s.argLimit {
   201			err = io.EOF
   202			return
   203		}
   204	
   205		r, size, err = s.rr.ReadRune()
   206		if err == nil {
   207			s.count++
   208			s.prevRune = r
   209		} else if err == io.EOF {
   210			s.atEOF = true
   211		}
   212		return
   213	}
   214	
   215	func (s *ss) Width() (wid int, ok bool) {
   216		if s.maxWid == hugeWid {
   217			return 0, false
   218		}
   219		return s.maxWid, true
   220	}
   221	
   222	// The public method returns an error; this private one panics.
   223	// If getRune reaches EOF, the return value is EOF (-1).
   224	func (s *ss) getRune() (r rune) {
   225		r, _, err := s.ReadRune()
   226		if err != nil {
   227			if err == io.EOF {
   228				return eof
   229			}
   230			s.error(err)
   231		}
   232		return
   233	}
   234	
   235	// mustReadRune turns io.EOF into a panic(io.ErrUnexpectedEOF).
   236	// It is called in cases such as string scanning where an EOF is a
   237	// syntax error.
   238	func (s *ss) mustReadRune() (r rune) {
   239		r = s.getRune()
   240		if r == eof {
   241			s.error(io.ErrUnexpectedEOF)
   242		}
   243		return
   244	}
   245	
   246	func (s *ss) UnreadRune() error {
   247		if u, ok := s.rr.(runeUnreader); ok {
   248			u.UnreadRune()
   249		} else {
   250			s.peekRune = s.prevRune
   251		}
   252		s.prevRune = -1
   253		s.count--
   254		return nil
   255	}
   256	
   257	func (s *ss) error(err error) {
   258		panic(scanError{err})
   259	}
   260	
   261	func (s *ss) errorString(err string) {
   262		panic(scanError{errors.New(err)})
   263	}
   264	
   265	func (s *ss) Token(skipSpace bool, f func(rune) bool) (tok []byte, err error) {
   266		defer func() {
   267			if e := recover(); e != nil {
   268				if se, ok := e.(scanError); ok {
   269					err = se.err
   270				} else {
   271					panic(e)
   272				}
   273			}
   274		}()
   275		if f == nil {
   276			f = notSpace
   277		}
   278		s.buf = s.buf[:0]
   279		tok = s.token(skipSpace, f)
   280		return
   281	}
   282	
   283	// space is a copy of the unicode.White_Space ranges,
   284	// to avoid depending on package unicode.
   285	var space = [][2]uint16{
   286		{0x0009, 0x000d},
   287		{0x0020, 0x0020},
   288		{0x0085, 0x0085},
   289		{0x00a0, 0x00a0},
   290		{0x1680, 0x1680},
   291		{0x2000, 0x200a},
   292		{0x2028, 0x2029},
   293		{0x202f, 0x202f},
   294		{0x205f, 0x205f},
   295		{0x3000, 0x3000},
   296	}
   297	
   298	func isSpace(r rune) bool {
   299		if r >= 1<<16 {
   300			return false
   301		}
   302		rx := uint16(r)
   303		for _, rng := range space {
   304			if rx < rng[0] {
   305				return false
   306			}
   307			if rx <= rng[1] {
   308				return true
   309			}
   310		}
   311		return false
   312	}
   313	
   314	// notSpace is the default scanning function used in Token.
   315	func notSpace(r rune) bool {
   316		return !isSpace(r)
   317	}
   318	
   319	// SkipSpace provides Scan methods the ability to skip space and newline
   320	// characters in keeping with the current scanning mode set by format strings
   321	// and Scan/Scanln.
   322	func (s *ss) SkipSpace() {
   323		s.skipSpace(false)
   324	}
   325	
   326	// readRune is a structure to enable reading UTF-8 encoded code points
   327	// from an io.Reader.  It is used if the Reader given to the scanner does
   328	// not already implement io.RuneReader.
   329	type readRune struct {
   330		reader  io.Reader
   331		buf     [utf8.UTFMax]byte // used only inside ReadRune
   332		pending int               // number of bytes in pendBuf; only >0 for bad UTF-8
   333		pendBuf [utf8.UTFMax]byte // bytes left over
   334	}
   335	
   336	// readByte returns the next byte from the input, which may be
   337	// left over from a previous read if the UTF-8 was ill-formed.
   338	func (r *readRune) readByte() (b byte, err error) {
   339		if r.pending > 0 {
   340			b = r.pendBuf[0]
   341			copy(r.pendBuf[0:], r.pendBuf[1:])
   342			r.pending--
   343			return
   344		}
   345		n, err := io.ReadFull(r.reader, r.pendBuf[0:1])
   346		if n != 1 {
   347			return 0, err
   348		}
   349		return r.pendBuf[0], err
   350	}
   351	
   352	// unread saves the bytes for the next read.
   353	func (r *readRune) unread(buf []byte) {
   354		copy(r.pendBuf[r.pending:], buf)
   355		r.pending += len(buf)
   356	}
   357	
   358	// ReadRune returns the next UTF-8 encoded code point from the
   359	// io.Reader inside r.
   360	func (r *readRune) ReadRune() (rr rune, size int, err error) {
   361		r.buf[0], err = r.readByte()
   362		if err != nil {
   363			return 0, 0, err
   364		}
   365		if r.buf[0] < utf8.RuneSelf { // fast check for common ASCII case
   366			rr = rune(r.buf[0])
   367			size = 1 // Known to be 1.
   368			return
   369		}
   370		var n int
   371		for n = 1; !utf8.FullRune(r.buf[0:n]); n++ {
   372			r.buf[n], err = r.readByte()
   373			if err != nil {
   374				if err == io.EOF {
   375					err = nil
   376					break
   377				}
   378				return
   379			}
   380		}
   381		rr, size = utf8.DecodeRune(r.buf[0:n])
   382		if size < n { // an error
   383			r.unread(r.buf[size:n])
   384		}
   385		return
   386	}
   387	
   388	var ssFree = sync.Pool{
   389		New: func() interface{} { return new(ss) },
   390	}
   391	
   392	// newScanState allocates a new ss struct or grab a cached one.
   393	func newScanState(r io.Reader, nlIsSpace, nlIsEnd bool) (s *ss, old ssave) {
   394		s = ssFree.Get().(*ss)
   395		if rr, ok := r.(io.RuneReader); ok {
   396			s.rr = rr
   397		} else {
   398			s.rr = &readRune{reader: r}
   399		}
   400		s.nlIsSpace = nlIsSpace
   401		s.nlIsEnd = nlIsEnd
   402		s.prevRune = -1
   403		s.peekRune = -1
   404		s.atEOF = false
   405		s.limit = hugeWid
   406		s.argLimit = hugeWid
   407		s.maxWid = hugeWid
   408		s.validSave = true
   409		s.count = 0
   410		return
   411	}
   412	
   413	// free saves used ss structs in ssFree; avoid an allocation per invocation.
   414	func (s *ss) free(old ssave) {
   415		// If it was used recursively, just restore the old state.
   416		if old.validSave {
   417			s.ssave = old
   418			return
   419		}
   420		// Don't hold on to ss structs with large buffers.
   421		if cap(s.buf) > 1024 {
   422			return
   423		}
   424		s.buf = s.buf[:0]
   425		s.rr = nil
   426		ssFree.Put(s)
   427	}
   428	
   429	// skipSpace skips spaces and maybe newlines.
   430	func (s *ss) skipSpace(stopAtNewline bool) {
   431		for {
   432			r := s.getRune()
   433			if r == eof {
   434				return
   435			}
   436			if r == '\r' && s.peek("\n") {
   437				continue
   438			}
   439			if r == '\n' {
   440				if stopAtNewline {
   441					break
   442				}
   443				if s.nlIsSpace {
   444					continue
   445				}
   446				s.errorString("unexpected newline")
   447				return
   448			}
   449			if !isSpace(r) {
   450				s.UnreadRune()
   451				break
   452			}
   453		}
   454	}
   455	
   456	// token returns the next space-delimited string from the input.  It
   457	// skips white space.  For Scanln, it stops at newlines.  For Scan,
   458	// newlines are treated as spaces.
   459	func (s *ss) token(skipSpace bool, f func(rune) bool) []byte {
   460		if skipSpace {
   461			s.skipSpace(false)
   462		}
   463		// read until white space or newline
   464		for {
   465			r := s.getRune()
   466			if r == eof {
   467				break
   468			}
   469			if !f(r) {
   470				s.UnreadRune()
   471				break
   472			}
   473			s.buf.WriteRune(r)
   474		}
   475		return s.buf
   476	}
   477	
   478	var complexError = errors.New("syntax error scanning complex number")
   479	var boolError = errors.New("syntax error scanning boolean")
   480	
   481	func indexRune(s string, r rune) int {
   482		for i, c := range s {
   483			if c == r {
   484				return i
   485			}
   486		}
   487		return -1
   488	}
   489	
   490	// consume reads the next rune in the input and reports whether it is in the ok string.
   491	// If accept is true, it puts the character into the input token.
   492	func (s *ss) consume(ok string, accept bool) bool {
   493		r := s.getRune()
   494		if r == eof {
   495			return false
   496		}
   497		if indexRune(ok, r) >= 0 {
   498			if accept {
   499				s.buf.WriteRune(r)
   500			}
   501			return true
   502		}
   503		if r != eof && accept {
   504			s.UnreadRune()
   505		}
   506		return false
   507	}
   508	
   509	// peek reports whether the next character is in the ok string, without consuming it.
   510	func (s *ss) peek(ok string) bool {
   511		r := s.getRune()
   512		if r != eof {
   513			s.UnreadRune()
   514		}
   515		return indexRune(ok, r) >= 0
   516	}
   517	
   518	func (s *ss) notEOF() {
   519		// Guarantee there is data to be read.
   520		if r := s.getRune(); r == eof {
   521			panic(io.EOF)
   522		}
   523		s.UnreadRune()
   524	}
   525	
   526	// accept checks the next rune in the input.  If it's a byte (sic) in the string, it puts it in the
   527	// buffer and returns true. Otherwise it return false.
   528	func (s *ss) accept(ok string) bool {
   529		return s.consume(ok, true)
   530	}
   531	
   532	// okVerb verifies that the verb is present in the list, setting s.err appropriately if not.
   533	func (s *ss) okVerb(verb rune, okVerbs, typ string) bool {
   534		for _, v := range okVerbs {
   535			if v == verb {
   536				return true
   537			}
   538		}
   539		s.errorString("bad verb %" + string(verb) + " for " + typ)
   540		return false
   541	}
   542	
   543	// scanBool returns the value of the boolean represented by the next token.
   544	func (s *ss) scanBool(verb rune) bool {
   545		s.skipSpace(false)
   546		s.notEOF()
   547		if !s.okVerb(verb, "tv", "boolean") {
   548			return false
   549		}
   550		// Syntax-checking a boolean is annoying.  We're not fastidious about case.
   551		switch s.getRune() {
   552		case '0':
   553			return false
   554		case '1':
   555			return true
   556		case 't', 'T':
   557			if s.accept("rR") && (!s.accept("uU") || !s.accept("eE")) {
   558				s.error(boolError)
   559			}
   560			return true
   561		case 'f', 'F':
   562			if s.accept("aA") && (!s.accept("lL") || !s.accept("sS") || !s.accept("eE")) {
   563				s.error(boolError)
   564			}
   565			return false
   566		}
   567		return false
   568	}
   569	
   570	// Numerical elements
   571	const (
   572		binaryDigits      = "01"
   573		octalDigits       = "01234567"
   574		decimalDigits     = "0123456789"
   575		hexadecimalDigits = "0123456789aAbBcCdDeEfF"
   576		sign              = "+-"
   577		period            = "."
   578		exponent          = "eEp"
   579	)
   580	
   581	// getBase returns the numeric base represented by the verb and its digit string.
   582	func (s *ss) getBase(verb rune) (base int, digits string) {
   583		s.okVerb(verb, "bdoUxXv", "integer") // sets s.err
   584		base = 10
   585		digits = decimalDigits
   586		switch verb {
   587		case 'b':
   588			base = 2
   589			digits = binaryDigits
   590		case 'o':
   591			base = 8
   592			digits = octalDigits
   593		case 'x', 'X', 'U':
   594			base = 16
   595			digits = hexadecimalDigits
   596		}
   597		return
   598	}
   599	
   600	// scanNumber returns the numerical string with specified digits starting here.
   601	func (s *ss) scanNumber(digits string, haveDigits bool) string {
   602		if !haveDigits {
   603			s.notEOF()
   604			if !s.accept(digits) {
   605				s.errorString("expected integer")
   606			}
   607		}
   608		for s.accept(digits) {
   609		}
   610		return string(s.buf)
   611	}
   612	
   613	// scanRune returns the next rune value in the input.
   614	func (s *ss) scanRune(bitSize int) int64 {
   615		s.notEOF()
   616		r := int64(s.getRune())
   617		n := uint(bitSize)
   618		x := (r << (64 - n)) >> (64 - n)
   619		if x != r {
   620			s.errorString("overflow on character value " + string(r))
   621		}
   622		return r
   623	}
   624	
   625	// scanBasePrefix reports whether the integer begins with a 0 or 0x,
   626	// and returns the base, digit string, and whether a zero was found.
   627	// It is called only if the verb is %v.
   628	func (s *ss) scanBasePrefix() (base int, digits string, found bool) {
   629		if !s.peek("0") {
   630			return 10, decimalDigits, false
   631		}
   632		s.accept("0")
   633		found = true // We've put a digit into the token buffer.
   634		// Special cases for '0' && '0x'
   635		base, digits = 8, octalDigits
   636		if s.peek("xX") {
   637			s.consume("xX", false)
   638			base, digits = 16, hexadecimalDigits
   639		}
   640		return
   641	}
   642	
   643	// scanInt returns the value of the integer represented by the next
   644	// token, checking for overflow.  Any error is stored in s.err.
   645	func (s *ss) scanInt(verb rune, bitSize int) int64 {
   646		if verb == 'c' {
   647			return s.scanRune(bitSize)
   648		}
   649		s.skipSpace(false)
   650		s.notEOF()
   651		base, digits := s.getBase(verb)
   652		haveDigits := false
   653		if verb == 'U' {
   654			if !s.consume("U", false) || !s.consume("+", false) {
   655				s.errorString("bad unicode format ")
   656			}
   657		} else {
   658			s.accept(sign) // If there's a sign, it will be left in the token buffer.
   659			if verb == 'v' {
   660				base, digits, haveDigits = s.scanBasePrefix()
   661			}
   662		}
   663		tok := s.scanNumber(digits, haveDigits)
   664		i, err := strconv.ParseInt(tok, base, 64)
   665		if err != nil {
   666			s.error(err)
   667		}
   668		n := uint(bitSize)
   669		x := (i << (64 - n)) >> (64 - n)
   670		if x != i {
   671			s.errorString("integer overflow on token " + tok)
   672		}
   673		return i
   674	}
   675	
   676	// scanUint returns the value of the unsigned integer represented
   677	// by the next token, checking for overflow.  Any error is stored in s.err.
   678	func (s *ss) scanUint(verb rune, bitSize int) uint64 {
   679		if verb == 'c' {
   680			return uint64(s.scanRune(bitSize))
   681		}
   682		s.skipSpace(false)
   683		s.notEOF()
   684		base, digits := s.getBase(verb)
   685		haveDigits := false
   686		if verb == 'U' {
   687			if !s.consume("U", false) || !s.consume("+", false) {
   688				s.errorString("bad unicode format ")
   689			}
   690		} else if verb == 'v' {
   691			base, digits, haveDigits = s.scanBasePrefix()
   692		}
   693		tok := s.scanNumber(digits, haveDigits)
   694		i, err := strconv.ParseUint(tok, base, 64)
   695		if err != nil {
   696			s.error(err)
   697		}
   698		n := uint(bitSize)
   699		x := (i << (64 - n)) >> (64 - n)
   700		if x != i {
   701			s.errorString("unsigned integer overflow on token " + tok)
   702		}
   703		return i
   704	}
   705	
   706	// floatToken returns the floating-point number starting here, no longer than swid
   707	// if the width is specified. It's not rigorous about syntax because it doesn't check that
   708	// we have at least some digits, but Atof will do that.
   709	func (s *ss) floatToken() string {
   710		s.buf = s.buf[:0]
   711		// NaN?
   712		if s.accept("nN") && s.accept("aA") && s.accept("nN") {
   713			return string(s.buf)
   714		}
   715		// leading sign?
   716		s.accept(sign)
   717		// Inf?
   718		if s.accept("iI") && s.accept("nN") && s.accept("fF") {
   719			return string(s.buf)
   720		}
   721		// digits?
   722		for s.accept(decimalDigits) {
   723		}
   724		// decimal point?
   725		if s.accept(period) {
   726			// fraction?
   727			for s.accept(decimalDigits) {
   728			}
   729		}
   730		// exponent?
   731		if s.accept(exponent) {
   732			// leading sign?
   733			s.accept(sign)
   734			// digits?
   735			for s.accept(decimalDigits) {
   736			}
   737		}
   738		return string(s.buf)
   739	}
   740	
   741	// complexTokens returns the real and imaginary parts of the complex number starting here.
   742	// The number might be parenthesized and has the format (N+Ni) where N is a floating-point
   743	// number and there are no spaces within.
   744	func (s *ss) complexTokens() (real, imag string) {
   745		// TODO: accept N and Ni independently?
   746		parens := s.accept("(")
   747		real = s.floatToken()
   748		s.buf = s.buf[:0]
   749		// Must now have a sign.
   750		if !s.accept("+-") {
   751			s.error(complexError)
   752		}
   753		// Sign is now in buffer
   754		imagSign := string(s.buf)
   755		imag = s.floatToken()
   756		if !s.accept("i") {
   757			s.error(complexError)
   758		}
   759		if parens && !s.accept(")") {
   760			s.error(complexError)
   761		}
   762		return real, imagSign + imag
   763	}
   764	
   765	// convertFloat converts the string to a float64value.
   766	func (s *ss) convertFloat(str string, n int) float64 {
   767		if p := indexRune(str, 'p'); p >= 0 {
   768			// Atof doesn't handle power-of-2 exponents,
   769			// but they're easy to evaluate.
   770			f, err := strconv.ParseFloat(str[:p], n)
   771			if err != nil {
   772				// Put full string into error.
   773				if e, ok := err.(*strconv.NumError); ok {
   774					e.Num = str
   775				}
   776				s.error(err)
   777			}
   778			m, err := strconv.Atoi(str[p+1:])
   779			if err != nil {
   780				// Put full string into error.
   781				if e, ok := err.(*strconv.NumError); ok {
   782					e.Num = str
   783				}
   784				s.error(err)
   785			}
   786			return math.Ldexp(f, m)
   787		}
   788		f, err := strconv.ParseFloat(str, n)
   789		if err != nil {
   790			s.error(err)
   791		}
   792		return f
   793	}
   794	
   795	// convertComplex converts the next token to a complex128 value.
   796	// The atof argument is a type-specific reader for the underlying type.
   797	// If we're reading complex64, atof will parse float32s and convert them
   798	// to float64's to avoid reproducing this code for each complex type.
   799	func (s *ss) scanComplex(verb rune, n int) complex128 {
   800		if !s.okVerb(verb, floatVerbs, "complex") {
   801			return 0
   802		}
   803		s.skipSpace(false)
   804		s.notEOF()
   805		sreal, simag := s.complexTokens()
   806		real := s.convertFloat(sreal, n/2)
   807		imag := s.convertFloat(simag, n/2)
   808		return complex(real, imag)
   809	}
   810	
   811	// convertString returns the string represented by the next input characters.
   812	// The format of the input is determined by the verb.
   813	func (s *ss) convertString(verb rune) (str string) {
   814		if !s.okVerb(verb, "svqx", "string") {
   815			return ""
   816		}
   817		s.skipSpace(false)
   818		s.notEOF()
   819		switch verb {
   820		case 'q':
   821			str = s.quotedString()
   822		case 'x':
   823			str = s.hexString()
   824		default:
   825			str = string(s.token(true, notSpace)) // %s and %v just return the next word
   826		}
   827		return
   828	}
   829	
   830	// quotedString returns the double- or back-quoted string represented by the next input characters.
   831	func (s *ss) quotedString() string {
   832		s.notEOF()
   833		quote := s.getRune()
   834		switch quote {
   835		case '`':
   836			// Back-quoted: Anything goes until EOF or back quote.
   837			for {
   838				r := s.mustReadRune()
   839				if r == quote {
   840					break
   841				}
   842				s.buf.WriteRune(r)
   843			}
   844			return string(s.buf)
   845		case '"':
   846			// Double-quoted: Include the quotes and let strconv.Unquote do the backslash escapes.
   847			s.buf.WriteRune(quote)
   848			for {
   849				r := s.mustReadRune()
   850				s.buf.WriteRune(r)
   851				if r == '\\' {
   852					// In a legal backslash escape, no matter how long, only the character
   853					// immediately after the escape can itself be a backslash or quote.
   854					// Thus we only need to protect the first character after the backslash.
   855					s.buf.WriteRune(s.mustReadRune())
   856				} else if r == '"' {
   857					break
   858				}
   859			}
   860			result, err := strconv.Unquote(string(s.buf))
   861			if err != nil {
   862				s.error(err)
   863			}
   864			return result
   865		default:
   866			s.errorString("expected quoted string")
   867		}
   868		return ""
   869	}
   870	
   871	// hexDigit returns the value of the hexadecimal digit.
   872	func hexDigit(d rune) (int, bool) {
   873		digit := int(d)
   874		switch digit {
   875		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
   876			return digit - '0', true
   877		case 'a', 'b', 'c', 'd', 'e', 'f':
   878			return 10 + digit - 'a', true
   879		case 'A', 'B', 'C', 'D', 'E', 'F':
   880			return 10 + digit - 'A', true
   881		}
   882		return -1, false
   883	}
   884	
   885	// hexByte returns the next hex-encoded (two-character) byte from the input.
   886	// It returns ok==false if the next bytes in the input do not encode a hex byte.
   887	// If the first byte is hex and the second is not, processing stops.
   888	func (s *ss) hexByte() (b byte, ok bool) {
   889		rune1 := s.getRune()
   890		if rune1 == eof {
   891			return
   892		}
   893		value1, ok := hexDigit(rune1)
   894		if !ok {
   895			s.UnreadRune()
   896			return
   897		}
   898		value2, ok := hexDigit(s.mustReadRune())
   899		if !ok {
   900			s.errorString("illegal hex digit")
   901			return
   902		}
   903		return byte(value1<<4 | value2), true
   904	}
   905	
   906	// hexString returns the space-delimited hexpair-encoded string.
   907	func (s *ss) hexString() string {
   908		s.notEOF()
   909		for {
   910			b, ok := s.hexByte()
   911			if !ok {
   912				break
   913			}
   914			s.buf.WriteByte(b)
   915		}
   916		if len(s.buf) == 0 {
   917			s.errorString("no hex data for %x string")
   918			return ""
   919		}
   920		return string(s.buf)
   921	}
   922	
   923	const floatVerbs = "beEfFgGv"
   924	
   925	const hugeWid = 1 << 30
   926	
   927	// scanOne scans a single value, deriving the scanner from the type of the argument.
   928	func (s *ss) scanOne(verb rune, arg interface{}) {
   929		s.buf = s.buf[:0]
   930		var err error
   931		// If the parameter has its own Scan method, use that.
   932		if v, ok := arg.(Scanner); ok {
   933			err = v.Scan(s, verb)
   934			if err != nil {
   935				if err == io.EOF {
   936					err = io.ErrUnexpectedEOF
   937				}
   938				s.error(err)
   939			}
   940			return
   941		}
   942	
   943		switch v := arg.(type) {
   944		case *bool:
   945			*v = s.scanBool(verb)
   946		case *complex64:
   947			*v = complex64(s.scanComplex(verb, 64))
   948		case *complex128:
   949			*v = s.scanComplex(verb, 128)
   950		case *int:
   951			*v = int(s.scanInt(verb, intBits))
   952		case *int8:
   953			*v = int8(s.scanInt(verb, 8))
   954		case *int16:
   955			*v = int16(s.scanInt(verb, 16))
   956		case *int32:
   957			*v = int32(s.scanInt(verb, 32))
   958		case *int64:
   959			*v = s.scanInt(verb, 64)
   960		case *uint:
   961			*v = uint(s.scanUint(verb, intBits))
   962		case *uint8:
   963			*v = uint8(s.scanUint(verb, 8))
   964		case *uint16:
   965			*v = uint16(s.scanUint(verb, 16))
   966		case *uint32:
   967			*v = uint32(s.scanUint(verb, 32))
   968		case *uint64:
   969			*v = s.scanUint(verb, 64)
   970		case *uintptr:
   971			*v = uintptr(s.scanUint(verb, uintptrBits))
   972		// Floats are tricky because you want to scan in the precision of the result, not
   973		// scan in high precision and convert, in order to preserve the correct error condition.
   974		case *float32:
   975			if s.okVerb(verb, floatVerbs, "float32") {
   976				s.skipSpace(false)
   977				s.notEOF()
   978				*v = float32(s.convertFloat(s.floatToken(), 32))
   979			}
   980		case *float64:
   981			if s.okVerb(verb, floatVerbs, "float64") {
   982				s.skipSpace(false)
   983				s.notEOF()
   984				*v = s.convertFloat(s.floatToken(), 64)
   985			}
   986		case *string:
   987			*v = s.convertString(verb)
   988		case *[]byte:
   989			// We scan to string and convert so we get a copy of the data.
   990			// If we scanned to bytes, the slice would point at the buffer.
   991			*v = []byte(s.convertString(verb))
   992		default:
   993			val := reflect.ValueOf(v)
   994			ptr := val
   995			if ptr.Kind() != reflect.Ptr {
   996				s.errorString("type not a pointer: " + val.Type().String())
   997				return
   998			}
   999			switch v := ptr.Elem(); v.Kind() {
  1000			case reflect.Bool:
  1001				v.SetBool(s.scanBool(verb))
  1002			case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
  1003				v.SetInt(s.scanInt(verb, v.Type().Bits()))
  1004			case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
  1005				v.SetUint(s.scanUint(verb, v.Type().Bits()))
  1006			case reflect.String:
  1007				v.SetString(s.convertString(verb))
  1008			case reflect.Slice:
  1009				// For now, can only handle (renamed) []byte.
  1010				typ := v.Type()
  1011				if typ.Elem().Kind() != reflect.Uint8 {
  1012					s.errorString("can't scan type: " + val.Type().String())
  1013				}
  1014				str := s.convertString(verb)
  1015				v.Set(reflect.MakeSlice(typ, len(str), len(str)))
  1016				for i := 0; i < len(str); i++ {
  1017					v.Index(i).SetUint(uint64(str[i]))
  1018				}
  1019			case reflect.Float32, reflect.Float64:
  1020				s.skipSpace(false)
  1021				s.notEOF()
  1022				v.SetFloat(s.convertFloat(s.floatToken(), v.Type().Bits()))
  1023			case reflect.Complex64, reflect.Complex128:
  1024				v.SetComplex(s.scanComplex(verb, v.Type().Bits()))
  1025			default:
  1026				s.errorString("can't scan type: " + val.Type().String())
  1027			}
  1028		}
  1029	}
  1030	
  1031	// errorHandler turns local panics into error returns.
  1032	func errorHandler(errp *error) {
  1033		if e := recover(); e != nil {
  1034			if se, ok := e.(scanError); ok { // catch local error
  1035				*errp = se.err
  1036			} else if eof, ok := e.(error); ok && eof == io.EOF { // out of input
  1037				*errp = eof
  1038			} else {
  1039				panic(e)
  1040			}
  1041		}
  1042	}
  1043	
  1044	// doScan does the real work for scanning without a format string.
  1045	func (s *ss) doScan(a []interface{}) (numProcessed int, err error) {
  1046		defer errorHandler(&err)
  1047		for _, arg := range a {
  1048			s.scanOne('v', arg)
  1049			numProcessed++
  1050		}
  1051		// Check for newline (or EOF) if required (Scanln etc.).
  1052		if s.nlIsEnd {
  1053			for {
  1054				r := s.getRune()
  1055				if r == '\n' || r == eof {
  1056					break
  1057				}
  1058				if !isSpace(r) {
  1059					s.errorString("expected newline")
  1060					break
  1061				}
  1062			}
  1063		}
  1064		return
  1065	}
  1066	
  1067	// advance determines whether the next characters in the input match
  1068	// those of the format. It returns the number of bytes (sic) consumed
  1069	// in the format. All runs of space characters in either input or
  1070	// format behave as a single space. Newlines are special, though:
  1071	// newlines in the format must match those in the input and vice versa.
  1072	// This routine also handles the %% case. If the return value is zero,
  1073	// either format starts with a % (with no following %) or the input
  1074	// is empty. If it is negative, the input did not match the string.
  1075	func (s *ss) advance(format string) (i int) {
  1076		for i < len(format) {
  1077			fmtc, w := utf8.DecodeRuneInString(format[i:])
  1078			if fmtc == '%' {
  1079				// %% acts like a real percent
  1080				nextc, _ := utf8.DecodeRuneInString(format[i+w:]) // will not match % if string is empty
  1081				if nextc != '%' {
  1082					return
  1083				}
  1084				i += w // skip the first %
  1085			}
  1086			sawSpace := false
  1087			wasNewline := false
  1088			// Skip spaces in format but absorb at most one newline.
  1089			for isSpace(fmtc) && i < len(format) {
  1090				if fmtc == '\n' {
  1091					if wasNewline { // Already saw one; stop here.
  1092						break
  1093					}
  1094					wasNewline = true
  1095				}
  1096				sawSpace = true
  1097				i += w
  1098				fmtc, w = utf8.DecodeRuneInString(format[i:])
  1099			}
  1100			if sawSpace {
  1101				// There was space in the format, so there should be space
  1102				// in the input.
  1103				inputc := s.getRune()
  1104				if inputc == eof {
  1105					return
  1106				}
  1107				if !isSpace(inputc) {
  1108					// Space in format but not in input.
  1109					s.errorString("expected space in input to match format")
  1110				}
  1111				// Skip spaces but stop at newline.
  1112				for inputc != '\n' && isSpace(inputc) {
  1113					inputc = s.getRune()
  1114				}
  1115				if inputc == '\n' {
  1116					if !wasNewline {
  1117						s.errorString("newline in input does not match format")
  1118					}
  1119					// We've reached a newline, stop now; don't read further.
  1120					return
  1121				}
  1122				s.UnreadRune()
  1123				if wasNewline {
  1124					s.errorString("newline in format does not match input")
  1125				}
  1126				continue
  1127			}
  1128			inputc := s.mustReadRune()
  1129			if fmtc != inputc {
  1130				s.UnreadRune()
  1131				return -1
  1132			}
  1133			i += w
  1134		}
  1135		return
  1136	}
  1137	
  1138	// doScanf does the real work when scanning with a format string.
  1139	//  At the moment, it handles only pointers to basic types.
  1140	func (s *ss) doScanf(format string, a []interface{}) (numProcessed int, err error) {
  1141		defer errorHandler(&err)
  1142		end := len(format) - 1
  1143		// We process one item per non-trivial format
  1144		for i := 0; i <= end; {
  1145			w := s.advance(format[i:])
  1146			if w > 0 {
  1147				i += w
  1148				continue
  1149			}
  1150			// Either we failed to advance, we have a percent character, or we ran out of input.
  1151			if format[i] != '%' {
  1152				// Can't advance format.  Why not?
  1153				if w < 0 {
  1154					s.errorString("input does not match format")
  1155				}
  1156				// Otherwise at EOF; "too many operands" error handled below
  1157				break
  1158			}
  1159			i++ // % is one byte
  1160	
  1161			// do we have 20 (width)?
  1162			var widPresent bool
  1163			s.maxWid, widPresent, i = parsenum(format, i, end)
  1164			if !widPresent {
  1165				s.maxWid = hugeWid
  1166			}
  1167			s.SkipSpace()
  1168			s.argLimit = s.limit
  1169			if f := s.count + s.maxWid; f < s.argLimit {
  1170				s.argLimit = f
  1171			}
  1172	
  1173			c, w := utf8.DecodeRuneInString(format[i:])
  1174			i += w
  1175	
  1176			if numProcessed >= len(a) { // out of operands
  1177				s.errorString("too few operands for format %" + format[i-w:])
  1178				break
  1179			}
  1180			arg := a[numProcessed]
  1181	
  1182			s.scanOne(c, arg)
  1183			numProcessed++
  1184			s.argLimit = s.limit
  1185		}
  1186		if numProcessed < len(a) {
  1187			s.errorString("too many operands")
  1188		}
  1189		return
  1190	}
  1191	

View as plain text