The Go Programming Language

Source file src/pkg/fmt/scan.go

     1	// Copyright 2010 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package fmt
     6	
     7	import (
     8		"bytes"
     9		"io"
    10		"math"
    11		"os"
    12		"reflect"
    13		"strconv"
    14		"strings"
    15		"unicode"
    16		"utf8"
    17	)
    18	
// runeUnreader is the interface to something that can unread runes.
// If the object provided to Scan does not satisfy this interface,
// a local buffer will be used to back up the input, but its contents
// will be lost when Scan returns.
type runeUnreader interface {
	// UnreadRune causes the next rune read to return the same rune
	// as the previous one.
	UnreadRune() os.Error
}
    26	
// ScanState represents the scanner state passed to custom scanners.
// Scanners may do rune-at-a-time scanning or ask the ScanState
// to discover the next space-delimited token.
type ScanState interface {
	// ReadRune reads the next rune (Unicode code point) from the input.
	// If invoked during Scanln, Fscanln, or Sscanln, ReadRune() will
	// return EOF after returning the first '\n' or when reading beyond
	// the specified width.
	ReadRune() (rune int, size int, err os.Error)
	// UnreadRune causes the next call to ReadRune to return the same rune.
	UnreadRune() os.Error
	// SkipSpace skips space in the input. Newlines are treated as space
	// unless the scan operation is Scanln, Fscanln or Sscanln, in which case
	// a newline is treated as EOF.
	SkipSpace()
	// Token skips space in the input if skipSpace is true, then returns the
	// run of Unicode code points c satisfying f(c).  If f is nil,
	// !unicode.IsSpace(c) is used; that is, the token will hold non-space
	// characters.  Newlines are treated as space unless the scan operation
	// is Scanln, Fscanln or Sscanln, in which case a newline is treated as
	// EOF.  The returned slice points to shared data that may be overwritten
	// by the next call to Token, a call to a Scan function using the ScanState
	// as input, or when the calling Scan method returns.
	Token(skipSpace bool, f func(int) bool) (token []byte, err os.Error)
	// Width returns the value of the width option and whether it has been set.
	// The unit is Unicode code points.
	Width() (wid int, ok bool)
	// Read is present only so that a ScanState satisfies io.Reader.
	// Because ReadRune is implemented by the interface, Read should never be
	// called by the scanning routines and a valid implementation of
	// ScanState may choose always to return an error from Read.
	Read(buf []byte) (n int, err os.Error)
}
    59	
// Scanner is implemented by any value that has a Scan method, which scans
// the input for the representation of a value and stores the result in the
// receiver, which must be a pointer to be useful.  The Scan method is called
// for any argument to Scan, Scanf, or Scanln that implements it.
type Scanner interface {
	// Scan reads the value from state; verb is the format verb in effect.
	// A non-nil error aborts the scan of this argument; an os.EOF result
	// is reported to the caller as io.ErrUnexpectedEOF.
	Scan(state ScanState, verb int) os.Error
}
    67	
    68	// Scan scans text read from standard input, storing successive
    69	// space-separated values into successive arguments.  Newlines count
    70	// as space.  It returns the number of items successfully scanned.
    71	// If that is less than the number of arguments, err will report why.
    72	func Scan(a ...interface{}) (n int, err os.Error) {
    73		return Fscan(os.Stdin, a...)
    74	}
    75	
    76	// Scanln is similar to Scan, but stops scanning at a newline and
    77	// after the final item there must be a newline or EOF.
    78	func Scanln(a ...interface{}) (n int, err os.Error) {
    79		return Fscanln(os.Stdin, a...)
    80	}
    81	
    82	// Scanf scans text read from standard input, storing successive
    83	// space-separated values into successive arguments as determined by
    84	// the format.  It returns the number of items successfully scanned.
    85	func Scanf(format string, a ...interface{}) (n int, err os.Error) {
    86		return Fscanf(os.Stdin, format, a...)
    87	}
    88	
    89	// Sscan scans the argument string, storing successive space-separated
    90	// values into successive arguments.  Newlines count as space.  It
    91	// returns the number of items successfully scanned.  If that is less
    92	// than the number of arguments, err will report why.
    93	func Sscan(str string, a ...interface{}) (n int, err os.Error) {
    94		return Fscan(strings.NewReader(str), a...)
    95	}
    96	
    97	// Sscanln is similar to Sscan, but stops scanning at a newline and
    98	// after the final item there must be a newline or EOF.
    99	func Sscanln(str string, a ...interface{}) (n int, err os.Error) {
   100		return Fscanln(strings.NewReader(str), a...)
   101	}
   102	
   103	// Sscanf scans the argument string, storing successive space-separated
   104	// values into successive arguments as determined by the format.  It
   105	// returns the number of items successfully parsed.
   106	func Sscanf(str string, format string, a ...interface{}) (n int, err os.Error) {
   107		return Fscanf(strings.NewReader(str), format, a...)
   108	}
   109	
   110	// Fscan scans text read from r, storing successive space-separated
   111	// values into successive arguments.  Newlines count as space.  It
   112	// returns the number of items successfully scanned.  If that is less
   113	// than the number of arguments, err will report why.
   114	func Fscan(r io.Reader, a ...interface{}) (n int, err os.Error) {
   115		s, old := newScanState(r, true, false)
   116		n, err = s.doScan(a)
   117		s.free(old)
   118		return
   119	}
   120	
   121	// Fscanln is similar to Fscan, but stops scanning at a newline and
   122	// after the final item there must be a newline or EOF.
   123	func Fscanln(r io.Reader, a ...interface{}) (n int, err os.Error) {
   124		s, old := newScanState(r, false, true)
   125		n, err = s.doScan(a)
   126		s.free(old)
   127		return
   128	}
   129	
   130	// Fscanf scans text read from r, storing successive space-separated
   131	// values into successive arguments as determined by the format.  It
   132	// returns the number of items successfully parsed.
   133	func Fscanf(r io.Reader, format string, a ...interface{}) (n int, err os.Error) {
   134		s, old := newScanState(r, false, false)
   135		n, err = s.doScanf(format, a)
   136		s.free(old)
   137		return
   138	}
   139	
// scanError represents an error generated by the scanning software.
// It's used as a unique signature to identify such errors when recovering:
// the scanner panics with a scanError and the recovering code turns it
// back into an ordinary error return.
type scanError struct {
	err os.Error // the underlying error to report to the caller
}
   145	
// eof is the rune value getRune returns when the input is exhausted.
const eof = -1
   147	
// ss is the internal implementation of ScanState.
type ss struct {
	rr       io.RuneReader // where to read input
	buf      bytes.Buffer  // token accumulator
	peekRune int           // one-rune lookahead; negative when empty
	prevRune int           // last rune returned by ReadRune; -1 after UnreadRune
	count    int           // runes consumed so far.
	atEOF    bool          // already read EOF
	ssave                  // settings saved and restored on recursive scans
}
   158	
// ssave holds the parts of ss that need to be
// saved and restored on recursive scans.
type ssave struct {
	validSave  bool // is or was a part of an actual ss.
	nlIsEnd    bool // whether newline terminates scan
	nlIsSpace  bool // whether newline counts as white space
	fieldLimit int  // max value of ss.count for this field; fieldLimit <= limit
	limit      int  // max value of ss.count.
	maxWid     int  // width of this field; hugeWid means no width was set.
}
   169	
   170	// The Read method is only in ScanState so that ScanState
   171	// satisfies io.Reader. It will never be called when used as
   172	// intended, so there is no need to make it actually work.
   173	func (s *ss) Read(buf []byte) (n int, err os.Error) {
   174		return 0, os.NewError("ScanState's Read should not be called. Use ReadRune")
   175	}
   176	
   177	func (s *ss) ReadRune() (rune int, size int, err os.Error) {
   178		if s.peekRune >= 0 {
   179			s.count++
   180			rune = s.peekRune
   181			size = utf8.RuneLen(rune)
   182			s.prevRune = rune
   183			s.peekRune = -1
   184			return
   185		}
   186		if s.atEOF || s.nlIsEnd && s.prevRune == '\n' || s.count >= s.fieldLimit {
   187			err = os.EOF
   188			return
   189		}
   190	
   191		rune, size, err = s.rr.ReadRune()
   192		if err == nil {
   193			s.count++
   194			s.prevRune = rune
   195		} else if err == os.EOF {
   196			s.atEOF = true
   197		}
   198		return
   199	}
   200	
   201	func (s *ss) Width() (wid int, ok bool) {
   202		if s.maxWid == hugeWid {
   203			return 0, false
   204		}
   205		return s.maxWid, true
   206	}
   207	
   208	// The public method returns an error; this private one panics.
   209	// If getRune reaches EOF, the return value is EOF (-1).
   210	func (s *ss) getRune() (rune int) {
   211		rune, _, err := s.ReadRune()
   212		if err != nil {
   213			if err == os.EOF {
   214				return eof
   215			}
   216			s.error(err)
   217		}
   218		return
   219	}
   220	
   221	// mustReadRune turns os.EOF into a panic(io.ErrUnexpectedEOF).
   222	// It is called in cases such as string scanning where an EOF is a
   223	// syntax error.
   224	func (s *ss) mustReadRune() (rune int) {
   225		rune = s.getRune()
   226		if rune == eof {
   227			s.error(io.ErrUnexpectedEOF)
   228		}
   229		return
   230	}
   231	
   232	func (s *ss) UnreadRune() os.Error {
   233		if u, ok := s.rr.(runeUnreader); ok {
   234			u.UnreadRune()
   235		} else {
   236			s.peekRune = s.prevRune
   237		}
   238		s.prevRune = -1
   239		s.count--
   240		return nil
   241	}
   242	
   243	func (s *ss) error(err os.Error) {
   244		panic(scanError{err})
   245	}
   246	
   247	func (s *ss) errorString(err string) {
   248		panic(scanError{os.NewError(err)})
   249	}
   250	
   251	func (s *ss) Token(skipSpace bool, f func(int) bool) (tok []byte, err os.Error) {
   252		defer func() {
   253			if e := recover(); e != nil {
   254				if se, ok := e.(scanError); ok {
   255					err = se.err
   256				} else {
   257					panic(e)
   258				}
   259			}
   260		}()
   261		if f == nil {
   262			f = notSpace
   263		}
   264		s.buf.Reset()
   265		tok = s.token(skipSpace, f)
   266		return
   267	}
   268	
   269	// notSpace is the default scanning function used in Token.
   270	func notSpace(r int) bool {
   271		return !unicode.IsSpace(r)
   272	}
   273	
   274	// skipSpace provides Scan() methods the ability to skip space and newline characters 
   275	// in keeping with the current scanning mode set by format strings and Scan()/Scanln().
   276	func (s *ss) SkipSpace() {
   277		s.skipSpace(false)
   278	}
   279	
// readRune is a structure to enable reading UTF-8 encoded code points
// from an io.Reader.  It is used if the Reader given to the scanner does
// not already implement io.RuneReader.
type readRune struct {
	reader  io.Reader         // underlying source of bytes
	buf     [utf8.UTFMax]byte // used only inside ReadRune
	pending int               // number of bytes in pendBuf; only >0 for bad UTF-8
	pendBuf [utf8.UTFMax]byte // bytes left over, consumed by readByte before reading more
}
   289	
   290	// readByte returns the next byte from the input, which may be
   291	// left over from a previous read if the UTF-8 was ill-formed.
   292	func (r *readRune) readByte() (b byte, err os.Error) {
   293		if r.pending > 0 {
   294			b = r.pendBuf[0]
   295			copy(r.pendBuf[0:], r.pendBuf[1:])
   296			r.pending--
   297			return
   298		}
   299		_, err = r.reader.Read(r.pendBuf[0:1])
   300		return r.pendBuf[0], err
   301	}
   302	
   303	// unread saves the bytes for the next read.
   304	func (r *readRune) unread(buf []byte) {
   305		copy(r.pendBuf[r.pending:], buf)
   306		r.pending += len(buf)
   307	}
   308	
   309	// ReadRune returns the next UTF-8 encoded code point from the
   310	// io.Reader inside r.
   311	func (r *readRune) ReadRune() (rune int, size int, err os.Error) {
   312		r.buf[0], err = r.readByte()
   313		if err != nil {
   314			return 0, 0, err
   315		}
   316		if r.buf[0] < utf8.RuneSelf { // fast check for common ASCII case
   317			rune = int(r.buf[0])
   318			return
   319		}
   320		var n int
   321		for n = 1; !utf8.FullRune(r.buf[0:n]); n++ {
   322			r.buf[n], err = r.readByte()
   323			if err != nil {
   324				if err == os.EOF {
   325					err = nil
   326					break
   327				}
   328				return
   329			}
   330		}
   331		rune, size = utf8.DecodeRune(r.buf[0:n])
   332		if size < n { // an error
   333			r.unread(r.buf[size:n])
   334		}
   335		return
   336	}
   337	
// ssFree is a cache of ss structs: newScanState takes one from it and
// free puts it back, avoiding an allocation per scan.
var ssFree = newCache(func() interface{} { return new(ss) })
   339	
   340	// Allocate a new ss struct or grab a cached one.
   341	func newScanState(r io.Reader, nlIsSpace, nlIsEnd bool) (s *ss, old ssave) {
   342		// If the reader is a *ss, then we've got a recursive
   343		// call to Scan, so re-use the scan state.
   344		s, ok := r.(*ss)
   345		if ok {
   346			old = s.ssave
   347			s.limit = s.fieldLimit
   348			s.nlIsEnd = nlIsEnd || s.nlIsEnd
   349			s.nlIsSpace = nlIsSpace
   350			return
   351		}
   352	
   353		s = ssFree.get().(*ss)
   354		if rr, ok := r.(io.RuneReader); ok {
   355			s.rr = rr
   356		} else {
   357			s.rr = &readRune{reader: r}
   358		}
   359		s.nlIsSpace = nlIsSpace
   360		s.nlIsEnd = nlIsEnd
   361		s.prevRune = -1
   362		s.peekRune = -1
   363		s.atEOF = false
   364		s.limit = hugeWid
   365		s.fieldLimit = hugeWid
   366		s.maxWid = hugeWid
   367		s.validSave = true
   368		return
   369	}
   370	
   371	// Save used ss structs in ssFree; avoid an allocation per invocation.
   372	func (s *ss) free(old ssave) {
   373		// If it was used recursively, just restore the old state.
   374		if old.validSave {
   375			s.ssave = old
   376			return
   377		}
   378		// Don't hold on to ss structs with large buffers.
   379		if cap(s.buf.Bytes()) > 1024 {
   380			return
   381		}
   382		s.buf.Reset()
   383		s.rr = nil
   384		ssFree.put(s)
   385	}
   386	
   387	// skipSpace skips spaces and maybe newlines.
   388	func (s *ss) skipSpace(stopAtNewline bool) {
   389		for {
   390			rune := s.getRune()
   391			if rune == eof {
   392				return
   393			}
   394			if rune == '\n' {
   395				if stopAtNewline {
   396					break
   397				}
   398				if s.nlIsSpace {
   399					continue
   400				}
   401				s.errorString("unexpected newline")
   402				return
   403			}
   404			if !unicode.IsSpace(rune) {
   405				s.UnreadRune()
   406				break
   407			}
   408		}
   409	}
   410	
   411	// token returns the next space-delimited string from the input.  It
   412	// skips white space.  For Scanln, it stops at newlines.  For Scan,
   413	// newlines are treated as spaces.
   414	func (s *ss) token(skipSpace bool, f func(int) bool) []byte {
   415		if skipSpace {
   416			s.skipSpace(false)
   417		}
   418		// read until white space or newline
   419		for {
   420			rune := s.getRune()
   421			if rune == eof {
   422				break
   423			}
   424			if !f(rune) {
   425				s.UnreadRune()
   426				break
   427			}
   428			s.buf.WriteRune(rune)
   429		}
   430		return s.buf.Bytes()
   431	}
   432	
   433	// typeError indicates that the type of the operand did not match the format
   434	func (s *ss) typeError(field interface{}, expected string) {
   435		s.errorString("expected field of type pointer to " + expected + "; found " + reflect.TypeOf(field).String())
   436	}
   437	
   438	var complexError = os.NewError("syntax error scanning complex number")
   439	var boolError = os.NewError("syntax error scanning boolean")
   440	
   441	// consume reads the next rune in the input and reports whether it is in the ok string.
   442	// If accept is true, it puts the character into the input token.
   443	func (s *ss) consume(ok string, accept bool) bool {
   444		rune := s.getRune()
   445		if rune == eof {
   446			return false
   447		}
   448		if strings.IndexRune(ok, rune) >= 0 {
   449			if accept {
   450				s.buf.WriteRune(rune)
   451			}
   452			return true
   453		}
   454		if rune != eof && accept {
   455			s.UnreadRune()
   456		}
   457		return false
   458	}
   459	
   460	// peek reports whether the next character is in the ok string, without consuming it.
   461	func (s *ss) peek(ok string) bool {
   462		rune := s.getRune()
   463		if rune != eof {
   464			s.UnreadRune()
   465		}
   466		return strings.IndexRune(ok, rune) >= 0
   467	}
   468	
   469	func (s *ss) notEOF() {
   470		// Guarantee there is data to be read.
   471		if rune := s.getRune(); rune == eof {
   472			panic(os.EOF)
   473		}
   474		s.UnreadRune()
   475	}
   476	
   477	// accept checks the next rune in the input.  If it's a byte (sic) in the string, it puts it in the
   478	// buffer and returns true. Otherwise it return false.
   479	func (s *ss) accept(ok string) bool {
   480		return s.consume(ok, true)
   481	}
   482	
   483	// okVerb verifies that the verb is present in the list, setting s.err appropriately if not.
   484	func (s *ss) okVerb(verb int, okVerbs, typ string) bool {
   485		for _, v := range okVerbs {
   486			if v == verb {
   487				return true
   488			}
   489		}
   490		s.errorString("bad verb %" + string(verb) + " for " + typ)
   491		return false
   492	}
   493	
   494	// scanBool returns the value of the boolean represented by the next token.
   495	func (s *ss) scanBool(verb int) bool {
   496		s.skipSpace(false)
   497		s.notEOF()
   498		if !s.okVerb(verb, "tv", "boolean") {
   499			return false
   500		}
   501		// Syntax-checking a boolean is annoying.  We're not fastidious about case.
   502		switch s.getRune() {
   503		case '0':
   504			return false
   505		case '1':
   506			return true
   507		case 't', 'T':
   508			if s.accept("rR") && (!s.accept("uU") || !s.accept("eE")) {
   509				s.error(boolError)
   510			}
   511			return true
   512		case 'f', 'F':
   513			if s.accept("aL") && (!s.accept("lL") || !s.accept("sS") || !s.accept("eE")) {
   514				s.error(boolError)
   515			}
   516			return false
   517		}
   518		return false
   519	}
   520	
// Numerical elements: the sets of runes accepted while scanning numbers.
const (
	binaryDigits      = "01"
	octalDigits       = "01234567"
	decimalDigits     = "0123456789"
	hexadecimalDigits = "0123456789aAbBcCdDeEfF"
	sign              = "+-"
	period            = "."
	exponent          = "eEp" // 'p' introduces a power-of-two exponent (see convertFloat)
)
   531	
   532	// getBase returns the numeric base represented by the verb and its digit string.
   533	func (s *ss) getBase(verb int) (base int, digits string) {
   534		s.okVerb(verb, "bdoUxXv", "integer") // sets s.err
   535		base = 10
   536		digits = decimalDigits
   537		switch verb {
   538		case 'b':
   539			base = 2
   540			digits = binaryDigits
   541		case 'o':
   542			base = 8
   543			digits = octalDigits
   544		case 'x', 'X', 'U':
   545			base = 16
   546			digits = hexadecimalDigits
   547		}
   548		return
   549	}
   550	
   551	// scanNumber returns the numerical string with specified digits starting here.
   552	func (s *ss) scanNumber(digits string, haveDigits bool) string {
   553		if !haveDigits {
   554			s.notEOF()
   555			if !s.accept(digits) {
   556				s.errorString("expected integer")
   557			}
   558		}
   559		for s.accept(digits) {
   560		}
   561		return s.buf.String()
   562	}
   563	
   564	// scanRune returns the next rune value in the input.
   565	func (s *ss) scanRune(bitSize int) int64 {
   566		s.notEOF()
   567		rune := int64(s.getRune())
   568		n := uint(bitSize)
   569		x := (rune << (64 - n)) >> (64 - n)
   570		if x != rune {
   571			s.errorString("overflow on character value " + string(rune))
   572		}
   573		return rune
   574	}
   575	
   576	// scanBasePrefix reports whether the integer begins with a 0 or 0x,
   577	// and returns the base, digit string, and whether a zero was found.
   578	// It is called only if the verb is %v.
   579	func (s *ss) scanBasePrefix() (base int, digits string, found bool) {
   580		if !s.peek("0") {
   581			return 10, decimalDigits, false
   582		}
   583		s.accept("0")
   584		found = true // We've put a digit into the token buffer.
   585		// Special cases for '0' && '0x'
   586		base, digits = 8, octalDigits
   587		if s.peek("xX") {
   588			s.consume("xX", false)
   589			base, digits = 16, hexadecimalDigits
   590		}
   591		return
   592	}
   593	
   594	// scanInt returns the value of the integer represented by the next
   595	// token, checking for overflow.  Any error is stored in s.err.
   596	func (s *ss) scanInt(verb int, bitSize int) int64 {
   597		if verb == 'c' {
   598			return s.scanRune(bitSize)
   599		}
   600		s.skipSpace(false)
   601		s.notEOF()
   602		base, digits := s.getBase(verb)
   603		haveDigits := false
   604		if verb == 'U' {
   605			if !s.consume("U", false) || !s.consume("+", false) {
   606				s.errorString("bad unicode format ")
   607			}
   608		} else {
   609			s.accept(sign) // If there's a sign, it will be left in the token buffer.
   610			if verb == 'v' {
   611				base, digits, haveDigits = s.scanBasePrefix()
   612			}
   613		}
   614		tok := s.scanNumber(digits, haveDigits)
   615		i, err := strconv.Btoi64(tok, base)
   616		if err != nil {
   617			s.error(err)
   618		}
   619		n := uint(bitSize)
   620		x := (i << (64 - n)) >> (64 - n)
   621		if x != i {
   622			s.errorString("integer overflow on token " + tok)
   623		}
   624		return i
   625	}
   626	
   627	// scanUint returns the value of the unsigned integer represented
   628	// by the next token, checking for overflow.  Any error is stored in s.err.
   629	func (s *ss) scanUint(verb int, bitSize int) uint64 {
   630		if verb == 'c' {
   631			return uint64(s.scanRune(bitSize))
   632		}
   633		s.skipSpace(false)
   634		s.notEOF()
   635		base, digits := s.getBase(verb)
   636		haveDigits := false
   637		if verb == 'U' {
   638			if !s.consume("U", false) || !s.consume("+", false) {
   639				s.errorString("bad unicode format ")
   640			}
   641		} else if verb == 'v' {
   642			base, digits, haveDigits = s.scanBasePrefix()
   643		}
   644		tok := s.scanNumber(digits, haveDigits)
   645		i, err := strconv.Btoui64(tok, base)
   646		if err != nil {
   647			s.error(err)
   648		}
   649		n := uint(bitSize)
   650		x := (i << (64 - n)) >> (64 - n)
   651		if x != i {
   652			s.errorString("unsigned integer overflow on token " + tok)
   653		}
   654		return i
   655	}
   656	
   657	// floatToken returns the floating-point number starting here, no longer than swid
   658	// if the width is specified. It's not rigorous about syntax because it doesn't check that
   659	// we have at least some digits, but Atof will do that.
   660	func (s *ss) floatToken() string {
   661		s.buf.Reset()
   662		// NaN?
   663		if s.accept("nN") && s.accept("aA") && s.accept("nN") {
   664			return s.buf.String()
   665		}
   666		// leading sign?
   667		s.accept(sign)
   668		// Inf?
   669		if s.accept("iI") && s.accept("nN") && s.accept("fF") {
   670			return s.buf.String()
   671		}
   672		// digits?
   673		for s.accept(decimalDigits) {
   674		}
   675		// decimal point?
   676		if s.accept(period) {
   677			// fraction?
   678			for s.accept(decimalDigits) {
   679			}
   680		}
   681		// exponent?
   682		if s.accept(exponent) {
   683			// leading sign?
   684			s.accept(sign)
   685			// digits?
   686			for s.accept(decimalDigits) {
   687			}
   688		}
   689		return s.buf.String()
   690	}
   691	
   692	// complexTokens returns the real and imaginary parts of the complex number starting here.
   693	// The number might be parenthesized and has the format (N+Ni) where N is a floating-point
   694	// number and there are no spaces within.
   695	func (s *ss) complexTokens() (real, imag string) {
   696		// TODO: accept N and Ni independently?
   697		parens := s.accept("(")
   698		real = s.floatToken()
   699		s.buf.Reset()
   700		// Must now have a sign.
   701		if !s.accept("+-") {
   702			s.error(complexError)
   703		}
   704		// Sign is now in buffer
   705		imagSign := s.buf.String()
   706		imag = s.floatToken()
   707		if !s.accept("i") {
   708			s.error(complexError)
   709		}
   710		if parens && !s.accept(")") {
   711			s.error(complexError)
   712		}
   713		return real, imagSign + imag
   714	}
   715	
   716	// convertFloat converts the string to a float64value.
   717	func (s *ss) convertFloat(str string, n int) float64 {
   718		if p := strings.Index(str, "p"); p >= 0 {
   719			// Atof doesn't handle power-of-2 exponents,
   720			// but they're easy to evaluate.
   721			f, err := strconv.AtofN(str[:p], n)
   722			if err != nil {
   723				// Put full string into error.
   724				if e, ok := err.(*strconv.NumError); ok {
   725					e.Num = str
   726				}
   727				s.error(err)
   728			}
   729			n, err := strconv.Atoi(str[p+1:])
   730			if err != nil {
   731				// Put full string into error.
   732				if e, ok := err.(*strconv.NumError); ok {
   733					e.Num = str
   734				}
   735				s.error(err)
   736			}
   737			return math.Ldexp(f, n)
   738		}
   739		f, err := strconv.AtofN(str, n)
   740		if err != nil {
   741			s.error(err)
   742		}
   743		return f
   744	}
   745	
   746	// convertComplex converts the next token to a complex128 value.
   747	// The atof argument is a type-specific reader for the underlying type.
   748	// If we're reading complex64, atof will parse float32s and convert them
   749	// to float64's to avoid reproducing this code for each complex type.
   750	func (s *ss) scanComplex(verb int, n int) complex128 {
   751		if !s.okVerb(verb, floatVerbs, "complex") {
   752			return 0
   753		}
   754		s.skipSpace(false)
   755		s.notEOF()
   756		sreal, simag := s.complexTokens()
   757		real := s.convertFloat(sreal, n/2)
   758		imag := s.convertFloat(simag, n/2)
   759		return complex(real, imag)
   760	}
   761	
   762	// convertString returns the string represented by the next input characters.
   763	// The format of the input is determined by the verb.
   764	func (s *ss) convertString(verb int) (str string) {
   765		if !s.okVerb(verb, "svqx", "string") {
   766			return ""
   767		}
   768		s.skipSpace(false)
   769		s.notEOF()
   770		switch verb {
   771		case 'q':
   772			str = s.quotedString()
   773		case 'x':
   774			str = s.hexString()
   775		default:
   776			str = string(s.token(true, notSpace)) // %s and %v just return the next word
   777		}
   778		return
   779	}
   780	
   781	// quotedString returns the double- or back-quoted string represented by the next input characters.
   782	func (s *ss) quotedString() string {
   783		s.notEOF()
   784		quote := s.getRune()
   785		switch quote {
   786		case '`':
   787			// Back-quoted: Anything goes until EOF or back quote.
   788			for {
   789				rune := s.mustReadRune()
   790				if rune == quote {
   791					break
   792				}
   793				s.buf.WriteRune(rune)
   794			}
   795			return s.buf.String()
   796		case '"':
   797			// Double-quoted: Include the quotes and let strconv.Unquote do the backslash escapes.
   798			s.buf.WriteRune(quote)
   799			for {
   800				rune := s.mustReadRune()
   801				s.buf.WriteRune(rune)
   802				if rune == '\\' {
   803					// In a legal backslash escape, no matter how long, only the character
   804					// immediately after the escape can itself be a backslash or quote.
   805					// Thus we only need to protect the first character after the backslash.
   806					rune := s.mustReadRune()
   807					s.buf.WriteRune(rune)
   808				} else if rune == '"' {
   809					break
   810				}
   811			}
   812			result, err := strconv.Unquote(s.buf.String())
   813			if err != nil {
   814				s.error(err)
   815			}
   816			return result
   817		default:
   818			s.errorString("expected quoted string")
   819		}
   820		return ""
   821	}
   822	
   823	// hexDigit returns the value of the hexadecimal digit
   824	func (s *ss) hexDigit(digit int) int {
   825		switch digit {
   826		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
   827			return digit - '0'
   828		case 'a', 'b', 'c', 'd', 'e', 'f':
   829			return 10 + digit - 'a'
   830		case 'A', 'B', 'C', 'D', 'E', 'F':
   831			return 10 + digit - 'A'
   832		}
   833		s.errorString("Scan: illegal hex digit")
   834		return 0
   835	}
   836	
   837	// hexByte returns the next hex-encoded (two-character) byte from the input.
   838	// There must be either two hexadecimal digits or a space character in the input.
   839	func (s *ss) hexByte() (b byte, ok bool) {
   840		rune1 := s.getRune()
   841		if rune1 == eof {
   842			return
   843		}
   844		if unicode.IsSpace(rune1) {
   845			s.UnreadRune()
   846			return
   847		}
   848		rune2 := s.mustReadRune()
   849		return byte(s.hexDigit(rune1)<<4 | s.hexDigit(rune2)), true
   850	}
   851	
   852	// hexString returns the space-delimited hexpair-encoded string.
   853	func (s *ss) hexString() string {
   854		s.notEOF()
   855		for {
   856			b, ok := s.hexByte()
   857			if !ok {
   858				break
   859			}
   860			s.buf.WriteByte(b)
   861		}
   862		if s.buf.Len() == 0 {
   863			s.errorString("Scan: no hex data for %x string")
   864			return ""
   865		}
   866		return s.buf.String()
   867	}
   868	
// floatVerbs lists the verbs accepted when scanning floating-point and
// complex values.
const floatVerbs = "beEfFgGv"

// hugeWid is the sentinel stored in the width and limit fields when no
// width has been set.
const hugeWid = 1 << 30
   872	
// scanOne scans a single value, deriving the scanner from the type of the argument.
// The token buffer is reset first.  A field that implements Scanner is
// scanned by its own Scan method.  Pointers to the basic types are handled
// directly, and any other pointer is handled through reflection on the kind
// of the pointed-to value.  Failures are raised as scan errors (panics).
func (s *ss) scanOne(verb int, field interface{}) {
	s.buf.Reset()
	var err os.Error
	// If the parameter has its own Scan method, use that.
	if v, ok := field.(Scanner); ok {
		err = v.Scan(s, verb)
		if err != nil {
			// A custom scanner running out of input is a syntax error
			// for the overall scan, not a clean end of input.
			if err == os.EOF {
				err = io.ErrUnexpectedEOF
			}
			s.error(err)
		}
		return
	}

	switch v := field.(type) {
	case *bool:
		*v = s.scanBool(verb)
	case *complex64:
		*v = complex64(s.scanComplex(verb, 64))
	case *complex128:
		*v = s.scanComplex(verb, 128)
	case *int:
		*v = int(s.scanInt(verb, intBits))
	case *int8:
		*v = int8(s.scanInt(verb, 8))
	case *int16:
		*v = int16(s.scanInt(verb, 16))
	case *int32:
		*v = int32(s.scanInt(verb, 32))
	case *int64:
		*v = s.scanInt(verb, 64)
	case *uint:
		*v = uint(s.scanUint(verb, intBits))
	case *uint8:
		*v = uint8(s.scanUint(verb, 8))
	case *uint16:
		*v = uint16(s.scanUint(verb, 16))
	case *uint32:
		*v = uint32(s.scanUint(verb, 32))
	case *uint64:
		*v = s.scanUint(verb, 64)
	case *uintptr:
		*v = uintptr(s.scanUint(verb, uintptrBits))
	// Floats are tricky because you want to scan in the precision of the result, not
	// scan in high precision and convert, in order to preserve the correct error condition.
	case *float32:
		if s.okVerb(verb, floatVerbs, "float32") {
			s.skipSpace(false)
			s.notEOF()
			*v = float32(s.convertFloat(s.floatToken(), 32))
		}
	case *float64:
		if s.okVerb(verb, floatVerbs, "float64") {
			s.skipSpace(false)
			s.notEOF()
			*v = s.convertFloat(s.floatToken(), 64)
		}
	case *string:
		*v = s.convertString(verb)
	case *[]byte:
		// We scan to string and convert so we get a copy of the data.
		// If we scanned to bytes, the slice would point at the buffer.
		*v = []byte(s.convertString(verb))
	default:
		// Not a pointer to a basic type: fall back to reflection so that
		// pointers to named types are handled by their underlying kind.
		val := reflect.ValueOf(v)
		ptr := val
		if ptr.Kind() != reflect.Ptr {
			s.errorString("Scan: type not a pointer: " + val.Type().String())
			return
		}
		switch v := ptr.Elem(); v.Kind() {
		case reflect.Bool:
			v.SetBool(s.scanBool(verb))
		case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
			v.SetInt(s.scanInt(verb, v.Type().Bits()))
		case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
			v.SetUint(s.scanUint(verb, v.Type().Bits()))
		case reflect.String:
			v.SetString(s.convertString(verb))
		case reflect.Slice:
			// For now, can only handle (renamed) []byte.
			typ := v.Type()
			if typ.Elem().Kind() != reflect.Uint8 {
				s.errorString("Scan: can't handle type: " + val.Type().String())
			}
			str := s.convertString(verb)
			v.Set(reflect.MakeSlice(typ, len(str), len(str)))
			for i := 0; i < len(str); i++ {
				v.Index(i).SetUint(uint64(str[i]))
			}
		case reflect.Float32, reflect.Float64:
			// NOTE(review): unlike the *float32 and *float64 cases above,
			// this path does not check the verb against floatVerbs.
			// Confirm whether that difference is intended.
			s.skipSpace(false)
			s.notEOF()
			v.SetFloat(s.convertFloat(s.floatToken(), v.Type().Bits()))
		case reflect.Complex64, reflect.Complex128:
			v.SetComplex(s.scanComplex(verb, v.Type().Bits()))
		default:
			s.errorString("Scan: can't handle type: " + val.Type().String())
		}
	}
}
   976	
   977	// errorHandler turns local panics into error returns.
   978	func errorHandler(errp *os.Error) {
   979		if e := recover(); e != nil {
   980			if se, ok := e.(scanError); ok { // catch local error
   981				*errp = se.err
   982			} else if eof, ok := e.(os.Error); ok && eof == os.EOF { // out of input
   983				*errp = eof
   984			} else {
   985				panic(e)
   986			}
   987		}
   988	}
   989	
   990	// doScan does the real work for scanning without a format string.
   991	func (s *ss) doScan(a []interface{}) (numProcessed int, err os.Error) {
   992		defer errorHandler(&err)
   993		for _, field := range a {
   994			s.scanOne('v', field)
   995			numProcessed++
   996		}
   997		// Check for newline if required.
   998		if !s.nlIsSpace {
   999			for {
  1000				rune := s.getRune()
  1001				if rune == '\n' || rune == eof {
  1002					break
  1003				}
  1004				if !unicode.IsSpace(rune) {
  1005					s.errorString("Scan: expected newline")
  1006					break
  1007				}
  1008			}
  1009		}
  1010		return
  1011	}
  1012	
  1013	// advance determines whether the next characters in the input match
  1014	// those of the format.  It returns the number of bytes (sic) consumed
  1015	// in the format. Newlines included, all runs of space characters in
  1016	// either input or format behave as a single space. This routine also
  1017	// handles the %% case.  If the return value is zero, either format
  1018	// starts with a % (with no following %) or the input is empty.
  1019	// If it is negative, the input did not match the string.
  1020	func (s *ss) advance(format string) (i int) {
  1021		for i < len(format) {
  1022			fmtc, w := utf8.DecodeRuneInString(format[i:])
  1023			if fmtc == '%' {
  1024				// %% acts like a real percent
  1025				nextc, _ := utf8.DecodeRuneInString(format[i+w:]) // will not match % if string is empty
  1026				if nextc != '%' {
  1027					return
  1028				}
  1029				i += w // skip the first %
  1030			}
  1031			sawSpace := false
  1032			for unicode.IsSpace(fmtc) && i < len(format) {
  1033				sawSpace = true
  1034				i += w
  1035				fmtc, w = utf8.DecodeRuneInString(format[i:])
  1036			}
  1037			if sawSpace {
  1038				// There was space in the format, so there should be space (EOF)
  1039				// in the input.
  1040				inputc := s.getRune()
  1041				if inputc == eof {
  1042					return
  1043				}
  1044				if !unicode.IsSpace(inputc) {
  1045					// Space in format but not in input: error
  1046					s.errorString("expected space in input to match format")
  1047				}
  1048				s.skipSpace(true)
  1049				continue
  1050			}
  1051			inputc := s.mustReadRune()
  1052			if fmtc != inputc {
  1053				s.UnreadRune()
  1054				return -1
  1055			}
  1056			i += w
  1057		}
  1058		return
  1059	}
  1060	
  1061	// doScanf does the real work when scanning with a format string.
  1062	//  At the moment, it handles only pointers to basic types.
  1063	func (s *ss) doScanf(format string, a []interface{}) (numProcessed int, err os.Error) {
  1064		defer errorHandler(&err)
  1065		end := len(format) - 1
  1066		// We process one item per non-trivial format
  1067		for i := 0; i <= end; {
  1068			w := s.advance(format[i:])
  1069			if w > 0 {
  1070				i += w
  1071				continue
  1072			}
  1073			// Either we failed to advance, we have a percent character, or we ran out of input.
  1074			if format[i] != '%' {
  1075				// Can't advance format.  Why not?
  1076				if w < 0 {
  1077					s.errorString("input does not match format")
  1078				}
  1079				// Otherwise at EOF; "too many operands" error handled below
  1080				break
  1081			}
  1082			i++ // % is one byte
  1083	
  1084			// do we have 20 (width)?
  1085			var widPresent bool
  1086			s.maxWid, widPresent, i = parsenum(format, i, end)
  1087			if !widPresent {
  1088				s.maxWid = hugeWid
  1089			}
  1090			s.fieldLimit = s.limit
  1091			if f := s.count + s.maxWid; f < s.fieldLimit {
  1092				s.fieldLimit = f
  1093			}
  1094	
  1095			c, w := utf8.DecodeRuneInString(format[i:])
  1096			i += w
  1097	
  1098			if numProcessed >= len(a) { // out of operands
  1099				s.errorString("too few operands for format %" + format[i-w:])
  1100				break
  1101			}
  1102			field := a[numProcessed]
  1103	
  1104			s.scanOne(c, field)
  1105			numProcessed++
  1106			s.fieldLimit = s.limit
  1107		}
  1108		if numProcessed < len(a) {
  1109			s.errorString("too many operands")
  1110		}
  1111		return
  1112	}

release.r60.3. Except as noted, this content is licensed under a Creative Commons Attribution 3.0 License.