atoi.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package strconv
     6  
     7  import "errors"
     8  
     9  // lower(c) is a lower-case letter if and only if
    10  // c is either that lower-case letter or the equivalent upper-case letter.
    11  // Instead of writing c == 'x' || c == 'X' one can write lower(c) == 'x'.
    12  // Note that lower of non-letters can produce other non-letters.
    13  func lower(c byte) byte {
    14  	return c | ('x' - 'X')
    15  }
    16  
    17  // ErrRange indicates that a value is out of range for the target type.
    18  var ErrRange = errors.New("value out of range")
    19  
    20  // ErrSyntax indicates that a value does not have the right syntax for the target type.
    21  var ErrSyntax = errors.New("invalid syntax")
    22  
// A NumError records a failed conversion.
//
// The helper constructors in this file (syntaxError, rangeError, baseError,
// bitSizeError) store a copy of the input in Num rather than the caller's
// string, and build the value with a positional literal, so the field order
// below must not change.
type NumError struct {
	Func string // the failing function (ParseBool, ParseInt, ParseUint, ParseFloat, ParseComplex, Atoi)
	Num  string // the input
	Err  error  // the reason the conversion failed (e.g. ErrRange, ErrSyntax, etc.)
}
    29  
    30  func (e *NumError) Error() string {
    31  	return "strconv." + e.Func + ": " + "parsing " + Quote(e.Num) + ": " + e.Err.Error()
    32  }
    33  
    34  func (e *NumError) Unwrap() error { return e.Err }
    35  
    36  // cloneString returns a string copy of x.
    37  //
    38  // All ParseXXX functions allow the input string to escape to the error value.
    39  // This hurts strconv.ParseXXX(string(b)) calls where b is []byte since
    40  // the conversion from []byte must allocate a string on the heap.
    41  // If we assume errors are infrequent, then we can avoid escaping the input
    42  // back to the output by copying it first. This allows the compiler to call
    43  // strconv.ParseXXX without a heap allocation for most []byte to string
    44  // conversions, since it can now prove that the string cannot escape Parse.
    45  //
    46  // TODO: Use strings.Clone instead? However, we cannot depend on "strings"
    47  // since it incurs a transitive dependency on "unicode".
    48  // Either move strings.Clone to an internal/bytealg or make the
    49  // "strings" to "unicode" dependency lighter (see https://go.dev/issue/54098).
    50  func cloneString(x string) string { return string([]byte(x)) }
    51  
    52  func syntaxError(fn, str string) *NumError {
    53  	return &NumError{fn, cloneString(str), ErrSyntax}
    54  }
    55  
    56  func rangeError(fn, str string) *NumError {
    57  	return &NumError{fn, cloneString(str), ErrRange}
    58  }
    59  
    60  func baseError(fn, str string, base int) *NumError {
    61  	return &NumError{fn, cloneString(str), errors.New("invalid base " + Itoa(base))}
    62  }
    63  
    64  func bitSizeError(fn, str string, bitSize int) *NumError {
    65  	return &NumError{fn, cloneString(str), errors.New("invalid bit size " + Itoa(bitSize))}
    66  }
    67  
    68  const intSize = 32 << (^uint(0) >> 63)
    69  
    70  // IntSize is the size in bits of an int or uint value.
    71  const IntSize = intSize
    72  
    73  const maxUint64 = 1<<64 - 1
    74  
// ParseUint is like ParseInt but for unsigned numbers.
//
// A sign prefix is not permitted.
//
// On failure the returned error is a *NumError with Func "ParseUint" and
// Num holding a copy of s:
//   - an empty string, a byte that is not a digit of the base, or a
//     misplaced underscore gives ErrSyntax and the value 0;
//   - a base other than 0 or 2 through 36, or a bitSize outside 0 through 64,
//     gives a descriptive error and the value 0;
//   - a value that does not fit in bitSize bits gives ErrRange and the
//     largest value representable in bitSize bits.
//
// A bitSize of 0 means IntSize.
func ParseUint(s string, base int, bitSize int) (uint64, error) {
	const fnParseUint = "ParseUint"

	if s == "" {
		return 0, syntaxError(fnParseUint, s)
	}

	// Underscores are accepted only when the caller asked for base detection.
	base0 := base == 0

	// s0 keeps the complete input for error values and for underscoreOK;
	// s itself is advanced past any base prefix below.
	s0 := s
	switch {
	case 2 <= base && base <= 36:
		// valid base; nothing to do

	case base == 0:
		// Look for binary, octal, hex prefix.
		// A letter prefix is honored only when at least one more byte
		// follows it (len(s) >= 3); otherwise the input falls into the
		// default case, where the letter is later rejected as an octal digit.
		base = 10
		if s[0] == '0' {
			switch {
			case len(s) >= 3 && lower(s[1]) == 'b':
				base = 2
				s = s[2:]
			case len(s) >= 3 && lower(s[1]) == 'o':
				base = 8
				s = s[2:]
			case len(s) >= 3 && lower(s[1]) == 'x':
				base = 16
				s = s[2:]
			default:
				// A bare leading "0" selects octal.
				base = 8
				s = s[1:]
			}
		}

	default:
		return 0, baseError(fnParseUint, s0, base)
	}

	if bitSize == 0 {
		bitSize = IntSize
	} else if bitSize < 0 || bitSize > 64 {
		return 0, bitSizeError(fnParseUint, s0, bitSize)
	}

	// Cutoff is the smallest number such that cutoff*base > maxUint64.
	// Use compile-time constants for common cases.
	var cutoff uint64
	switch base {
	case 10:
		cutoff = maxUint64/10 + 1
	case 16:
		cutoff = maxUint64/16 + 1
	default:
		cutoff = maxUint64/uint64(base) + 1
	}

	// maxVal is the largest value that fits in bitSize bits. For bitSize 64
	// the shift yields 0 and the subtraction wraps around to all ones.
	maxVal := uint64(1)<<uint(bitSize) - 1

	underscores := false
	var n uint64
	for _, c := range []byte(s) {
		var d byte
		switch {
		case c == '_' && base0:
			// Skip the separator now; its placement is validated once,
			// after the loop, by underscoreOK.
			underscores = true
			continue
		case '0' <= c && c <= '9':
			d = c - '0'
		case 'a' <= lower(c) && lower(c) <= 'z':
			d = lower(c) - 'a' + 10
		default:
			return 0, syntaxError(fnParseUint, s0)
		}

		// The byte is a digit in some base, but not necessarily in this one.
		if d >= byte(base) {
			return 0, syntaxError(fnParseUint, s0)
		}

		if n >= cutoff {
			// n*base overflows
			return maxVal, rangeError(fnParseUint, s0)
		}
		n *= uint64(base)

		n1 := n + uint64(d)
		if n1 < n || n1 > maxVal {
			// n+d overflows
			return maxVal, rangeError(fnParseUint, s0)
		}
		n = n1
	}

	// Underscore placement is checked against the original input, prefix
	// included, and only if at least one underscore was actually seen.
	if underscores && !underscoreOK(s0) {
		return 0, syntaxError(fnParseUint, s0)
	}

	return n, nil
}
   176  
   177  // ParseInt interprets a string s in the given base (0, 2 to 36) and
   178  // bit size (0 to 64) and returns the corresponding value i.
   179  //
   180  // The string may begin with a leading sign: "+" or "-".
   181  //
   182  // If the base argument is 0, the true base is implied by the string's
   183  // prefix following the sign (if present): 2 for "0b", 8 for "0" or "0o",
   184  // 16 for "0x", and 10 otherwise. Also, for argument base 0 only,
   185  // underscore characters are permitted as defined by the Go syntax for
   186  // [integer literals].
   187  //
   188  // The bitSize argument specifies the integer type
   189  // that the result must fit into. Bit sizes 0, 8, 16, 32, and 64
   190  // correspond to int, int8, int16, int32, and int64.
   191  // If bitSize is below 0 or above 64, an error is returned.
   192  //
   193  // The errors that ParseInt returns have concrete type *NumError
   194  // and include err.Num = s. If s is empty or contains invalid
   195  // digits, err.Err = ErrSyntax and the returned value is 0;
   196  // if the value corresponding to s cannot be represented by a
   197  // signed integer of the given size, err.Err = ErrRange and the
   198  // returned value is the maximum magnitude integer of the
   199  // appropriate bitSize and sign.
   200  //
   201  // [integer literals]: https://go.dev/ref/spec#Integer_literals
   202  func ParseInt(s string, base int, bitSize int) (i int64, err error) {
   203  	const fnParseInt = "ParseInt"
   204  
   205  	if s == "" {
   206  		return 0, syntaxError(fnParseInt, s)
   207  	}
   208  
   209  	// Pick off leading sign.
   210  	s0 := s
   211  	neg := false
   212  	if s[0] == '+' {
   213  		s = s[1:]
   214  	} else if s[0] == '-' {
   215  		neg = true
   216  		s = s[1:]
   217  	}
   218  
   219  	// Convert unsigned and check range.
   220  	var un uint64
   221  	un, err = ParseUint(s, base, bitSize)
   222  	if err != nil && err.(*NumError).Err != ErrRange {
   223  		err.(*NumError).Func = fnParseInt
   224  		err.(*NumError).Num = cloneString(s0)
   225  		return 0, err
   226  	}
   227  
   228  	if bitSize == 0 {
   229  		bitSize = IntSize
   230  	}
   231  
   232  	cutoff := uint64(1 << uint(bitSize-1))
   233  	if !neg && un >= cutoff {
   234  		return int64(cutoff - 1), rangeError(fnParseInt, s0)
   235  	}
   236  	if neg && un > cutoff {
   237  		return -int64(cutoff), rangeError(fnParseInt, s0)
   238  	}
   239  	n := int64(un)
   240  	if neg {
   241  		n = -n
   242  	}
   243  	return n, nil
   244  }
   245  
   246  // Atoi is equivalent to ParseInt(s, 10, 0), converted to type int.
   247  func Atoi(s string) (int, error) {
   248  	const fnAtoi = "Atoi"
   249  
   250  	sLen := len(s)
   251  	if intSize == 32 && (0 < sLen && sLen < 10) ||
   252  		intSize == 64 && (0 < sLen && sLen < 19) {
   253  		// Fast path for small integers that fit int type.
   254  		s0 := s
   255  		if s[0] == '-' || s[0] == '+' {
   256  			s = s[1:]
   257  			if len(s) < 1 {
   258  				return 0, syntaxError(fnAtoi, s0)
   259  			}
   260  		}
   261  
   262  		n := 0
   263  		for _, ch := range []byte(s) {
   264  			ch -= '0'
   265  			if ch > 9 {
   266  				return 0, syntaxError(fnAtoi, s0)
   267  			}
   268  			n = n*10 + int(ch)
   269  		}
   270  		if s0[0] == '-' {
   271  			n = -n
   272  		}
   273  		return n, nil
   274  	}
   275  
   276  	// Slow path for invalid, big, or underscored integers.
   277  	i64, err := ParseInt(s, 10, 0)
   278  	if nerr, ok := err.(*NumError); ok {
   279  		nerr.Func = fnAtoi
   280  	}
   281  	return int(i64), err
   282  }
   283  
   284  // underscoreOK reports whether the underscores in s are allowed.
   285  // Checking them in this one function lets all the parsers skip over them simply.
   286  // Underscore must appear only between digits or between a base prefix and a digit.
   287  func underscoreOK(s string) bool {
   288  	// saw tracks the last character (class) we saw:
   289  	// ^ for beginning of number,
   290  	// 0 for a digit or base prefix,
   291  	// _ for an underscore,
   292  	// ! for none of the above.
   293  	saw := '^'
   294  	i := 0
   295  
   296  	// Optional sign.
   297  	if len(s) >= 1 && (s[0] == '-' || s[0] == '+') {
   298  		s = s[1:]
   299  	}
   300  
   301  	// Optional base prefix.
   302  	hex := false
   303  	if len(s) >= 2 && s[0] == '0' && (lower(s[1]) == 'b' || lower(s[1]) == 'o' || lower(s[1]) == 'x') {
   304  		i = 2
   305  		saw = '0' // base prefix counts as a digit for "underscore as digit separator"
   306  		hex = lower(s[1]) == 'x'
   307  	}
   308  
   309  	// Number proper.
   310  	for ; i < len(s); i++ {
   311  		// Digits are always okay.
   312  		if '0' <= s[i] && s[i] <= '9' || hex && 'a' <= lower(s[i]) && lower(s[i]) <= 'f' {
   313  			saw = '0'
   314  			continue
   315  		}
   316  		// Underscore must follow digit.
   317  		if s[i] == '_' {
   318  			if saw != '0' {
   319  				return false
   320  			}
   321  			saw = '_'
   322  			continue
   323  		}
   324  		// Underscore must also be followed by digit.
   325  		if saw == '_' {
   326  			return false
   327  		}
   328  		// Saw non-digit, non-underscore.
   329  		saw = '!'
   330  	}
   331  	return saw != '_'
   332  }
   333
View as plain text