1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package strconv 6 7 import "errors" 8 9 // lower(c) is a lower-case letter if and only if 10 // c is either that lower-case letter or the equivalent upper-case letter. 11 // Instead of writing c == 'x' || c == 'X' one can write lower(c) == 'x'. 12 // Note that lower of non-letters can produce other non-letters. 13 func lower(c byte) byte { 14 return c | ('x' - 'X') 15 } 16 17 // ErrRange indicates that a value is out of range for the target type. 18 var ErrRange = errors.New("value out of range") 19 20 // ErrSyntax indicates that a value does not have the right syntax for the target type. 21 var ErrSyntax = errors.New("invalid syntax") 22 23 // A NumError records a failed conversion. 24 type NumError struct { 25 Func string // the failing function (ParseBool, ParseInt, ParseUint, ParseFloat) 26 Num string // the input 27 Err error // the reason the conversion failed (e.g. ErrRange, ErrSyntax, etc.) 28 } 29 30 func (e *NumError) Error() string { 31 return "strconv." + e.Func + ": " + "parsing " + Quote(e.Num) + ": " + e.Err.Error() 32 } 33 34 func syntaxError(fn, str string) *NumError { 35 return &NumError{fn, str, ErrSyntax} 36 } 37 38 func rangeError(fn, str string) *NumError { 39 return &NumError{fn, str, ErrRange} 40 } 41 42 func baseError(fn, str string, base int) *NumError { 43 return &NumError{fn, str, errors.New("invalid base " + Itoa(base))} 44 } 45 46 func bitSizeError(fn, str string, bitSize int) *NumError { 47 return &NumError{fn, str, errors.New("invalid bit size " + Itoa(bitSize))} 48 } 49 50 const intSize = 32 << (^uint(0) >> 63) 51 52 // IntSize is the size in bits of an int or uint value. 53 const IntSize = intSize 54 55 const maxUint64 = 1<<64 - 1 56 57 // ParseUint is like ParseInt but for unsigned numbers. 58 func ParseUint(s string, base int, bitSize int) (uint64, error) { 59 const fnParseUint = "ParseUint" 60 61 if s == "" || !underscoreOK(s) { 62 return 0, syntaxError(fnParseUint, s) 63 } 64 65 base0 := base == 0 66 67 s0 := s 68 switch { 69 case 2 <= base && base <= 36: 70 // valid base; nothing to do 71 72 case base == 0: 73 // Look for octal, hex prefix. 74 base = 10 75 if s[0] == '0' { 76 switch { 77 case len(s) >= 3 && lower(s[1]) == 'b': 78 base = 2 79 s = s[2:] 80 case len(s) >= 3 && lower(s[1]) == 'o': 81 base = 8 82 s = s[2:] 83 case len(s) >= 3 && lower(s[1]) == 'x': 84 base = 16 85 s = s[2:] 86 default: 87 base = 8 88 s = s[1:] 89 } 90 } 91 92 default: 93 return 0, baseError(fnParseUint, s0, base) 94 } 95 96 if bitSize == 0 { 97 bitSize = int(IntSize) 98 } else if bitSize < 0 || bitSize > 64 { 99 return 0, bitSizeError(fnParseUint, s0, bitSize) 100 } 101 102 // Cutoff is the smallest number such that cutoff*base > maxUint64. 103 // Use compile-time constants for common cases. 104 var cutoff uint64 105 switch base { 106 case 10: 107 cutoff = maxUint64/10 + 1 108 case 16: 109 cutoff = maxUint64/16 + 1 110 default: 111 cutoff = maxUint64/uint64(base) + 1 112 } 113 114 maxVal := uint64(1)<<uint(bitSize) - 1 115 116 var n uint64 117 for _, c := range []byte(s) { 118 var d byte 119 switch { 120 case c == '_' && base0: 121 // underscoreOK already called 122 continue 123 case '0' <= c && c <= '9': 124 d = c - '0' 125 case 'a' <= lower(c) && lower(c) <= 'z': 126 d = lower(c) - 'a' + 10 127 default: 128 return 0, syntaxError(fnParseUint, s0) 129 } 130 131 if d >= byte(base) { 132 return 0, syntaxError(fnParseUint, s0) 133 } 134 135 if n >= cutoff { 136 // n*base overflows 137 return maxVal, rangeError(fnParseUint, s0) 138 } 139 n *= uint64(base) 140 141 n1 := n + uint64(d) 142 if n1 < n || n1 > maxVal { 143 // n+v overflows 144 return maxVal, rangeError(fnParseUint, s0) 145 } 146 n = n1 147 } 148 149 return n, nil 150 } 151 152 // ParseInt interprets a string s in the given base (0, 2 to 36) and 153 // bit size (0 to 64) and returns the corresponding value i. 154 // 155 // If base == 0, the base is implied by the string's prefix: 156 // base 2 for "0b", base 8 for "0" or "0o", base 16 for "0x", 157 // and base 10 otherwise. Also, for base == 0 only, underscore 158 // characters are permitted per the Go integer literal syntax. 159 // If base is below 0, is 1, or is above 36, an error is returned. 160 // 161 // The bitSize argument specifies the integer type 162 // that the result must fit into. Bit sizes 0, 8, 16, 32, and 64 163 // correspond to int, int8, int16, int32, and int64. 164 // If bitSize is below 0 or above 64, an error is returned. 165 // 166 // The errors that ParseInt returns have concrete type *NumError 167 // and include err.Num = s. If s is empty or contains invalid 168 // digits, err.Err = ErrSyntax and the returned value is 0; 169 // if the value corresponding to s cannot be represented by a 170 // signed integer of the given size, err.Err = ErrRange and the 171 // returned value is the maximum magnitude integer of the 172 // appropriate bitSize and sign. 173 func ParseInt(s string, base int, bitSize int) (i int64, err error) { 174 const fnParseInt = "ParseInt" 175 176 if s == "" { 177 return 0, syntaxError(fnParseInt, s) 178 } 179 180 // Pick off leading sign. 181 s0 := s 182 neg := false 183 if s[0] == '+' { 184 s = s[1:] 185 } else if s[0] == '-' { 186 neg = true 187 s = s[1:] 188 } 189 190 // Convert unsigned and check range. 191 var un uint64 192 un, err = ParseUint(s, base, bitSize) 193 if err != nil && err.(*NumError).Err != ErrRange { 194 err.(*NumError).Func = fnParseInt 195 err.(*NumError).Num = s0 196 return 0, err 197 } 198 199 if bitSize == 0 { 200 bitSize = int(IntSize) 201 } 202 203 cutoff := uint64(1 << uint(bitSize-1)) 204 if !neg && un >= cutoff { 205 return int64(cutoff - 1), rangeError(fnParseInt, s0) 206 } 207 if neg && un > cutoff { 208 return -int64(cutoff), rangeError(fnParseInt, s0) 209 } 210 n := int64(un) 211 if neg { 212 n = -n 213 } 214 return n, nil 215 } 216 217 // Atoi is equivalent to ParseInt(s, 10, 0), converted to type int. 218 func Atoi(s string) (int, error) { 219 const fnAtoi = "Atoi" 220 221 sLen := len(s) 222 if intSize == 32 && (0 < sLen && sLen < 10) || 223 intSize == 64 && (0 < sLen && sLen < 19) { 224 // Fast path for small integers that fit int type. 225 s0 := s 226 if s[0] == '-' || s[0] == '+' { 227 s = s[1:] 228 if len(s) < 1 { 229 return 0, &NumError{fnAtoi, s0, ErrSyntax} 230 } 231 } 232 233 n := 0 234 for _, ch := range []byte(s) { 235 ch -= '0' 236 if ch > 9 { 237 return 0, &NumError{fnAtoi, s0, ErrSyntax} 238 } 239 n = n*10 + int(ch) 240 } 241 if s0[0] == '-' { 242 n = -n 243 } 244 return n, nil 245 } 246 247 // Slow path for invalid, big, or underscored integers. 248 i64, err := ParseInt(s, 10, 0) 249 if nerr, ok := err.(*NumError); ok { 250 nerr.Func = fnAtoi 251 } 252 return int(i64), err 253 } 254 255 // underscoreOK reports whether the underscores in s are allowed. 256 // Checking them in this one function lets all the parsers skip over them simply. 257 // Underscore must appear only between digits or between a base prefix and a digit. 258 func underscoreOK(s string) bool { 259 // saw tracks the last character (class) we saw: 260 // ^ for beginning of number, 261 // 0 for a digit or base prefix, 262 // _ for an underscore, 263 // ! for none of the above. 264 saw := '^' 265 i := 0 266 267 // Optional sign. 268 if len(s) >= 1 && (s[0] == '-' || s[0] == '+') { 269 s = s[1:] 270 } 271 272 // Optional base prefix. 273 hex := false 274 if len(s) >= 2 && s[0] == '0' && (lower(s[1]) == 'b' || lower(s[1]) == 'o' || lower(s[1]) == 'x') { 275 i = 2 276 saw = '0' // base prefix counts as a digit for "underscore as digit separator" 277 hex = lower(s[1]) == 'x' 278 } 279 280 // Number proper. 281 for ; i < len(s); i++ { 282 // Digits are always okay. 283 if '0' <= s[i] && s[i] <= '9' || hex && 'a' <= lower(s[i]) && lower(s[i]) <= 'f' { 284 saw = '0' 285 continue 286 } 287 // Underscore must follow digit. 288 if s[i] == '_' { 289 if saw != '0' { 290 return false 291 } 292 saw = '_' 293 continue 294 } 295 // Underscore must also be followed by digit. 296 if saw == '_' { 297 return false 298 } 299 // Saw non-digit, non-underscore. 300 saw = '!' 301 } 302 return saw != '_' 303 } 304
View as plain text