atof.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package strconv
     6  
// Decimal to binary floating-point conversion.
// Algorithm:
//   1) Store input in multiprecision decimal.
//   2) Multiply/divide decimal by powers of two until in range [0.5, 1).
//   3) Multiply by 2^precision and round to get mantissa.
    12  
    13  import "math"
    14  
// optimize enables the fast conversion paths tried by atof32 and atof64
// before the multiprecision decimal fallback.
// Set to false to force slow-path conversions for testing.
var optimize = true
    16  
    17  // commonPrefixLenIgnoreCase returns the length of the common
    18  // prefix of s and prefix, with the character case of s ignored.
    19  // The prefix argument must be all lower-case.
    20  func commonPrefixLenIgnoreCase(s, prefix string) int {
    21  	n := len(prefix)
    22  	if n > len(s) {
    23  		n = len(s)
    24  	}
    25  	for i := 0; i < n; i++ {
    26  		c := s[i]
    27  		if 'A' <= c && c <= 'Z' {
    28  			c += 'a' - 'A'
    29  		}
    30  		if c != prefix[i] {
    31  			return i
    32  		}
    33  	}
    34  	return n
    35  }
    36  
    37  // special returns the floating-point value for the special,
    38  // possibly signed floating-point representations inf, infinity,
    39  // and NaN. The result is ok if a prefix of s contains one
    40  // of these representations and n is the length of that prefix.
    41  // The character case is ignored.
    42  func special(s string) (f float64, n int, ok bool) {
    43  	if len(s) == 0 {
    44  		return 0, 0, false
    45  	}
    46  	sign := 1
    47  	nsign := 0
    48  	switch s[0] {
    49  	case '+', '-':
    50  		if s[0] == '-' {
    51  			sign = -1
    52  		}
    53  		nsign = 1
    54  		s = s[1:]
    55  		fallthrough
    56  	case 'i', 'I':
    57  		n := commonPrefixLenIgnoreCase(s, "infinity")
    58  		// Anything longer than "inf" is ok, but if we
    59  		// don't have "infinity", only consume "inf".
    60  		if 3 < n && n < 8 {
    61  			n = 3
    62  		}
    63  		if n == 3 || n == 8 {
    64  			return math.Inf(sign), nsign + n, true
    65  		}
    66  	case 'n', 'N':
    67  		if commonPrefixLenIgnoreCase(s, "nan") == 3 {
    68  			return math.NaN(), 3, true
    69  		}
    70  	}
    71  	return 0, 0, false
    72  }
    73  
    74  func (b *decimal) set(s string) (ok bool) {
    75  	i := 0
    76  	b.neg = false
    77  	b.trunc = false
    78  
    79  	// optional sign
    80  	if i >= len(s) {
    81  		return
    82  	}
    83  	switch {
    84  	case s[i] == '+':
    85  		i++
    86  	case s[i] == '-':
    87  		b.neg = true
    88  		i++
    89  	}
    90  
    91  	// digits
    92  	sawdot := false
    93  	sawdigits := false
    94  	for ; i < len(s); i++ {
    95  		switch {
    96  		case s[i] == '_':
    97  			// readFloat already checked underscores
    98  			continue
    99  		case s[i] == '.':
   100  			if sawdot {
   101  				return
   102  			}
   103  			sawdot = true
   104  			b.dp = b.nd
   105  			continue
   106  
   107  		case '0' <= s[i] && s[i] <= '9':
   108  			sawdigits = true
   109  			if s[i] == '0' && b.nd == 0 { // ignore leading zeros
   110  				b.dp--
   111  				continue
   112  			}
   113  			if b.nd < len(b.d) {
   114  				b.d[b.nd] = s[i]
   115  				b.nd++
   116  			} else if s[i] != '0' {
   117  				b.trunc = true
   118  			}
   119  			continue
   120  		}
   121  		break
   122  	}
   123  	if !sawdigits {
   124  		return
   125  	}
   126  	if !sawdot {
   127  		b.dp = b.nd
   128  	}
   129  
   130  	// optional exponent moves decimal point.
   131  	// if we read a very large, very long number,
   132  	// just be sure to move the decimal point by
   133  	// a lot (say, 100000).  it doesn't matter if it's
   134  	// not the exact number.
   135  	if i < len(s) && lower(s[i]) == 'e' {
   136  		i++
   137  		if i >= len(s) {
   138  			return
   139  		}
   140  		esign := 1
   141  		if s[i] == '+' {
   142  			i++
   143  		} else if s[i] == '-' {
   144  			i++
   145  			esign = -1
   146  		}
   147  		if i >= len(s) || s[i] < '0' || s[i] > '9' {
   148  			return
   149  		}
   150  		e := 0
   151  		for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ {
   152  			if s[i] == '_' {
   153  				// readFloat already checked underscores
   154  				continue
   155  			}
   156  			if e < 10000 {
   157  				e = e*10 + int(s[i]) - '0'
   158  			}
   159  		}
   160  		b.dp += e * esign
   161  	}
   162  
   163  	if i != len(s) {
   164  		return
   165  	}
   166  
   167  	ok = true
   168  	return
   169  }
   170  
   171  // readFloat reads a decimal or hexadecimal mantissa and exponent from a float
   172  // string representation in s; the number may be followed by other characters.
   173  // readFloat reports the number of bytes consumed (i), and whether the number
   174  // is valid (ok).
   175  func readFloat(s string) (mantissa uint64, exp int, neg, trunc, hex bool, i int, ok bool) {
   176  	underscores := false
   177  
   178  	// optional sign
   179  	if i >= len(s) {
   180  		return
   181  	}
   182  	switch {
   183  	case s[i] == '+':
   184  		i++
   185  	case s[i] == '-':
   186  		neg = true
   187  		i++
   188  	}
   189  
   190  	// digits
   191  	base := uint64(10)
   192  	maxMantDigits := 19 // 10^19 fits in uint64
   193  	expChar := byte('e')
   194  	if i+2 < len(s) && s[i] == '0' && lower(s[i+1]) == 'x' {
   195  		base = 16
   196  		maxMantDigits = 16 // 16^16 fits in uint64
   197  		i += 2
   198  		expChar = 'p'
   199  		hex = true
   200  	}
   201  	sawdot := false
   202  	sawdigits := false
   203  	nd := 0
   204  	ndMant := 0
   205  	dp := 0
   206  loop:
   207  	for ; i < len(s); i++ {
   208  		switch c := s[i]; true {
   209  		case c == '_':
   210  			underscores = true
   211  			continue
   212  
   213  		case c == '.':
   214  			if sawdot {
   215  				break loop
   216  			}
   217  			sawdot = true
   218  			dp = nd
   219  			continue
   220  
   221  		case '0' <= c && c <= '9':
   222  			sawdigits = true
   223  			if c == '0' && nd == 0 { // ignore leading zeros
   224  				dp--
   225  				continue
   226  			}
   227  			nd++
   228  			if ndMant < maxMantDigits {
   229  				mantissa *= base
   230  				mantissa += uint64(c - '0')
   231  				ndMant++
   232  			} else if c != '0' {
   233  				trunc = true
   234  			}
   235  			continue
   236  
   237  		case base == 16 && 'a' <= lower(c) && lower(c) <= 'f':
   238  			sawdigits = true
   239  			nd++
   240  			if ndMant < maxMantDigits {
   241  				mantissa *= 16
   242  				mantissa += uint64(lower(c) - 'a' + 10)
   243  				ndMant++
   244  			} else {
   245  				trunc = true
   246  			}
   247  			continue
   248  		}
   249  		break
   250  	}
   251  	if !sawdigits {
   252  		return
   253  	}
   254  	if !sawdot {
   255  		dp = nd
   256  	}
   257  
   258  	if base == 16 {
   259  		dp *= 4
   260  		ndMant *= 4
   261  	}
   262  
   263  	// optional exponent moves decimal point.
   264  	// if we read a very large, very long number,
   265  	// just be sure to move the decimal point by
   266  	// a lot (say, 100000).  it doesn't matter if it's
   267  	// not the exact number.
   268  	if i < len(s) && lower(s[i]) == expChar {
   269  		i++
   270  		if i >= len(s) {
   271  			return
   272  		}
   273  		esign := 1
   274  		if s[i] == '+' {
   275  			i++
   276  		} else if s[i] == '-' {
   277  			i++
   278  			esign = -1
   279  		}
   280  		if i >= len(s) || s[i] < '0' || s[i] > '9' {
   281  			return
   282  		}
   283  		e := 0
   284  		for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ {
   285  			if s[i] == '_' {
   286  				underscores = true
   287  				continue
   288  			}
   289  			if e < 10000 {
   290  				e = e*10 + int(s[i]) - '0'
   291  			}
   292  		}
   293  		dp += e * esign
   294  	} else if base == 16 {
   295  		// Must have exponent.
   296  		return
   297  	}
   298  
   299  	if mantissa != 0 {
   300  		exp = dp - ndMant
   301  	}
   302  
   303  	if underscores && !underscoreOK(s[:i]) {
   304  		return
   305  	}
   306  
   307  	ok = true
   308  	return
   309  }
   310  
// powtab maps a decimal power of ten to a binary power of two.
// floatBits indexes it by the magnitude of the decimal point position
// to pick how many bits to shift by in one step. For k >= 1, powtab[k]
// is the largest n with 2^n <= 10^k (for example 2^13 = 8192 <= 10^4).
var powtab = []int{1, 3, 6, 9, 13, 16, 19, 23, 26}
   313  
// floatBits converts the decimal value held in d to the bit pattern of
// the binary floating-point format described by flt.
// It returns the assembled sign, exponent, and mantissa bits in b, and
// sets overflow when the value is too large for the format, in which
// case b encodes an infinity carrying d's sign.
// The conversion shifts d in place, so d no longer holds the original
// value afterwards.
func (d *decimal) floatBits(flt *floatInfo) (b uint64, overflow bool) {
	var exp int
	var mant uint64

	// Zero is always a special case.
	if d.nd == 0 {
		mant = 0
		exp = flt.bias
		goto out
	}

	// Obvious overflow/underflow.
	// These bounds are for 64-bit floats.
	// Will have to change if we want to support 80-bit floats in the future.
	if d.dp > 310 {
		goto overflow
	}
	if d.dp < -330 {
		// zero
		mant = 0
		exp = flt.bias
		goto out
	}

	// Scale by powers of two until in range [0.5, 1.0)
	exp = 0
	// Value is >= 1: divide by powers of two, counting them in exp.
	for d.dp > 0 {
		var n int
		if d.dp >= len(powtab) {
			n = 27
		} else {
			n = powtab[d.dp]
		}
		d.Shift(-n)
		exp += n
	}
	// Value is < 0.5: multiply by powers of two, counting them in exp.
	for d.dp < 0 || d.dp == 0 && d.d[0] < '5' {
		var n int
		if -d.dp >= len(powtab) {
			n = 27
		} else {
			n = powtab[-d.dp]
		}
		d.Shift(n)
		exp -= n
	}

	// Our range is [0.5,1) but floating point range is [1,2).
	exp--

	// Minimum representable exponent is flt.bias+1.
	// If the exponent is smaller, move it up and
	// adjust d accordingly.
	if exp < flt.bias+1 {
		n := flt.bias + 1 - exp
		d.Shift(-n)
		exp += n
	}

	// The all-ones exponent field is taken by the infinity encoding
	// produced at the overflow label, so reaching it means overflow.
	if exp-flt.bias >= 1<<flt.expbits-1 {
		goto overflow
	}

	// Extract 1+flt.mantbits bits.
	d.Shift(int(1 + flt.mantbits))
	mant = d.RoundedInteger()

	// Rounding might have added a bit; shift down.
	if mant == 2<<flt.mantbits {
		mant >>= 1
		exp++
		if exp-flt.bias >= 1<<flt.expbits-1 {
			goto overflow
		}
	}

	// Denormalized?
	// With the leading mantissa bit clear, the exponent field is
	// encoded as zero (exp-flt.bias == 0).
	if mant&(1<<flt.mantbits) == 0 {
		exp = flt.bias
	}
	goto out

overflow:
	// ±Inf
	mant = 0
	exp = 1<<flt.expbits - 1 + flt.bias
	overflow = true

out:
	// Assemble bits.
	// The mask drops the leading mantissa bit, which is not stored.
	bits := mant & (uint64(1)<<flt.mantbits - 1)
	bits |= uint64((exp-flt.bias)&(1<<flt.expbits-1)) << flt.mantbits
	if d.neg {
		// The sign bit sits just above the exponent field.
		bits |= 1 << flt.mantbits << flt.expbits
	}
	return bits, overflow
}
   411  
// Exact powers of 10.
// float64pow10[k] is 10^k for k up to 22; atof64exact multiplies and
// divides by these entries and never indexes beyond 22.
var float64pow10 = []float64{
	1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
	1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
	1e20, 1e21, 1e22,
}

// float32pow10[k] is 10^k for k up to 10; atof32exact uses it the same
// way and never indexes beyond 10.
var float32pow10 = []float32{1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10}
   419  
   420  // If possible to convert decimal representation to 64-bit float f exactly,
   421  // entirely in floating-point math, do so, avoiding the expense of decimalToFloatBits.
   422  // Three common cases:
   423  //
   424  //	value is exact integer
   425  //	value is exact integer * exact power of ten
   426  //	value is exact integer / exact power of ten
   427  //
   428  // These all produce potentially inexact but correctly rounded answers.
   429  func atof64exact(mantissa uint64, exp int, neg bool) (f float64, ok bool) {
   430  	if mantissa>>float64info.mantbits != 0 {
   431  		return
   432  	}
   433  	f = float64(mantissa)
   434  	if neg {
   435  		f = -f
   436  	}
   437  	switch {
   438  	case exp == 0:
   439  		// an integer.
   440  		return f, true
   441  	// Exact integers are <= 10^15.
   442  	// Exact powers of ten are <= 10^22.
   443  	case exp > 0 && exp <= 15+22: // int * 10^k
   444  		// If exponent is big but number of digits is not,
   445  		// can move a few zeros into the integer part.
   446  		if exp > 22 {
   447  			f *= float64pow10[exp-22]
   448  			exp = 22
   449  		}
   450  		if f > 1e15 || f < -1e15 {
   451  			// the exponent was really too large.
   452  			return
   453  		}
   454  		return f * float64pow10[exp], true
   455  	case exp < 0 && exp >= -22: // int / 10^k
   456  		return f / float64pow10[-exp], true
   457  	}
   458  	return
   459  }
   460  
   461  // If possible to compute mantissa*10^exp to 32-bit float f exactly,
   462  // entirely in floating-point math, do so, avoiding the machinery above.
   463  func atof32exact(mantissa uint64, exp int, neg bool) (f float32, ok bool) {
   464  	if mantissa>>float32info.mantbits != 0 {
   465  		return
   466  	}
   467  	f = float32(mantissa)
   468  	if neg {
   469  		f = -f
   470  	}
   471  	switch {
   472  	case exp == 0:
   473  		return f, true
   474  	// Exact integers are <= 10^7.
   475  	// Exact powers of ten are <= 10^10.
   476  	case exp > 0 && exp <= 7+10: // int * 10^k
   477  		// If exponent is big but number of digits is not,
   478  		// can move a few zeros into the integer part.
   479  		if exp > 10 {
   480  			f *= float32pow10[exp-10]
   481  			exp = 10
   482  		}
   483  		if f > 1e7 || f < -1e7 {
   484  			// the exponent was really too large.
   485  			return
   486  		}
   487  		return f * float32pow10[exp], true
   488  	case exp < 0 && exp >= -10: // int / 10^k
   489  		return f / float32pow10[-exp], true
   490  	}
   491  	return
   492  }
   493  
// atofHex converts the hex floating-point string s
// to a rounded float32 or float64 value (depending on flt==&float32info or flt==&float64info)
// and returns it as a float64.
// The string s has already been parsed into a mantissa, exponent, and sign (neg==true for negative).
// If trunc is true, trailing non-zero bits have been omitted from the mantissa.
// The string s itself is only used to build the range error that is
// returned, together with an infinity, when the value is too large.
func atofHex(s string, flt *floatInfo, mantissa uint64, exp int, neg, trunc bool) (float64, error) {
	// Exponents above maxExp are reported as infinity below; minExp is
	// the threshold below which the mantissa is denormalized.
	maxExp := 1<<flt.expbits + flt.bias - 2
	minExp := flt.bias + 1
	exp += int(flt.mantbits) // mantissa now implicitly divided by 2^mantbits.

	// Shift mantissa and exponent to bring representation into float range.
	// Eventually we want a mantissa with a leading 1-bit followed by mantbits other bits.
	// For rounding, we need two more, where the bottom bit represents
	// whether that bit or any later bit was non-zero.
	// (If the mantissa has already lost non-zero bits, trunc is true,
	// and we OR in a 1 below after shifting left appropriately.)
	for mantissa != 0 && mantissa>>(flt.mantbits+2) == 0 {
		mantissa <<= 1
		exp--
	}
	if trunc {
		mantissa |= 1
	}
	// Too many bits: shift right, folding each dropped bit into the
	// bottom bit so that lost non-zero bits stay visible to rounding.
	for mantissa>>(1+flt.mantbits+2) != 0 {
		mantissa = mantissa>>1 | mantissa&1
		exp++
	}

	// If exponent is too negative,
	// denormalize in hopes of making it representable.
	// (The -2 is for the rounding bits.)
	for mantissa > 1 && exp < minExp-2 {
		mantissa = mantissa>>1 | mantissa&1
		exp++
	}

	// Round using two bottom bits.
	round := mantissa & 3
	mantissa >>= 2
	round |= mantissa & 1 // round to even (round up if mantissa is odd)
	exp += 2
	if round == 3 {
		mantissa++
		// Rounding up carried into a new top bit; renormalize.
		if mantissa == 1<<(1+flt.mantbits) {
			mantissa >>= 1
			exp++
		}
	}

	if mantissa>>flt.mantbits == 0 { // Denormal or zero.
		exp = flt.bias
	}
	var err error
	if exp > maxExp { // infinity and range error
		// Only the leading bit is set, so the stored mantissa field
		// is zero and the exponent field below becomes all ones.
		mantissa = 1 << flt.mantbits
		exp = maxExp + 1
		err = rangeError(fnParseFloat, s)
	}

	// Assemble the stored mantissa (leading bit masked off), the
	// exponent field, and the sign bit.
	bits := mantissa & (1<<flt.mantbits - 1)
	bits |= uint64((exp-flt.bias)&(1<<flt.expbits-1)) << flt.mantbits
	if neg {
		bits |= 1 << flt.mantbits << flt.expbits
	}
	if flt == &float32info {
		return float64(math.Float32frombits(uint32(bits))), err
	}
	return math.Float64frombits(bits), err
}
   563  
// fnParseFloat is the function name passed to syntaxError and rangeError
// when the float parsing routines in this file report an error.
const fnParseFloat = "ParseFloat"
   565  
   566  func atof32(s string) (f float32, n int, err error) {
   567  	if val, n, ok := special(s); ok {
   568  		return float32(val), n, nil
   569  	}
   570  
   571  	mantissa, exp, neg, trunc, hex, n, ok := readFloat(s)
   572  	if !ok {
   573  		return 0, n, syntaxError(fnParseFloat, s)
   574  	}
   575  
   576  	if hex {
   577  		f, err := atofHex(s[:n], &float32info, mantissa, exp, neg, trunc)
   578  		return float32(f), n, err
   579  	}
   580  
   581  	if optimize {
   582  		// Try pure floating-point arithmetic conversion, and if that fails,
   583  		// the Eisel-Lemire algorithm.
   584  		if !trunc {
   585  			if f, ok := atof32exact(mantissa, exp, neg); ok {
   586  				return f, n, nil
   587  			}
   588  		}
   589  		f, ok := eiselLemire32(mantissa, exp, neg)
   590  		if ok {
   591  			if !trunc {
   592  				return f, n, nil
   593  			}
   594  			// Even if the mantissa was truncated, we may
   595  			// have found the correct result. Confirm by
   596  			// converting the upper mantissa bound.
   597  			fUp, ok := eiselLemire32(mantissa+1, exp, neg)
   598  			if ok && f == fUp {
   599  				return f, n, nil
   600  			}
   601  		}
   602  	}
   603  
   604  	// Slow fallback.
   605  	var d decimal
   606  	if !d.set(s[:n]) {
   607  		return 0, n, syntaxError(fnParseFloat, s)
   608  	}
   609  	b, ovf := d.floatBits(&float32info)
   610  	f = math.Float32frombits(uint32(b))
   611  	if ovf {
   612  		err = rangeError(fnParseFloat, s)
   613  	}
   614  	return f, n, err
   615  }
   616  
   617  func atof64(s string) (f float64, n int, err error) {
   618  	if val, n, ok := special(s); ok {
   619  		return val, n, nil
   620  	}
   621  
   622  	mantissa, exp, neg, trunc, hex, n, ok := readFloat(s)
   623  	if !ok {
   624  		return 0, n, syntaxError(fnParseFloat, s)
   625  	}
   626  
   627  	if hex {
   628  		f, err := atofHex(s[:n], &float64info, mantissa, exp, neg, trunc)
   629  		return f, n, err
   630  	}
   631  
   632  	if optimize {
   633  		// Try pure floating-point arithmetic conversion, and if that fails,
   634  		// the Eisel-Lemire algorithm.
   635  		if !trunc {
   636  			if f, ok := atof64exact(mantissa, exp, neg); ok {
   637  				return f, n, nil
   638  			}
   639  		}
   640  		f, ok := eiselLemire64(mantissa, exp, neg)
   641  		if ok {
   642  			if !trunc {
   643  				return f, n, nil
   644  			}
   645  			// Even if the mantissa was truncated, we may
   646  			// have found the correct result. Confirm by
   647  			// converting the upper mantissa bound.
   648  			fUp, ok := eiselLemire64(mantissa+1, exp, neg)
   649  			if ok && f == fUp {
   650  				return f, n, nil
   651  			}
   652  		}
   653  	}
   654  
   655  	// Slow fallback.
   656  	var d decimal
   657  	if !d.set(s[:n]) {
   658  		return 0, n, syntaxError(fnParseFloat, s)
   659  	}
   660  	b, ovf := d.floatBits(&float64info)
   661  	f = math.Float64frombits(b)
   662  	if ovf {
   663  		err = rangeError(fnParseFloat, s)
   664  	}
   665  	return f, n, err
   666  }
   667  
   668  // ParseFloat converts the string s to a floating-point number
   669  // with the precision specified by bitSize: 32 for float32, or 64 for float64.
   670  // When bitSize=32, the result still has type float64, but it will be
   671  // convertible to float32 without changing its value.
   672  //
   673  // ParseFloat accepts decimal and hexadecimal floating-point numbers
   674  // as defined by the Go syntax for [floating-point literals].
   675  // If s is well-formed and near a valid floating-point number,
   676  // ParseFloat returns the nearest floating-point number rounded
   677  // using IEEE754 unbiased rounding.
   678  // (Parsing a hexadecimal floating-point value only rounds when
   679  // there are more bits in the hexadecimal representation than
   680  // will fit in the mantissa.)
   681  //
   682  // The errors that ParseFloat returns have concrete type *NumError
   683  // and include err.Num = s.
   684  //
   685  // If s is not syntactically well-formed, ParseFloat returns err.Err = ErrSyntax.
   686  //
   687  // If s is syntactically well-formed but is more than 1/2 ULP
   688  // away from the largest floating point number of the given size,
   689  // ParseFloat returns f = ±Inf, err.Err = ErrRange.
   690  //
   691  // ParseFloat recognizes the string "NaN", and the (possibly signed) strings "Inf" and "Infinity"
   692  // as their respective special floating point values. It ignores case when matching.
   693  //
   694  // [floating-point literals]: https://go.dev/ref/spec#Floating-point_literals
   695  func ParseFloat(s string, bitSize int) (float64, error) {
   696  	f, n, err := parseFloatPrefix(s, bitSize)
   697  	if n != len(s) && (err == nil || err.(*NumError).Err != ErrSyntax) {
   698  		return 0, syntaxError(fnParseFloat, s)
   699  	}
   700  	return f, err
   701  }
   702  
   703  func parseFloatPrefix(s string, bitSize int) (float64, int, error) {
   704  	if bitSize == 32 {
   705  		f, n, err := atof32(s)
   706  		return float64(f), n, err
   707  	}
   708  	return atof64(s)
   709  }
   710
View as plain text