Source file src/strconv/atof.go

Documentation: strconv

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package strconv
     6  
     7  // decimal to binary floating point conversion.
     8  // Algorithm:
     9  //   1) Store input in multiprecision decimal.
    10  //   2) Multiply/divide decimal by powers of two until in range [0.5, 1)
    11  //   3) Multiply by 2^precision and round to get mantissa.
    12  
    13  import "math"
    14  
    15  var optimize = true // set to false to force slow-path conversions for testing
    16  
    17  func equalIgnoreCase(s1, s2 string) bool {
    18  	if len(s1) != len(s2) {
    19  		return false
    20  	}
    21  	for i := 0; i < len(s1); i++ {
    22  		c1 := s1[i]
    23  		if 'A' <= c1 && c1 <= 'Z' {
    24  			c1 += 'a' - 'A'
    25  		}
    26  		c2 := s2[i]
    27  		if 'A' <= c2 && c2 <= 'Z' {
    28  			c2 += 'a' - 'A'
    29  		}
    30  		if c1 != c2 {
    31  			return false
    32  		}
    33  	}
    34  	return true
    35  }
    36  
    37  func special(s string) (f float64, ok bool) {
    38  	if len(s) == 0 {
    39  		return
    40  	}
    41  	switch s[0] {
    42  	default:
    43  		return
    44  	case '+':
    45  		if equalIgnoreCase(s, "+inf") || equalIgnoreCase(s, "+infinity") {
    46  			return math.Inf(1), true
    47  		}
    48  	case '-':
    49  		if equalIgnoreCase(s, "-inf") || equalIgnoreCase(s, "-infinity") {
    50  			return math.Inf(-1), true
    51  		}
    52  	case 'n', 'N':
    53  		if equalIgnoreCase(s, "nan") {
    54  			return math.NaN(), true
    55  		}
    56  	case 'i', 'I':
    57  		if equalIgnoreCase(s, "inf") || equalIgnoreCase(s, "infinity") {
    58  			return math.Inf(1), true
    59  		}
    60  	}
    61  	return
    62  }
    63  
    64  func (b *decimal) set(s string) (ok bool) {
    65  	i := 0
    66  	b.neg = false
    67  	b.trunc = false
    68  
    69  	// optional sign
    70  	if i >= len(s) {
    71  		return
    72  	}
    73  	switch {
    74  	case s[i] == '+':
    75  		i++
    76  	case s[i] == '-':
    77  		b.neg = true
    78  		i++
    79  	}
    80  
    81  	// digits
    82  	sawdot := false
    83  	sawdigits := false
    84  	for ; i < len(s); i++ {
    85  		switch {
    86  		case s[i] == '_':
    87  			// underscoreOK already called
    88  			continue
    89  		case s[i] == '.':
    90  			if sawdot {
    91  				return
    92  			}
    93  			sawdot = true
    94  			b.dp = b.nd
    95  			continue
    96  
    97  		case '0' <= s[i] && s[i] <= '9':
    98  			sawdigits = true
    99  			if s[i] == '0' && b.nd == 0 { // ignore leading zeros
   100  				b.dp--
   101  				continue
   102  			}
   103  			if b.nd < len(b.d) {
   104  				b.d[b.nd] = s[i]
   105  				b.nd++
   106  			} else if s[i] != '0' {
   107  				b.trunc = true
   108  			}
   109  			continue
   110  		}
   111  		break
   112  	}
   113  	if !sawdigits {
   114  		return
   115  	}
   116  	if !sawdot {
   117  		b.dp = b.nd
   118  	}
   119  
   120  	// optional exponent moves decimal point.
   121  	// if we read a very large, very long number,
   122  	// just be sure to move the decimal point by
   123  	// a lot (say, 100000).  it doesn't matter if it's
   124  	// not the exact number.
   125  	if i < len(s) && lower(s[i]) == 'e' {
   126  		i++
   127  		if i >= len(s) {
   128  			return
   129  		}
   130  		esign := 1
   131  		if s[i] == '+' {
   132  			i++
   133  		} else if s[i] == '-' {
   134  			i++
   135  			esign = -1
   136  		}
   137  		if i >= len(s) || s[i] < '0' || s[i] > '9' {
   138  			return
   139  		}
   140  		e := 0
   141  		for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ {
   142  			if s[i] == '_' {
   143  				// underscoreOK already called
   144  				continue
   145  			}
   146  			if e < 10000 {
   147  				e = e*10 + int(s[i]) - '0'
   148  			}
   149  		}
   150  		b.dp += e * esign
   151  	}
   152  
   153  	if i != len(s) {
   154  		return
   155  	}
   156  
   157  	ok = true
   158  	return
   159  }
   160  
   161  // readFloat reads a decimal mantissa and exponent from a float
   162  // string representation. It returns ok==false if the number could
   163  // not fit return types or is invalid.
   164  func readFloat(s string) (mantissa uint64, exp int, neg, trunc, hex, ok bool) {
   165  	i := 0
   166  
   167  	// optional sign
   168  	if i >= len(s) {
   169  		return
   170  	}
   171  	switch {
   172  	case s[i] == '+':
   173  		i++
   174  	case s[i] == '-':
   175  		neg = true
   176  		i++
   177  	}
   178  
   179  	// digits
   180  	base := uint64(10)
   181  	maxMantDigits := 19 // 10^19 fits in uint64
   182  	expChar := byte('e')
   183  	if i+2 < len(s) && s[i] == '0' && lower(s[i+1]) == 'x' {
   184  		base = 16
   185  		maxMantDigits = 16 // 16^16 fits in uint64
   186  		i += 2
   187  		expChar = 'p'
   188  		hex = true
   189  	}
   190  	sawdot := false
   191  	sawdigits := false
   192  	nd := 0
   193  	ndMant := 0
   194  	dp := 0
   195  	for ; i < len(s); i++ {
   196  		switch c := s[i]; true {
   197  		case c == '_':
   198  			// underscoreOK already called
   199  			continue
   200  
   201  		case c == '.':
   202  			if sawdot {
   203  				return
   204  			}
   205  			sawdot = true
   206  			dp = nd
   207  			continue
   208  
   209  		case '0' <= c && c <= '9':
   210  			sawdigits = true
   211  			if c == '0' && nd == 0 { // ignore leading zeros
   212  				dp--
   213  				continue
   214  			}
   215  			nd++
   216  			if ndMant < maxMantDigits {
   217  				mantissa *= base
   218  				mantissa += uint64(c - '0')
   219  				ndMant++
   220  			} else if c != '0' {
   221  				trunc = true
   222  			}
   223  			continue
   224  
   225  		case base == 16 && 'a' <= lower(c) && lower(c) <= 'f':
   226  			sawdigits = true
   227  			nd++
   228  			if ndMant < maxMantDigits {
   229  				mantissa *= 16
   230  				mantissa += uint64(lower(c) - 'a' + 10)
   231  				ndMant++
   232  			} else {
   233  				trunc = true
   234  			}
   235  			continue
   236  		}
   237  		break
   238  	}
   239  	if !sawdigits {
   240  		return
   241  	}
   242  	if !sawdot {
   243  		dp = nd
   244  	}
   245  
   246  	if base == 16 {
   247  		dp *= 4
   248  		ndMant *= 4
   249  	}
   250  
   251  	// optional exponent moves decimal point.
   252  	// if we read a very large, very long number,
   253  	// just be sure to move the decimal point by
   254  	// a lot (say, 100000).  it doesn't matter if it's
   255  	// not the exact number.
   256  	if i < len(s) && lower(s[i]) == expChar {
   257  		i++
   258  		if i >= len(s) {
   259  			return
   260  		}
   261  		esign := 1
   262  		if s[i] == '+' {
   263  			i++
   264  		} else if s[i] == '-' {
   265  			i++
   266  			esign = -1
   267  		}
   268  		if i >= len(s) || s[i] < '0' || s[i] > '9' {
   269  			return
   270  		}
   271  		e := 0
   272  		for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ {
   273  			if s[i] == '_' {
   274  				// underscoreOK already called
   275  				continue
   276  			}
   277  			if e < 10000 {
   278  				e = e*10 + int(s[i]) - '0'
   279  			}
   280  		}
   281  		dp += e * esign
   282  	} else if base == 16 {
   283  		// Must have exponent.
   284  		return
   285  	}
   286  
   287  	if i != len(s) {
   288  		return
   289  	}
   290  
   291  	if mantissa != 0 {
   292  		exp = dp - ndMant
   293  	}
   294  	ok = true
   295  	return
   296  }
   297  
   298  // decimal power of ten to binary power of two.
   299  var powtab = []int{1, 3, 6, 9, 13, 16, 19, 23, 26}
   300  
   301  func (d *decimal) floatBits(flt *floatInfo) (b uint64, overflow bool) {
   302  	var exp int
   303  	var mant uint64
   304  
   305  	// Zero is always a special case.
   306  	if d.nd == 0 {
   307  		mant = 0
   308  		exp = flt.bias
   309  		goto out
   310  	}
   311  
   312  	// Obvious overflow/underflow.
   313  	// These bounds are for 64-bit floats.
   314  	// Will have to change if we want to support 80-bit floats in the future.
   315  	if d.dp > 310 {
   316  		goto overflow
   317  	}
   318  	if d.dp < -330 {
   319  		// zero
   320  		mant = 0
   321  		exp = flt.bias
   322  		goto out
   323  	}
   324  
   325  	// Scale by powers of two until in range [0.5, 1.0)
   326  	exp = 0
   327  	for d.dp > 0 {
   328  		var n int
   329  		if d.dp >= len(powtab) {
   330  			n = 27
   331  		} else {
   332  			n = powtab[d.dp]
   333  		}
   334  		d.Shift(-n)
   335  		exp += n
   336  	}
   337  	for d.dp < 0 || d.dp == 0 && d.d[0] < '5' {
   338  		var n int
   339  		if -d.dp >= len(powtab) {
   340  			n = 27
   341  		} else {
   342  			n = powtab[-d.dp]
   343  		}
   344  		d.Shift(n)
   345  		exp -= n
   346  	}
   347  
   348  	// Our range is [0.5,1) but floating point range is [1,2).
   349  	exp--
   350  
   351  	// Minimum representable exponent is flt.bias+1.
   352  	// If the exponent is smaller, move it up and
   353  	// adjust d accordingly.
   354  	if exp < flt.bias+1 {
   355  		n := flt.bias + 1 - exp
   356  		d.Shift(-n)
   357  		exp += n
   358  	}
   359  
   360  	if exp-flt.bias >= 1<<flt.expbits-1 {
   361  		goto overflow
   362  	}
   363  
   364  	// Extract 1+flt.mantbits bits.
   365  	d.Shift(int(1 + flt.mantbits))
   366  	mant = d.RoundedInteger()
   367  
   368  	// Rounding might have added a bit; shift down.
   369  	if mant == 2<<flt.mantbits {
   370  		mant >>= 1
   371  		exp++
   372  		if exp-flt.bias >= 1<<flt.expbits-1 {
   373  			goto overflow
   374  		}
   375  	}
   376  
   377  	// Denormalized?
   378  	if mant&(1<<flt.mantbits) == 0 {
   379  		exp = flt.bias
   380  	}
   381  	goto out
   382  
   383  overflow:
   384  	// ±Inf
   385  	mant = 0
   386  	exp = 1<<flt.expbits - 1 + flt.bias
   387  	overflow = true
   388  
   389  out:
   390  	// Assemble bits.
   391  	bits := mant & (uint64(1)<<flt.mantbits - 1)
   392  	bits |= uint64((exp-flt.bias)&(1<<flt.expbits-1)) << flt.mantbits
   393  	if d.neg {
   394  		bits |= 1 << flt.mantbits << flt.expbits
   395  	}
   396  	return bits, overflow
   397  }
   398  
   399  // Exact powers of 10.
   400  var float64pow10 = []float64{
   401  	1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
   402  	1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
   403  	1e20, 1e21, 1e22,
   404  }
   405  var float32pow10 = []float32{1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10}
   406  
   407  // If possible to convert decimal representation to 64-bit float f exactly,
   408  // entirely in floating-point math, do so, avoiding the expense of decimalToFloatBits.
   409  // Three common cases:
   410  //	value is exact integer
   411  //	value is exact integer * exact power of ten
   412  //	value is exact integer / exact power of ten
   413  // These all produce potentially inexact but correctly rounded answers.
   414  func atof64exact(mantissa uint64, exp int, neg bool) (f float64, ok bool) {
   415  	if mantissa>>float64info.mantbits != 0 {
   416  		return
   417  	}
   418  	f = float64(mantissa)
   419  	if neg {
   420  		f = -f
   421  	}
   422  	switch {
   423  	case exp == 0:
   424  		// an integer.
   425  		return f, true
   426  	// Exact integers are <= 10^15.
   427  	// Exact powers of ten are <= 10^22.
   428  	case exp > 0 && exp <= 15+22: // int * 10^k
   429  		// If exponent is big but number of digits is not,
   430  		// can move a few zeros into the integer part.
   431  		if exp > 22 {
   432  			f *= float64pow10[exp-22]
   433  			exp = 22
   434  		}
   435  		if f > 1e15 || f < -1e15 {
   436  			// the exponent was really too large.
   437  			return
   438  		}
   439  		return f * float64pow10[exp], true
   440  	case exp < 0 && exp >= -22: // int / 10^k
   441  		return f / float64pow10[-exp], true
   442  	}
   443  	return
   444  }
   445  
   446  // If possible to compute mantissa*10^exp to 32-bit float f exactly,
   447  // entirely in floating-point math, do so, avoiding the machinery above.
   448  func atof32exact(mantissa uint64, exp int, neg bool) (f float32, ok bool) {
   449  	if mantissa>>float32info.mantbits != 0 {
   450  		return
   451  	}
   452  	f = float32(mantissa)
   453  	if neg {
   454  		f = -f
   455  	}
   456  	switch {
   457  	case exp == 0:
   458  		return f, true
   459  	// Exact integers are <= 10^7.
   460  	// Exact powers of ten are <= 10^10.
   461  	case exp > 0 && exp <= 7+10: // int * 10^k
   462  		// If exponent is big but number of digits is not,
   463  		// can move a few zeros into the integer part.
   464  		if exp > 10 {
   465  			f *= float32pow10[exp-10]
   466  			exp = 10
   467  		}
   468  		if f > 1e7 || f < -1e7 {
   469  			// the exponent was really too large.
   470  			return
   471  		}
   472  		return f * float32pow10[exp], true
   473  	case exp < 0 && exp >= -10: // int / 10^k
   474  		return f / float32pow10[-exp], true
   475  	}
   476  	return
   477  }
   478  
   479  // atofHex converts the hex floating-point string s
   480  // to a rounded float32 or float64 value (depending on flt==&float32info or flt==&float64info)
   481  // and returns it as a float64.
   482  // The string s has already been parsed into a mantissa, exponent, and sign (neg==true for negative).
   483  // If trunc is true, trailing non-zero bits have been omitted from the mantissa.
   484  func atofHex(s string, flt *floatInfo, mantissa uint64, exp int, neg, trunc bool) (float64, error) {
   485  	maxExp := 1<<flt.expbits + flt.bias - 2
   486  	minExp := flt.bias + 1
   487  	exp += int(flt.mantbits) // mantissa now implicitly divided by 2^mantbits.
   488  
   489  	// Shift mantissa and exponent to bring representation into float range.
   490  	// Eventually we want a mantissa with a leading 1-bit followed by mantbits other bits.
   491  	// For rounding, we need two more, where the bottom bit represents
   492  	// whether that bit or any later bit was non-zero.
   493  	// (If the mantissa has already lost non-zero bits, trunc is true,
   494  	// and we OR in a 1 below after shifting left appropriately.)
   495  	for mantissa != 0 && mantissa>>(flt.mantbits+2) == 0 {
   496  		mantissa <<= 1
   497  		exp--
   498  	}
   499  	if trunc {
   500  		mantissa |= 1
   501  	}
   502  	for mantissa>>(1+flt.mantbits+2) != 0 {
   503  		mantissa = mantissa>>1 | mantissa&1
   504  		exp++
   505  	}
   506  
   507  	// If exponent is too negative,
   508  	// denormalize in hopes of making it representable.
   509  	// (The -2 is for the rounding bits.)
   510  	for mantissa > 1 && exp < minExp-2 {
   511  		mantissa = mantissa>>1 | mantissa&1
   512  		exp++
   513  	}
   514  
   515  	// Round using two bottom bits.
   516  	round := mantissa & 3
   517  	mantissa >>= 2
   518  	round |= mantissa & 1 // round to even (round up if mantissa is odd)
   519  	exp += 2
   520  	if round == 3 {
   521  		mantissa++
   522  		if mantissa == 1<<(1+flt.mantbits) {
   523  			mantissa >>= 1
   524  			exp++
   525  		}
   526  	}
   527  
   528  	if mantissa>>flt.mantbits == 0 { // Denormal or zero.
   529  		exp = flt.bias
   530  	}
   531  	var err error
   532  	if exp > maxExp { // infinity and range error
   533  		mantissa = 1 << flt.mantbits
   534  		exp = maxExp + 1
   535  		err = rangeError(fnParseFloat, s)
   536  	}
   537  
   538  	bits := mantissa & (1<<flt.mantbits - 1)
   539  	bits |= uint64((exp-flt.bias)&(1<<flt.expbits-1)) << flt.mantbits
   540  	if neg {
   541  		bits |= 1 << flt.mantbits << flt.expbits
   542  	}
   543  	if flt == &float32info {
   544  		return float64(math.Float32frombits(uint32(bits))), err
   545  	}
   546  	return math.Float64frombits(bits), err
   547  }
   548  
   549  const fnParseFloat = "ParseFloat"
   550  
   551  func atof32(s string) (f float32, err error) {
   552  	if val, ok := special(s); ok {
   553  		return float32(val), nil
   554  	}
   555  
   556  	mantissa, exp, neg, trunc, hex, ok := readFloat(s)
   557  	if hex && ok {
   558  		f, err := atofHex(s, &float32info, mantissa, exp, neg, trunc)
   559  		return float32(f), err
   560  	}
   561  
   562  	if optimize && ok {
   563  		// Try pure floating-point arithmetic conversion.
   564  		if !trunc {
   565  			if f, ok := atof32exact(mantissa, exp, neg); ok {
   566  				return f, nil
   567  			}
   568  		}
   569  		// Try another fast path.
   570  		ext := new(extFloat)
   571  		if ok := ext.AssignDecimal(mantissa, exp, neg, trunc, &float32info); ok {
   572  			b, ovf := ext.floatBits(&float32info)
   573  			f = math.Float32frombits(uint32(b))
   574  			if ovf {
   575  				err = rangeError(fnParseFloat, s)
   576  			}
   577  			return f, err
   578  		}
   579  	}
   580  
   581  	// Slow fallback.
   582  	var d decimal
   583  	if !d.set(s) {
   584  		return 0, syntaxError(fnParseFloat, s)
   585  	}
   586  	b, ovf := d.floatBits(&float32info)
   587  	f = math.Float32frombits(uint32(b))
   588  	if ovf {
   589  		err = rangeError(fnParseFloat, s)
   590  	}
   591  	return f, err
   592  }
   593  
   594  func atof64(s string) (f float64, err error) {
   595  	if val, ok := special(s); ok {
   596  		return val, nil
   597  	}
   598  
   599  	mantissa, exp, neg, trunc, hex, ok := readFloat(s)
   600  	if hex && ok {
   601  		return atofHex(s, &float64info, mantissa, exp, neg, trunc)
   602  	}
   603  
   604  	if optimize && ok {
   605  		// Try pure floating-point arithmetic conversion.
   606  		if !trunc {
   607  			if f, ok := atof64exact(mantissa, exp, neg); ok {
   608  				return f, nil
   609  			}
   610  		}
   611  		// Try another fast path.
   612  		ext := new(extFloat)
   613  		if ok := ext.AssignDecimal(mantissa, exp, neg, trunc, &float64info); ok {
   614  			b, ovf := ext.floatBits(&float64info)
   615  			f = math.Float64frombits(b)
   616  			if ovf {
   617  				err = rangeError(fnParseFloat, s)
   618  			}
   619  			return f, err
   620  		}
   621  	}
   622  
   623  	// Slow fallback.
   624  	var d decimal
   625  	if !d.set(s) {
   626  		return 0, syntaxError(fnParseFloat, s)
   627  	}
   628  	b, ovf := d.floatBits(&float64info)
   629  	f = math.Float64frombits(b)
   630  	if ovf {
   631  		err = rangeError(fnParseFloat, s)
   632  	}
   633  	return f, err
   634  }
   635  
   636  // ParseFloat converts the string s to a floating-point number
   637  // with the precision specified by bitSize: 32 for float32, or 64 for float64.
   638  // When bitSize=32, the result still has type float64, but it will be
   639  // convertible to float32 without changing its value.
   640  //
   641  // ParseFloat accepts decimal and hexadecimal floating-point number syntax.
   642  // If s is well-formed and near a valid floating-point number,
   643  // ParseFloat returns the nearest floating-point number rounded
   644  // using IEEE754 unbiased rounding.
   645  // (Parsing a hexadecimal floating-point value only rounds when
   646  // there are more bits in the hexadecimal representation than
   647  // will fit in the mantissa.)
   648  //
   649  // The errors that ParseFloat returns have concrete type *NumError
   650  // and include err.Num = s.
   651  //
   652  // If s is not syntactically well-formed, ParseFloat returns err.Err = ErrSyntax.
   653  //
   654  // If s is syntactically well-formed but is more than 1/2 ULP
   655  // away from the largest floating point number of the given size,
   656  // ParseFloat returns f = ±Inf, err.Err = ErrRange.
   657  //
   658  // ParseFloat recognizes the strings "NaN", "+Inf", and "-Inf" as their
   659  // respective special floating point values. It ignores case when matching.
   660  func ParseFloat(s string, bitSize int) (float64, error) {
   661  	if !underscoreOK(s) {
   662  		return 0, syntaxError(fnParseFloat, s)
   663  	}
   664  	if bitSize == 32 {
   665  		f, err := atof32(s)
   666  		return float64(f), err
   667  	}
   668  	return atof64(s)
   669  }
   670  

View as plain text