...
Run Format

Source file src/math/big/float.go

Documentation: math/big

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This file implements multi-precision floating-point numbers.
     6  // Like in the GNU MPFR library (http://www.mpfr.org/), operands
     7  // can be of mixed precision. Unlike MPFR, the rounding mode is
     8  // not specified with each operation, but with each operand. The
     9  // rounding mode of the result operand determines the rounding
    10  // mode of an operation. This is a from-scratch implementation.
    11  
    12  package big
    13  
    14  import (
    15  	"fmt"
    16  	"math"
    17  	"math/bits"
    18  )
    19  
    20  const debugFloat = false // enable for debugging
    21  
    22  // A nonzero finite Float represents a multi-precision floating point number
    23  //
    24  //   sign × mantissa × 2**exponent
    25  //
    26  // with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp.
    27  // A Float may also be zero (+0, -0) or infinite (+Inf, -Inf).
    28  // All Floats are ordered, and the ordering of two Floats x and y
    29  // is defined by x.Cmp(y).
    30  //
    31  // Each Float value also has a precision, rounding mode, and accuracy.
    32  // The precision is the maximum number of mantissa bits available to
    33  // represent the value. The rounding mode specifies how a result should
    34  // be rounded to fit into the mantissa bits, and accuracy describes the
    35  // rounding error with respect to the exact result.
    36  //
    37  // Unless specified otherwise, all operations (including setters) that
    38  // specify a *Float variable for the result (usually via the receiver
    39  // with the exception of MantExp), round the numeric result according
    40  // to the precision and rounding mode of the result variable.
    41  //
    42  // If the provided result precision is 0 (see below), it is set to the
    43  // precision of the argument with the largest precision value before any
    44  // rounding takes place, and the rounding mode remains unchanged. Thus,
    45  // uninitialized Floats provided as result arguments will have their
    46  // precision set to a reasonable value determined by the operands and
    47  // their mode is the zero value for RoundingMode (ToNearestEven).
    48  //
    49  // By setting the desired precision to 24 or 53 and using matching rounding
    50  // mode (typically ToNearestEven), Float operations produce the same results
    51  // as the corresponding float32 or float64 IEEE-754 arithmetic for operands
    52  // that correspond to normal (i.e., not denormal) float32 or float64 numbers.
    53  // Exponent underflow and overflow lead to a 0 or an Infinity for different
    54  // values than IEEE-754 because Float exponents have a much larger range.
    55  //
    56  // The zero (uninitialized) value for a Float is ready to use and represents
    57  // the number +0.0 exactly, with precision 0 and rounding mode ToNearestEven.
    58  //
    59  type Float struct {
    60  	prec uint32
    61  	mode RoundingMode
    62  	acc  Accuracy
    63  	form form
    64  	neg  bool
    65  	mant nat
    66  	exp  int32
    67  }
    68  
    69  // An ErrNaN panic is raised by a Float operation that would lead to
    70  // a NaN under IEEE-754 rules. An ErrNaN implements the error interface.
    71  type ErrNaN struct {
    72  	msg string
    73  }
    74  
    75  func (err ErrNaN) Error() string {
    76  	return err.msg
    77  }
    78  
    79  // NewFloat allocates and returns a new Float set to x,
    80  // with precision 53 and rounding mode ToNearestEven.
    81  // NewFloat panics with ErrNaN if x is a NaN.
    82  func NewFloat(x float64) *Float {
    83  	if math.IsNaN(x) {
    84  		panic(ErrNaN{"NewFloat(NaN)"})
    85  	}
    86  	return new(Float).SetFloat64(x)
    87  }
    88  
    89  // Exponent and precision limits.
    90  const (
    91  	MaxExp  = math.MaxInt32  // largest supported exponent
    92  	MinExp  = math.MinInt32  // smallest supported exponent
    93  	MaxPrec = math.MaxUint32 // largest (theoretically) supported precision; likely memory-limited
    94  )
    95  
    96  // Internal representation: The mantissa bits x.mant of a nonzero finite
    97  // Float x are stored in a nat slice long enough to hold up to x.prec bits;
    98  // the slice may (but doesn't have to) be shorter if the mantissa contains
    99  // trailing 0 bits. x.mant is normalized if the msb of x.mant == 1 (i.e.,
   100  // the msb is shifted all the way "to the left"). Thus, if the mantissa has
   101  // trailing 0 bits or x.prec is not a multiple of the Word size _W,
   102  // x.mant[0] has trailing zero bits. The msb of the mantissa corresponds
   103  // to the value 0.5; the exponent x.exp shifts the binary point as needed.
   104  //
   105  // A zero or non-finite Float x ignores x.mant and x.exp.
   106  //
   107  // x                 form      neg      mant         exp
   108  // ----------------------------------------------------------
   109  // ±0                zero      sign     -            -
   110  // 0 < |x| < +Inf    finite    sign     mantissa     exponent
   111  // ±Inf              inf       sign     -            -
   112  
   113  // A form value describes the internal representation.
   114  type form byte
   115  
   116  // The form value order is relevant - do not change!
   117  const (
   118  	zero form = iota
   119  	finite
   120  	inf
   121  )
   122  
   123  // RoundingMode determines how a Float value is rounded to the
   124  // desired precision. Rounding may change the Float value; the
   125  // rounding error is described by the Float's Accuracy.
   126  type RoundingMode byte
   127  
   128  // These constants define supported rounding modes.
   129  const (
   130  	ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven
   131  	ToNearestAway                     // == IEEE 754-2008 roundTiesToAway
   132  	ToZero                            // == IEEE 754-2008 roundTowardZero
   133  	AwayFromZero                      // no IEEE 754-2008 equivalent
   134  	ToNegativeInf                     // == IEEE 754-2008 roundTowardNegative
   135  	ToPositiveInf                     // == IEEE 754-2008 roundTowardPositive
   136  )
   137  
   138  //go:generate stringer -type=RoundingMode
   139  
   140  // Accuracy describes the rounding error produced by the most recent
   141  // operation that generated a Float value, relative to the exact value.
   142  type Accuracy int8
   143  
   144  // Constants describing the Accuracy of a Float.
   145  const (
   146  	Below Accuracy = -1
   147  	Exact Accuracy = 0
   148  	Above Accuracy = +1
   149  )
   150  
   151  //go:generate stringer -type=Accuracy
   152  
   153  // SetPrec sets z's precision to prec and returns the (possibly) rounded
   154  // value of z. Rounding occurs according to z's rounding mode if the mantissa
   155  // cannot be represented in prec bits without loss of precision.
   156  // SetPrec(0) maps all finite values to ±0; infinite values remain unchanged.
   157  // If prec > MaxPrec, it is set to MaxPrec.
   158  func (z *Float) SetPrec(prec uint) *Float {
   159  	z.acc = Exact // optimistically assume no rounding is needed
   160  
   161  	// special case
   162  	if prec == 0 {
   163  		z.prec = 0
   164  		if z.form == finite {
   165  			// truncate z to 0
   166  			z.acc = makeAcc(z.neg)
   167  			z.form = zero
   168  		}
   169  		return z
   170  	}
   171  
   172  	// general case
   173  	if prec > MaxPrec {
   174  		prec = MaxPrec
   175  	}
   176  	old := z.prec
   177  	z.prec = uint32(prec)
   178  	if z.prec < old {
   179  		z.round(0)
   180  	}
   181  	return z
   182  }
   183  
   184  func makeAcc(above bool) Accuracy {
   185  	if above {
   186  		return Above
   187  	}
   188  	return Below
   189  }
   190  
   191  // SetMode sets z's rounding mode to mode and returns an exact z.
   192  // z remains unchanged otherwise.
   193  // z.SetMode(z.Mode()) is a cheap way to set z's accuracy to Exact.
   194  func (z *Float) SetMode(mode RoundingMode) *Float {
   195  	z.mode = mode
   196  	z.acc = Exact
   197  	return z
   198  }
   199  
   200  // Prec returns the mantissa precision of x in bits.
   201  // The result may be 0 for |x| == 0 and |x| == Inf.
   202  func (x *Float) Prec() uint {
   203  	return uint(x.prec)
   204  }
   205  
   206  // MinPrec returns the minimum precision required to represent x exactly
   207  // (i.e., the smallest prec before x.SetPrec(prec) would start rounding x).
   208  // The result is 0 for |x| == 0 and |x| == Inf.
   209  func (x *Float) MinPrec() uint {
   210  	if x.form != finite {
   211  		return 0
   212  	}
   213  	return uint(len(x.mant))*_W - x.mant.trailingZeroBits()
   214  }
   215  
   216  // Mode returns the rounding mode of x.
   217  func (x *Float) Mode() RoundingMode {
   218  	return x.mode
   219  }
   220  
   221  // Acc returns the accuracy of x produced by the most recent operation.
   222  func (x *Float) Acc() Accuracy {
   223  	return x.acc
   224  }
   225  
   226  // Sign returns:
   227  //
   228  //	-1 if x <   0
   229  //	 0 if x is ±0
   230  //	+1 if x >   0
   231  //
   232  func (x *Float) Sign() int {
   233  	if debugFloat {
   234  		x.validate()
   235  	}
   236  	if x.form == zero {
   237  		return 0
   238  	}
   239  	if x.neg {
   240  		return -1
   241  	}
   242  	return 1
   243  }
   244  
   245  // MantExp breaks x into its mantissa and exponent components
   246  // and returns the exponent. If a non-nil mant argument is
   247  // provided its value is set to the mantissa of x, with the
   248  // same precision and rounding mode as x. The components
   249  // satisfy x == mant × 2**exp, with 0.5 <= |mant| < 1.0.
   250  // Calling MantExp with a nil argument is an efficient way to
   251  // get the exponent of the receiver.
   252  //
   253  // Special cases are:
   254  //
   255  //	(  ±0).MantExp(mant) = 0, with mant set to   ±0
   256  //	(±Inf).MantExp(mant) = 0, with mant set to ±Inf
   257  //
   258  // x and mant may be the same in which case x is set to its
   259  // mantissa value.
   260  func (x *Float) MantExp(mant *Float) (exp int) {
   261  	if debugFloat {
   262  		x.validate()
   263  	}
   264  	if x.form == finite {
   265  		exp = int(x.exp)
   266  	}
   267  	if mant != nil {
   268  		mant.Copy(x)
   269  		if mant.form == finite {
   270  			mant.exp = 0
   271  		}
   272  	}
   273  	return
   274  }
   275  
   276  func (z *Float) setExpAndRound(exp int64, sbit uint) {
   277  	if exp < MinExp {
   278  		// underflow
   279  		z.acc = makeAcc(z.neg)
   280  		z.form = zero
   281  		return
   282  	}
   283  
   284  	if exp > MaxExp {
   285  		// overflow
   286  		z.acc = makeAcc(!z.neg)
   287  		z.form = inf
   288  		return
   289  	}
   290  
   291  	z.form = finite
   292  	z.exp = int32(exp)
   293  	z.round(sbit)
   294  }
   295  
   296  // SetMantExp sets z to mant × 2**exp and and returns z.
   297  // The result z has the same precision and rounding mode
   298  // as mant. SetMantExp is an inverse of MantExp but does
   299  // not require 0.5 <= |mant| < 1.0. Specifically:
   300  //
   301  //	mant := new(Float)
   302  //	new(Float).SetMantExp(mant, x.MantExp(mant)).Cmp(x) == 0
   303  //
   304  // Special cases are:
   305  //
   306  //	z.SetMantExp(  ±0, exp) =   ±0
   307  //	z.SetMantExp(±Inf, exp) = ±Inf
   308  //
   309  // z and mant may be the same in which case z's exponent
   310  // is set to exp.
   311  func (z *Float) SetMantExp(mant *Float, exp int) *Float {
   312  	if debugFloat {
   313  		z.validate()
   314  		mant.validate()
   315  	}
   316  	z.Copy(mant)
   317  	if z.form != finite {
   318  		return z
   319  	}
   320  	z.setExpAndRound(int64(z.exp)+int64(exp), 0)
   321  	return z
   322  }
   323  
   324  // Signbit returns true if x is negative or negative zero.
   325  func (x *Float) Signbit() bool {
   326  	return x.neg
   327  }
   328  
   329  // IsInf reports whether x is +Inf or -Inf.
   330  func (x *Float) IsInf() bool {
   331  	return x.form == inf
   332  }
   333  
   334  // IsInt reports whether x is an integer.
   335  // ±Inf values are not integers.
   336  func (x *Float) IsInt() bool {
   337  	if debugFloat {
   338  		x.validate()
   339  	}
   340  	// special cases
   341  	if x.form != finite {
   342  		return x.form == zero
   343  	}
   344  	// x.form == finite
   345  	if x.exp <= 0 {
   346  		return false
   347  	}
   348  	// x.exp > 0
   349  	return x.prec <= uint32(x.exp) || x.MinPrec() <= uint(x.exp) // not enough bits for fractional mantissa
   350  }
   351  
   352  // debugging support
   353  func (x *Float) validate() {
   354  	if !debugFloat {
   355  		// avoid performance bugs
   356  		panic("validate called but debugFloat is not set")
   357  	}
   358  	if x.form != finite {
   359  		return
   360  	}
   361  	m := len(x.mant)
   362  	if m == 0 {
   363  		panic("nonzero finite number with empty mantissa")
   364  	}
   365  	const msb = 1 << (_W - 1)
   366  	if x.mant[m-1]&msb == 0 {
   367  		panic(fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.Text('p', 0)))
   368  	}
   369  	if x.prec == 0 {
   370  		panic("zero precision finite number")
   371  	}
   372  }
   373  
   374  // round rounds z according to z.mode to z.prec bits and sets z.acc accordingly.
   375  // sbit must be 0 or 1 and summarizes any "sticky bit" information one might
   376  // have before calling round. z's mantissa must be normalized (with the msb set)
   377  // or empty.
   378  //
   379  // CAUTION: The rounding modes ToNegativeInf, ToPositiveInf are affected by the
   380  // sign of z. For correct rounding, the sign of z must be set correctly before
   381  // calling round.
   382  func (z *Float) round(sbit uint) {
   383  	if debugFloat {
   384  		z.validate()
   385  	}
   386  
   387  	z.acc = Exact
   388  	if z.form != finite {
   389  		// ±0 or ±Inf => nothing left to do
   390  		return
   391  	}
   392  	// z.form == finite && len(z.mant) > 0
   393  	// m > 0 implies z.prec > 0 (checked by validate)
   394  
   395  	m := uint32(len(z.mant)) // present mantissa length in words
   396  	bits := m * _W           // present mantissa bits; bits > 0
   397  	if bits <= z.prec {
   398  		// mantissa fits => nothing to do
   399  		return
   400  	}
   401  	// bits > z.prec
   402  
   403  	// Rounding is based on two bits: the rounding bit (rbit) and the
   404  	// sticky bit (sbit). The rbit is the bit immediately before the
   405  	// z.prec leading mantissa bits (the "0.5"). The sbit is set if any
   406  	// of the bits before the rbit are set (the "0.25", "0.125", etc.):
   407  	//
   408  	//   rbit  sbit  => "fractional part"
   409  	//
   410  	//   0     0        == 0
   411  	//   0     1        >  0  , < 0.5
   412  	//   1     0        == 0.5
   413  	//   1     1        >  0.5, < 1.0
   414  
   415  	// bits > z.prec: mantissa too large => round
   416  	r := uint(bits - z.prec - 1) // rounding bit position; r >= 0
   417  	rbit := z.mant.bit(r) & 1    // rounding bit; be safe and ensure it's a single bit
   418  	// The sticky bit is only needed for rounding ToNearestEven
   419  	// or when the rounding bit is zero. Avoid computation otherwise.
   420  	if sbit == 0 && (rbit == 0 || z.mode == ToNearestEven) {
   421  		sbit = z.mant.sticky(r)
   422  	}
   423  	sbit &= 1 // be safe and ensure it's a single bit
   424  
   425  	// cut off extra words
   426  	n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision
   427  	if m > n {
   428  		copy(z.mant, z.mant[m-n:]) // move n last words to front
   429  		z.mant = z.mant[:n]
   430  	}
   431  
   432  	// determine number of trailing zero bits (ntz) and compute lsb mask of mantissa's least-significant word
   433  	ntz := n*_W - z.prec // 0 <= ntz < _W
   434  	lsb := Word(1) << ntz
   435  
   436  	// round if result is inexact
   437  	if rbit|sbit != 0 {
   438  		// Make rounding decision: The result mantissa is truncated ("rounded down")
   439  		// by default. Decide if we need to increment, or "round up", the (unsigned)
   440  		// mantissa.
   441  		inc := false
   442  		switch z.mode {
   443  		case ToNegativeInf:
   444  			inc = z.neg
   445  		case ToZero:
   446  			// nothing to do
   447  		case ToNearestEven:
   448  			inc = rbit != 0 && (sbit != 0 || z.mant[0]&lsb != 0)
   449  		case ToNearestAway:
   450  			inc = rbit != 0
   451  		case AwayFromZero:
   452  			inc = true
   453  		case ToPositiveInf:
   454  			inc = !z.neg
   455  		default:
   456  			panic("unreachable")
   457  		}
   458  
   459  		// A positive result (!z.neg) is Above the exact result if we increment,
   460  		// and it's Below if we truncate (Exact results require no rounding).
   461  		// For a negative result (z.neg) it is exactly the opposite.
   462  		z.acc = makeAcc(inc != z.neg)
   463  
   464  		if inc {
   465  			// add 1 to mantissa
   466  			if addVW(z.mant, z.mant, lsb) != 0 {
   467  				// mantissa overflow => adjust exponent
   468  				if z.exp >= MaxExp {
   469  					// exponent overflow
   470  					z.form = inf
   471  					return
   472  				}
   473  				z.exp++
   474  				// adjust mantissa: divide by 2 to compensate for exponent adjustment
   475  				shrVU(z.mant, z.mant, 1)
   476  				// set msb == carry == 1 from the mantissa overflow above
   477  				const msb = 1 << (_W - 1)
   478  				z.mant[n-1] |= msb
   479  			}
   480  		}
   481  	}
   482  
   483  	// zero out trailing bits in least-significant word
   484  	z.mant[0] &^= lsb - 1
   485  
   486  	if debugFloat {
   487  		z.validate()
   488  	}
   489  }
   490  
   491  func (z *Float) setBits64(neg bool, x uint64) *Float {
   492  	if z.prec == 0 {
   493  		z.prec = 64
   494  	}
   495  	z.acc = Exact
   496  	z.neg = neg
   497  	if x == 0 {
   498  		z.form = zero
   499  		return z
   500  	}
   501  	// x != 0
   502  	z.form = finite
   503  	s := bits.LeadingZeros64(x)
   504  	z.mant = z.mant.setUint64(x << uint(s))
   505  	z.exp = int32(64 - s) // always fits
   506  	if z.prec < 64 {
   507  		z.round(0)
   508  	}
   509  	return z
   510  }
   511  
   512  // SetUint64 sets z to the (possibly rounded) value of x and returns z.
   513  // If z's precision is 0, it is changed to 64 (and rounding will have
   514  // no effect).
   515  func (z *Float) SetUint64(x uint64) *Float {
   516  	return z.setBits64(false, x)
   517  }
   518  
   519  // SetInt64 sets z to the (possibly rounded) value of x and returns z.
   520  // If z's precision is 0, it is changed to 64 (and rounding will have
   521  // no effect).
   522  func (z *Float) SetInt64(x int64) *Float {
   523  	u := x
   524  	if u < 0 {
   525  		u = -u
   526  	}
   527  	// We cannot simply call z.SetUint64(uint64(u)) and change
   528  	// the sign afterwards because the sign affects rounding.
   529  	return z.setBits64(x < 0, uint64(u))
   530  }
   531  
   532  // SetFloat64 sets z to the (possibly rounded) value of x and returns z.
   533  // If z's precision is 0, it is changed to 53 (and rounding will have
   534  // no effect). SetFloat64 panics with ErrNaN if x is a NaN.
   535  func (z *Float) SetFloat64(x float64) *Float {
   536  	if z.prec == 0 {
   537  		z.prec = 53
   538  	}
   539  	if math.IsNaN(x) {
   540  		panic(ErrNaN{"Float.SetFloat64(NaN)"})
   541  	}
   542  	z.acc = Exact
   543  	z.neg = math.Signbit(x) // handle -0, -Inf correctly
   544  	if x == 0 {
   545  		z.form = zero
   546  		return z
   547  	}
   548  	if math.IsInf(x, 0) {
   549  		z.form = inf
   550  		return z
   551  	}
   552  	// normalized x != 0
   553  	z.form = finite
   554  	fmant, exp := math.Frexp(x) // get normalized mantissa
   555  	z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11)
   556  	z.exp = int32(exp) // always fits
   557  	if z.prec < 53 {
   558  		z.round(0)
   559  	}
   560  	return z
   561  }
   562  
   563  // fnorm normalizes mantissa m by shifting it to the left
   564  // such that the msb of the most-significant word (msw) is 1.
   565  // It returns the shift amount. It assumes that len(m) != 0.
   566  func fnorm(m nat) int64 {
   567  	if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) {
   568  		panic("msw of mantissa is 0")
   569  	}
   570  	s := nlz(m[len(m)-1])
   571  	if s > 0 {
   572  		c := shlVU(m, m, s)
   573  		if debugFloat && c != 0 {
   574  			panic("nlz or shlVU incorrect")
   575  		}
   576  	}
   577  	return int64(s)
   578  }
   579  
   580  // SetInt sets z to the (possibly rounded) value of x and returns z.
   581  // If z's precision is 0, it is changed to the larger of x.BitLen()
   582  // or 64 (and rounding will have no effect).
   583  func (z *Float) SetInt(x *Int) *Float {
   584  	// TODO(gri) can be more efficient if z.prec > 0
   585  	// but small compared to the size of x, or if there
   586  	// are many trailing 0's.
   587  	bits := uint32(x.BitLen())
   588  	if z.prec == 0 {
   589  		z.prec = umax32(bits, 64)
   590  	}
   591  	z.acc = Exact
   592  	z.neg = x.neg
   593  	if len(x.abs) == 0 {
   594  		z.form = zero
   595  		return z
   596  	}
   597  	// x != 0
   598  	z.mant = z.mant.set(x.abs)
   599  	fnorm(z.mant)
   600  	z.setExpAndRound(int64(bits), 0)
   601  	return z
   602  }
   603  
   604  // SetRat sets z to the (possibly rounded) value of x and returns z.
   605  // If z's precision is 0, it is changed to the largest of a.BitLen(),
   606  // b.BitLen(), or 64; with x = a/b.
   607  func (z *Float) SetRat(x *Rat) *Float {
   608  	if x.IsInt() {
   609  		return z.SetInt(x.Num())
   610  	}
   611  	var a, b Float
   612  	a.SetInt(x.Num())
   613  	b.SetInt(x.Denom())
   614  	if z.prec == 0 {
   615  		z.prec = umax32(a.prec, b.prec)
   616  	}
   617  	return z.Quo(&a, &b)
   618  }
   619  
   620  // SetInf sets z to the infinite Float -Inf if signbit is
   621  // set, or +Inf if signbit is not set, and returns z. The
   622  // precision of z is unchanged and the result is always
   623  // Exact.
   624  func (z *Float) SetInf(signbit bool) *Float {
   625  	z.acc = Exact
   626  	z.form = inf
   627  	z.neg = signbit
   628  	return z
   629  }
   630  
   631  // Set sets z to the (possibly rounded) value of x and returns z.
   632  // If z's precision is 0, it is changed to the precision of x
   633  // before setting z (and rounding will have no effect).
   634  // Rounding is performed according to z's precision and rounding
   635  // mode; and z's accuracy reports the result error relative to the
   636  // exact (not rounded) result.
   637  func (z *Float) Set(x *Float) *Float {
   638  	if debugFloat {
   639  		x.validate()
   640  	}
   641  	z.acc = Exact
   642  	if z != x {
   643  		z.form = x.form
   644  		z.neg = x.neg
   645  		if x.form == finite {
   646  			z.exp = x.exp
   647  			z.mant = z.mant.set(x.mant)
   648  		}
   649  		if z.prec == 0 {
   650  			z.prec = x.prec
   651  		} else if z.prec < x.prec {
   652  			z.round(0)
   653  		}
   654  	}
   655  	return z
   656  }
   657  
   658  // Copy sets z to x, with the same precision, rounding mode, and
   659  // accuracy as x, and returns z. x is not changed even if z and
   660  // x are the same.
   661  func (z *Float) Copy(x *Float) *Float {
   662  	if debugFloat {
   663  		x.validate()
   664  	}
   665  	if z != x {
   666  		z.prec = x.prec
   667  		z.mode = x.mode
   668  		z.acc = x.acc
   669  		z.form = x.form
   670  		z.neg = x.neg
   671  		if z.form == finite {
   672  			z.mant = z.mant.set(x.mant)
   673  			z.exp = x.exp
   674  		}
   675  	}
   676  	return z
   677  }
   678  
   679  // msb32 returns the 32 most significant bits of x.
   680  func msb32(x nat) uint32 {
   681  	i := len(x) - 1
   682  	if i < 0 {
   683  		return 0
   684  	}
   685  	if debugFloat && x[i]&(1<<(_W-1)) == 0 {
   686  		panic("x not normalized")
   687  	}
   688  	switch _W {
   689  	case 32:
   690  		return uint32(x[i])
   691  	case 64:
   692  		return uint32(x[i] >> 32)
   693  	}
   694  	panic("unreachable")
   695  }
   696  
   697  // msb64 returns the 64 most significant bits of x.
   698  func msb64(x nat) uint64 {
   699  	i := len(x) - 1
   700  	if i < 0 {
   701  		return 0
   702  	}
   703  	if debugFloat && x[i]&(1<<(_W-1)) == 0 {
   704  		panic("x not normalized")
   705  	}
   706  	switch _W {
   707  	case 32:
   708  		v := uint64(x[i]) << 32
   709  		if i > 0 {
   710  			v |= uint64(x[i-1])
   711  		}
   712  		return v
   713  	case 64:
   714  		return uint64(x[i])
   715  	}
   716  	panic("unreachable")
   717  }
   718  
   719  // Uint64 returns the unsigned integer resulting from truncating x
   720  // towards zero. If 0 <= x <= math.MaxUint64, the result is Exact
   721  // if x is an integer and Below otherwise.
   722  // The result is (0, Above) for x < 0, and (math.MaxUint64, Below)
   723  // for x > math.MaxUint64.
   724  func (x *Float) Uint64() (uint64, Accuracy) {
   725  	if debugFloat {
   726  		x.validate()
   727  	}
   728  
   729  	switch x.form {
   730  	case finite:
   731  		if x.neg {
   732  			return 0, Above
   733  		}
   734  		// 0 < x < +Inf
   735  		if x.exp <= 0 {
   736  			// 0 < x < 1
   737  			return 0, Below
   738  		}
   739  		// 1 <= x < Inf
   740  		if x.exp <= 64 {
   741  			// u = trunc(x) fits into a uint64
   742  			u := msb64(x.mant) >> (64 - uint32(x.exp))
   743  			if x.MinPrec() <= 64 {
   744  				return u, Exact
   745  			}
   746  			return u, Below // x truncated
   747  		}
   748  		// x too large
   749  		return math.MaxUint64, Below
   750  
   751  	case zero:
   752  		return 0, Exact
   753  
   754  	case inf:
   755  		if x.neg {
   756  			return 0, Above
   757  		}
   758  		return math.MaxUint64, Below
   759  	}
   760  
   761  	panic("unreachable")
   762  }
   763  
   764  // Int64 returns the integer resulting from truncating x towards zero.
   765  // If math.MinInt64 <= x <= math.MaxInt64, the result is Exact if x is
   766  // an integer, and Above (x < 0) or Below (x > 0) otherwise.
   767  // The result is (math.MinInt64, Above) for x < math.MinInt64,
   768  // and (math.MaxInt64, Below) for x > math.MaxInt64.
   769  func (x *Float) Int64() (int64, Accuracy) {
   770  	if debugFloat {
   771  		x.validate()
   772  	}
   773  
   774  	switch x.form {
   775  	case finite:
   776  		// 0 < |x| < +Inf
   777  		acc := makeAcc(x.neg)
   778  		if x.exp <= 0 {
   779  			// 0 < |x| < 1
   780  			return 0, acc
   781  		}
   782  		// x.exp > 0
   783  
   784  		// 1 <= |x| < +Inf
   785  		if x.exp <= 63 {
   786  			// i = trunc(x) fits into an int64 (excluding math.MinInt64)
   787  			i := int64(msb64(x.mant) >> (64 - uint32(x.exp)))
   788  			if x.neg {
   789  				i = -i
   790  			}
   791  			if x.MinPrec() <= uint(x.exp) {
   792  				return i, Exact
   793  			}
   794  			return i, acc // x truncated
   795  		}
   796  		if x.neg {
   797  			// check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64))
   798  			if x.exp == 64 && x.MinPrec() == 1 {
   799  				acc = Exact
   800  			}
   801  			return math.MinInt64, acc
   802  		}
   803  		// x too large
   804  		return math.MaxInt64, Below
   805  
   806  	case zero:
   807  		return 0, Exact
   808  
   809  	case inf:
   810  		if x.neg {
   811  			return math.MinInt64, Above
   812  		}
   813  		return math.MaxInt64, Below
   814  	}
   815  
   816  	panic("unreachable")
   817  }
   818  
   819  // Float32 returns the float32 value nearest to x. If x is too small to be
   820  // represented by a float32 (|x| < math.SmallestNonzeroFloat32), the result
   821  // is (0, Below) or (-0, Above), respectively, depending on the sign of x.
   822  // If x is too large to be represented by a float32 (|x| > math.MaxFloat32),
   823  // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x.
   824  func (x *Float) Float32() (float32, Accuracy) {
   825  	if debugFloat {
   826  		x.validate()
   827  	}
   828  
   829  	switch x.form {
   830  	case finite:
   831  		// 0 < |x| < +Inf
   832  
   833  		const (
   834  			fbits = 32                //        float size
   835  			mbits = 23                //        mantissa size (excluding implicit msb)
   836  			ebits = fbits - mbits - 1 //     8  exponent size
   837  			bias  = 1<<(ebits-1) - 1  //   127  exponent bias
   838  			dmin  = 1 - bias - mbits  //  -149  smallest unbiased exponent (denormal)
   839  			emin  = 1 - bias          //  -126  smallest unbiased exponent (normal)
   840  			emax  = bias              //   127  largest unbiased exponent (normal)
   841  		)
   842  
   843  		// Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float32 mantissa.
   844  		e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0
   845  
   846  		// Compute precision p for float32 mantissa.
   847  		// If the exponent is too small, we have a denormal number before
   848  		// rounding and fewer than p mantissa bits of precision available
   849  		// (the exponent remains fixed but the mantissa gets shifted right).
   850  		p := mbits + 1 // precision of normal float
   851  		if e < emin {
   852  			// recompute precision
   853  			p = mbits + 1 - emin + int(e)
   854  			// If p == 0, the mantissa of x is shifted so much to the right
   855  			// that its msb falls immediately to the right of the float32
   856  			// mantissa space. In other words, if the smallest denormal is
   857  			// considered "1.0", for p == 0, the mantissa value m is >= 0.5.
   858  			// If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal.
   859  			// If m == 0.5, it is rounded down to even, i.e., 0.0.
   860  			// If p < 0, the mantissa value m is <= "0.25" which is never rounded up.
   861  			if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ {
   862  				// underflow to ±0
   863  				if x.neg {
   864  					var z float32
   865  					return -z, Above
   866  				}
   867  				return 0.0, Below
   868  			}
   869  			// otherwise, round up
   870  			// We handle p == 0 explicitly because it's easy and because
   871  			// Float.round doesn't support rounding to 0 bits of precision.
   872  			if p == 0 {
   873  				if x.neg {
   874  					return -math.SmallestNonzeroFloat32, Below
   875  				}
   876  				return math.SmallestNonzeroFloat32, Above
   877  			}
   878  		}
   879  		// p > 0
   880  
   881  		// round
   882  		var r Float
   883  		r.prec = uint32(p)
   884  		r.Set(x)
   885  		e = r.exp - 1
   886  
   887  		// Rounding may have caused r to overflow to ±Inf
   888  		// (rounding never causes underflows to 0).
   889  		// If the exponent is too large, also overflow to ±Inf.
   890  		if r.form == inf || e > emax {
   891  			// overflow
   892  			if x.neg {
   893  				return float32(math.Inf(-1)), Below
   894  			}
   895  			return float32(math.Inf(+1)), Above
   896  		}
   897  		// e <= emax
   898  
   899  		// Determine sign, biased exponent, and mantissa.
   900  		var sign, bexp, mant uint32
   901  		if x.neg {
   902  			sign = 1 << (fbits - 1)
   903  		}
   904  
   905  		// Rounding may have caused a denormal number to
   906  		// become normal. Check again.
   907  		if e < emin {
   908  			// denormal number: recompute precision
   909  			// Since rounding may have at best increased precision
   910  			// and we have eliminated p <= 0 early, we know p > 0.
   911  			// bexp == 0 for denormals
   912  			p = mbits + 1 - emin + int(e)
   913  			mant = msb32(r.mant) >> uint(fbits-p)
   914  		} else {
   915  			// normal number: emin <= e <= emax
   916  			bexp = uint32(e+bias) << mbits
   917  			mant = msb32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
   918  		}
   919  
   920  		return math.Float32frombits(sign | bexp | mant), r.acc
   921  
   922  	case zero:
   923  		if x.neg {
   924  			var z float32
   925  			return -z, Exact
   926  		}
   927  		return 0.0, Exact
   928  
   929  	case inf:
   930  		if x.neg {
   931  			return float32(math.Inf(-1)), Exact
   932  		}
   933  		return float32(math.Inf(+1)), Exact
   934  	}
   935  
   936  	panic("unreachable")
   937  }
   938  
   939  // Float64 returns the float64 value nearest to x. If x is too small to be
   940  // represented by a float64 (|x| < math.SmallestNonzeroFloat64), the result
   941  // is (0, Below) or (-0, Above), respectively, depending on the sign of x.
   942  // If x is too large to be represented by a float64 (|x| > math.MaxFloat64),
   943  // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x.
   944  func (x *Float) Float64() (float64, Accuracy) {
   945  	if debugFloat {
   946  		x.validate()
   947  	}
   948  
   949  	switch x.form {
   950  	case finite:
   951  		// 0 < |x| < +Inf
   952  
   953  		const (
   954  			fbits = 64                //        float size
   955  			mbits = 52                //        mantissa size (excluding implicit msb)
   956  			ebits = fbits - mbits - 1 //    11  exponent size
   957  			bias  = 1<<(ebits-1) - 1  //  1023  exponent bias
   958  			dmin  = 1 - bias - mbits  // -1074  smallest unbiased exponent (denormal)
   959  			emin  = 1 - bias          // -1022  smallest unbiased exponent (normal)
   960  			emax  = bias              //  1023  largest unbiased exponent (normal)
   961  		)
   962  
   963  		// Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float64 mantissa.
   964  		e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0
   965  
   966  		// Compute precision p for float64 mantissa.
   967  		// If the exponent is too small, we have a denormal number before
   968  		// rounding and fewer than p mantissa bits of precision available
   969  		// (the exponent remains fixed but the mantissa gets shifted right).
   970  		p := mbits + 1 // precision of normal float
   971  		if e < emin {
   972  			// recompute precision
   973  			p = mbits + 1 - emin + int(e)
   974  			// If p == 0, the mantissa of x is shifted so much to the right
   975  			// that its msb falls immediately to the right of the float64
   976  			// mantissa space. In other words, if the smallest denormal is
   977  			// considered "1.0", for p == 0, the mantissa value m is >= 0.5.
   978  			// If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal.
   979  			// If m == 0.5, it is rounded down to even, i.e., 0.0.
   980  			// If p < 0, the mantissa value m is <= "0.25" which is never rounded up.
   981  			if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ {
   982  				// underflow to ±0
   983  				if x.neg {
   984  					var z float64
   985  					return -z, Above
   986  				}
   987  				return 0.0, Below
   988  			}
   989  			// otherwise, round up
   990  			// We handle p == 0 explicitly because it's easy and because
   991  			// Float.round doesn't support rounding to 0 bits of precision.
   992  			if p == 0 {
   993  				if x.neg {
   994  					return -math.SmallestNonzeroFloat64, Below
   995  				}
   996  				return math.SmallestNonzeroFloat64, Above
   997  			}
   998  		}
   999  		// p > 0
  1000  
  1001  		// round
  1002  		var r Float
  1003  		r.prec = uint32(p)
  1004  		r.Set(x)
  1005  		e = r.exp - 1
  1006  
  1007  		// Rounding may have caused r to overflow to ±Inf
  1008  		// (rounding never causes underflows to 0).
  1009  		// If the exponent is too large, also overflow to ±Inf.
  1010  		if r.form == inf || e > emax {
  1011  			// overflow
  1012  			if x.neg {
  1013  				return math.Inf(-1), Below
  1014  			}
  1015  			return math.Inf(+1), Above
  1016  		}
  1017  		// e <= emax
  1018  
  1019  		// Determine sign, biased exponent, and mantissa.
  1020  		var sign, bexp, mant uint64
  1021  		if x.neg {
  1022  			sign = 1 << (fbits - 1)
  1023  		}
  1024  
  1025  		// Rounding may have caused a denormal number to
  1026  		// become normal. Check again.
  1027  		if e < emin {
  1028  			// denormal number: recompute precision
  1029  			// Since rounding may have at best increased precision
  1030  			// and we have eliminated p <= 0 early, we know p > 0.
  1031  			// bexp == 0 for denormals
  1032  			p = mbits + 1 - emin + int(e)
  1033  			mant = msb64(r.mant) >> uint(fbits-p)
  1034  		} else {
  1035  			// normal number: emin <= e <= emax
  1036  			bexp = uint64(e+bias) << mbits
  1037  			mant = msb64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
  1038  		}
  1039  
  1040  		return math.Float64frombits(sign | bexp | mant), r.acc
  1041  
  1042  	case zero:
  1043  		if x.neg {
  1044  			var z float64
  1045  			return -z, Exact
  1046  		}
  1047  		return 0.0, Exact
  1048  
  1049  	case inf:
  1050  		if x.neg {
  1051  			return math.Inf(-1), Exact
  1052  		}
  1053  		return math.Inf(+1), Exact
  1054  	}
  1055  
  1056  	panic("unreachable")
  1057  }
  1058  
  1059  // Int returns the result of truncating x towards zero;
  1060  // or nil if x is an infinity.
  1061  // The result is Exact if x.IsInt(); otherwise it is Below
  1062  // for x > 0, and Above for x < 0.
  1063  // If a non-nil *Int argument z is provided, Int stores
  1064  // the result in z instead of allocating a new Int.
  1065  func (x *Float) Int(z *Int) (*Int, Accuracy) {
  1066  	if debugFloat {
  1067  		x.validate()
  1068  	}
  1069  
  1070  	if z == nil && x.form <= finite {
  1071  		z = new(Int)
  1072  	}
  1073  
  1074  	switch x.form {
  1075  	case finite:
  1076  		// 0 < |x| < +Inf
  1077  		acc := makeAcc(x.neg)
  1078  		if x.exp <= 0 {
  1079  			// 0 < |x| < 1
  1080  			return z.SetInt64(0), acc
  1081  		}
  1082  		// x.exp > 0
  1083  
  1084  		// 1 <= |x| < +Inf
  1085  		// determine minimum required precision for x
  1086  		allBits := uint(len(x.mant)) * _W
  1087  		exp := uint(x.exp)
  1088  		if x.MinPrec() <= exp {
  1089  			acc = Exact
  1090  		}
  1091  		// shift mantissa as needed
  1092  		if z == nil {
  1093  			z = new(Int)
  1094  		}
  1095  		z.neg = x.neg
  1096  		switch {
  1097  		case exp > allBits:
  1098  			z.abs = z.abs.shl(x.mant, exp-allBits)
  1099  		default:
  1100  			z.abs = z.abs.set(x.mant)
  1101  		case exp < allBits:
  1102  			z.abs = z.abs.shr(x.mant, allBits-exp)
  1103  		}
  1104  		return z, acc
  1105  
  1106  	case zero:
  1107  		return z.SetInt64(0), Exact
  1108  
  1109  	case inf:
  1110  		return nil, makeAcc(x.neg)
  1111  	}
  1112  
  1113  	panic("unreachable")
  1114  }
  1115  
  1116  // Rat returns the rational number corresponding to x;
  1117  // or nil if x is an infinity.
  1118  // The result is Exact if x is not an Inf.
  1119  // If a non-nil *Rat argument z is provided, Rat stores
  1120  // the result in z instead of allocating a new Rat.
  1121  func (x *Float) Rat(z *Rat) (*Rat, Accuracy) {
  1122  	if debugFloat {
  1123  		x.validate()
  1124  	}
  1125  
  1126  	if z == nil && x.form <= finite {
  1127  		z = new(Rat)
  1128  	}
  1129  
  1130  	switch x.form {
  1131  	case finite:
  1132  		// 0 < |x| < +Inf
  1133  		allBits := int32(len(x.mant)) * _W
  1134  		// build up numerator and denominator
  1135  		z.a.neg = x.neg
  1136  		switch {
  1137  		case x.exp > allBits:
  1138  			z.a.abs = z.a.abs.shl(x.mant, uint(x.exp-allBits))
  1139  			z.b.abs = z.b.abs[:0] // == 1 (see Rat)
  1140  			// z already in normal form
  1141  		default:
  1142  			z.a.abs = z.a.abs.set(x.mant)
  1143  			z.b.abs = z.b.abs[:0] // == 1 (see Rat)
  1144  			// z already in normal form
  1145  		case x.exp < allBits:
  1146  			z.a.abs = z.a.abs.set(x.mant)
  1147  			t := z.b.abs.setUint64(1)
  1148  			z.b.abs = t.shl(t, uint(allBits-x.exp))
  1149  			z.norm()
  1150  		}
  1151  		return z, Exact
  1152  
  1153  	case zero:
  1154  		return z.SetInt64(0), Exact
  1155  
  1156  	case inf:
  1157  		return nil, makeAcc(x.neg)
  1158  	}
  1159  
  1160  	panic("unreachable")
  1161  }
  1162  
  1163  // Abs sets z to the (possibly rounded) value |x| (the absolute value of x)
  1164  // and returns z.
  1165  func (z *Float) Abs(x *Float) *Float {
  1166  	z.Set(x)
  1167  	z.neg = false
  1168  	return z
  1169  }
  1170  
  1171  // Neg sets z to the (possibly rounded) value of x with its sign negated,
  1172  // and returns z.
  1173  func (z *Float) Neg(x *Float) *Float {
  1174  	z.Set(x)
  1175  	z.neg = !z.neg
  1176  	return z
  1177  }
  1178  
  1179  func validateBinaryOperands(x, y *Float) {
  1180  	if !debugFloat {
  1181  		// avoid performance bugs
  1182  		panic("validateBinaryOperands called but debugFloat is not set")
  1183  	}
  1184  	if len(x.mant) == 0 {
  1185  		panic("empty mantissa for x")
  1186  	}
  1187  	if len(y.mant) == 0 {
  1188  		panic("empty mantissa for y")
  1189  	}
  1190  }
  1191  
  1192  // z = x + y, ignoring signs of x and y for the addition
  1193  // but using the sign of z for rounding the result.
  1194  // x and y must have a non-empty mantissa and valid exponent.
  1195  func (z *Float) uadd(x, y *Float) {
  1196  	// Note: This implementation requires 2 shifts most of the
  1197  	// time. It is also inefficient if exponents or precisions
  1198  	// differ by wide margins. The following article describes
  1199  	// an efficient (but much more complicated) implementation
  1200  	// compatible with the internal representation used here:
  1201  	//
  1202  	// Vincent Lefèvre: "The Generic Multiple-Precision Floating-
  1203  	// Point Addition With Exact Rounding (as in the MPFR Library)"
  1204  	// http://www.vinc17.net/research/papers/rnc6.pdf
  1205  
  1206  	if debugFloat {
  1207  		validateBinaryOperands(x, y)
  1208  	}
  1209  
  1210  	// compute exponents ex, ey for mantissa with "binary point"
  1211  	// on the right (mantissa.0) - use int64 to avoid overflow
  1212  	ex := int64(x.exp) - int64(len(x.mant))*_W
  1213  	ey := int64(y.exp) - int64(len(y.mant))*_W
  1214  
  1215  	al := alias(z.mant, x.mant) || alias(z.mant, y.mant)
  1216  
  1217  	// TODO(gri) having a combined add-and-shift primitive
  1218  	//           could make this code significantly faster
  1219  	switch {
  1220  	case ex < ey:
  1221  		if al {
  1222  			t := nat(nil).shl(y.mant, uint(ey-ex))
  1223  			z.mant = z.mant.add(x.mant, t)
  1224  		} else {
  1225  			z.mant = z.mant.shl(y.mant, uint(ey-ex))
  1226  			z.mant = z.mant.add(x.mant, z.mant)
  1227  		}
  1228  	default:
  1229  		// ex == ey, no shift needed
  1230  		z.mant = z.mant.add(x.mant, y.mant)
  1231  	case ex > ey:
  1232  		if al {
  1233  			t := nat(nil).shl(x.mant, uint(ex-ey))
  1234  			z.mant = z.mant.add(t, y.mant)
  1235  		} else {
  1236  			z.mant = z.mant.shl(x.mant, uint(ex-ey))
  1237  			z.mant = z.mant.add(z.mant, y.mant)
  1238  		}
  1239  		ex = ey
  1240  	}
  1241  	// len(z.mant) > 0
  1242  
  1243  	z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0)
  1244  }
  1245  
  1246  // z = x - y for |x| > |y|, ignoring signs of x and y for the subtraction
  1247  // but using the sign of z for rounding the result.
  1248  // x and y must have a non-empty mantissa and valid exponent.
  1249  func (z *Float) usub(x, y *Float) {
  1250  	// This code is symmetric to uadd.
  1251  	// We have not factored the common code out because
  1252  	// eventually uadd (and usub) should be optimized
  1253  	// by special-casing, and the code will diverge.
  1254  
  1255  	if debugFloat {
  1256  		validateBinaryOperands(x, y)
  1257  	}
  1258  
  1259  	ex := int64(x.exp) - int64(len(x.mant))*_W
  1260  	ey := int64(y.exp) - int64(len(y.mant))*_W
  1261  
  1262  	al := alias(z.mant, x.mant) || alias(z.mant, y.mant)
  1263  
  1264  	switch {
  1265  	case ex < ey:
  1266  		if al {
  1267  			t := nat(nil).shl(y.mant, uint(ey-ex))
  1268  			z.mant = t.sub(x.mant, t)
  1269  		} else {
  1270  			z.mant = z.mant.shl(y.mant, uint(ey-ex))
  1271  			z.mant = z.mant.sub(x.mant, z.mant)
  1272  		}
  1273  	default:
  1274  		// ex == ey, no shift needed
  1275  		z.mant = z.mant.sub(x.mant, y.mant)
  1276  	case ex > ey:
  1277  		if al {
  1278  			t := nat(nil).shl(x.mant, uint(ex-ey))
  1279  			z.mant = t.sub(t, y.mant)
  1280  		} else {
  1281  			z.mant = z.mant.shl(x.mant, uint(ex-ey))
  1282  			z.mant = z.mant.sub(z.mant, y.mant)
  1283  		}
  1284  		ex = ey
  1285  	}
  1286  
  1287  	// operands may have canceled each other out
  1288  	if len(z.mant) == 0 {
  1289  		z.acc = Exact
  1290  		z.form = zero
  1291  		z.neg = false
  1292  		return
  1293  	}
  1294  	// len(z.mant) > 0
  1295  
  1296  	z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0)
  1297  }
  1298  
  1299  // z = x * y, ignoring signs of x and y for the multiplication
  1300  // but using the sign of z for rounding the result.
  1301  // x and y must have a non-empty mantissa and valid exponent.
  1302  func (z *Float) umul(x, y *Float) {
  1303  	if debugFloat {
  1304  		validateBinaryOperands(x, y)
  1305  	}
  1306  
  1307  	// Note: This is doing too much work if the precision
  1308  	// of z is less than the sum of the precisions of x
  1309  	// and y which is often the case (e.g., if all floats
  1310  	// have the same precision).
  1311  	// TODO(gri) Optimize this for the common case.
  1312  
  1313  	e := int64(x.exp) + int64(y.exp)
  1314  	if x == y {
  1315  		z.mant = z.mant.sqr(x.mant)
  1316  	} else {
  1317  		z.mant = z.mant.mul(x.mant, y.mant)
  1318  	}
  1319  	z.setExpAndRound(e-fnorm(z.mant), 0)
  1320  }
  1321  
  1322  // z = x / y, ignoring signs of x and y for the division
  1323  // but using the sign of z for rounding the result.
  1324  // x and y must have a non-empty mantissa and valid exponent.
  1325  func (z *Float) uquo(x, y *Float) {
  1326  	if debugFloat {
  1327  		validateBinaryOperands(x, y)
  1328  	}
  1329  
  1330  	// mantissa length in words for desired result precision + 1
  1331  	// (at least one extra bit so we get the rounding bit after
  1332  	// the division)
  1333  	n := int(z.prec/_W) + 1
  1334  
  1335  	// compute adjusted x.mant such that we get enough result precision
  1336  	xadj := x.mant
  1337  	if d := n - len(x.mant) + len(y.mant); d > 0 {
  1338  		// d extra words needed => add d "0 digits" to x
  1339  		xadj = make(nat, len(x.mant)+d)
  1340  		copy(xadj[d:], x.mant)
  1341  	}
  1342  	// TODO(gri): If we have too many digits (d < 0), we should be able
  1343  	// to shorten x for faster division. But we must be extra careful
  1344  	// with rounding in that case.
  1345  
  1346  	// Compute d before division since there may be aliasing of x.mant
  1347  	// (via xadj) or y.mant with z.mant.
  1348  	d := len(xadj) - len(y.mant)
  1349  
  1350  	// divide
  1351  	var r nat
  1352  	z.mant, r = z.mant.div(nil, xadj, y.mant)
  1353  	e := int64(x.exp) - int64(y.exp) - int64(d-len(z.mant))*_W
  1354  
  1355  	// The result is long enough to include (at least) the rounding bit.
  1356  	// If there's a non-zero remainder, the corresponding fractional part
  1357  	// (if it were computed), would have a non-zero sticky bit (if it were
  1358  	// zero, it couldn't have a non-zero remainder).
  1359  	var sbit uint
  1360  	if len(r) > 0 {
  1361  		sbit = 1
  1362  	}
  1363  
  1364  	z.setExpAndRound(e-fnorm(z.mant), sbit)
  1365  }
  1366  
  1367  // ucmp returns -1, 0, or +1, depending on whether
  1368  // |x| < |y|, |x| == |y|, or |x| > |y|.
  1369  // x and y must have a non-empty mantissa and valid exponent.
  1370  func (x *Float) ucmp(y *Float) int {
  1371  	if debugFloat {
  1372  		validateBinaryOperands(x, y)
  1373  	}
  1374  
  1375  	switch {
  1376  	case x.exp < y.exp:
  1377  		return -1
  1378  	case x.exp > y.exp:
  1379  		return +1
  1380  	}
  1381  	// x.exp == y.exp
  1382  
  1383  	// compare mantissas
  1384  	i := len(x.mant)
  1385  	j := len(y.mant)
  1386  	for i > 0 || j > 0 {
  1387  		var xm, ym Word
  1388  		if i > 0 {
  1389  			i--
  1390  			xm = x.mant[i]
  1391  		}
  1392  		if j > 0 {
  1393  			j--
  1394  			ym = y.mant[j]
  1395  		}
  1396  		switch {
  1397  		case xm < ym:
  1398  			return -1
  1399  		case xm > ym:
  1400  			return +1
  1401  		}
  1402  	}
  1403  
  1404  	return 0
  1405  }
  1406  
  1407  // Handling of sign bit as defined by IEEE 754-2008, section 6.3:
  1408  //
  1409  // When neither the inputs nor result are NaN, the sign of a product or
  1410  // quotient is the exclusive OR of the operands’ signs; the sign of a sum,
  1411  // or of a difference x−y regarded as a sum x+(−y), differs from at most
  1412  // one of the addends’ signs; and the sign of the result of conversions,
  1413  // the quantize operation, the roundToIntegral operations, and the
  1414  // roundToIntegralExact (see 5.3.1) is the sign of the first or only operand.
  1415  // These rules shall apply even when operands or results are zero or infinite.
  1416  //
  1417  // When the sum of two operands with opposite signs (or the difference of
  1418  // two operands with like signs) is exactly zero, the sign of that sum (or
  1419  // difference) shall be +0 in all rounding-direction attributes except
  1420  // roundTowardNegative; under that attribute, the sign of an exact zero
  1421  // sum (or difference) shall be −0. However, x+x = x−(−x) retains the same
  1422  // sign as x even when x is zero.
  1423  //
  1424  // See also: https://play.golang.org/p/RtH3UCt5IH
  1425  
  1426  // Add sets z to the rounded sum x+y and returns z. If z's precision is 0,
  1427  // it is changed to the larger of x's or y's precision before the operation.
  1428  // Rounding is performed according to z's precision and rounding mode; and
  1429  // z's accuracy reports the result error relative to the exact (not rounded)
  1430  // result. Add panics with ErrNaN if x and y are infinities with opposite
  1431  // signs. The value of z is undefined in that case.
  1432  //
  1433  // BUG(gri) When rounding ToNegativeInf, the sign of Float values rounded to 0 is incorrect.
  1434  func (z *Float) Add(x, y *Float) *Float {
  1435  	if debugFloat {
  1436  		x.validate()
  1437  		y.validate()
  1438  	}
  1439  
  1440  	if z.prec == 0 {
  1441  		z.prec = umax32(x.prec, y.prec)
  1442  	}
  1443  
  1444  	if x.form == finite && y.form == finite {
  1445  		// x + y (common case)
  1446  
  1447  		// Below we set z.neg = x.neg, and when z aliases y this will
  1448  		// change the y operand's sign. This is fine, because if an
  1449  		// operand aliases the receiver it'll be overwritten, but we still
  1450  		// want the original x.neg and y.neg values when we evaluate
  1451  		// x.neg != y.neg, so we need to save y.neg before setting z.neg.
  1452  		yneg := y.neg
  1453  
  1454  		z.neg = x.neg
  1455  		if x.neg == yneg {
  1456  			// x + y == x + y
  1457  			// (-x) + (-y) == -(x + y)
  1458  			z.uadd(x, y)
  1459  		} else {
  1460  			// x + (-y) == x - y == -(y - x)
  1461  			// (-x) + y == y - x == -(x - y)
  1462  			if x.ucmp(y) > 0 {
  1463  				z.usub(x, y)
  1464  			} else {
  1465  				z.neg = !z.neg
  1466  				z.usub(y, x)
  1467  			}
  1468  		}
  1469  		return z
  1470  	}
  1471  
  1472  	if x.form == inf && y.form == inf && x.neg != y.neg {
  1473  		// +Inf + -Inf
  1474  		// -Inf + +Inf
  1475  		// value of z is undefined but make sure it's valid
  1476  		z.acc = Exact
  1477  		z.form = zero
  1478  		z.neg = false
  1479  		panic(ErrNaN{"addition of infinities with opposite signs"})
  1480  	}
  1481  
  1482  	if x.form == zero && y.form == zero {
  1483  		// ±0 + ±0
  1484  		z.acc = Exact
  1485  		z.form = zero
  1486  		z.neg = x.neg && y.neg // -0 + -0 == -0
  1487  		return z
  1488  	}
  1489  
  1490  	if x.form == inf || y.form == zero {
  1491  		// ±Inf + y
  1492  		// x + ±0
  1493  		return z.Set(x)
  1494  	}
  1495  
  1496  	// ±0 + y
  1497  	// x + ±Inf
  1498  	return z.Set(y)
  1499  }
  1500  
  1501  // Sub sets z to the rounded difference x-y and returns z.
  1502  // Precision, rounding, and accuracy reporting are as for Add.
  1503  // Sub panics with ErrNaN if x and y are infinities with equal
  1504  // signs. The value of z is undefined in that case.
  1505  func (z *Float) Sub(x, y *Float) *Float {
  1506  	if debugFloat {
  1507  		x.validate()
  1508  		y.validate()
  1509  	}
  1510  
  1511  	if z.prec == 0 {
  1512  		z.prec = umax32(x.prec, y.prec)
  1513  	}
  1514  
  1515  	if x.form == finite && y.form == finite {
  1516  		// x - y (common case)
  1517  		yneg := y.neg
  1518  		z.neg = x.neg
  1519  		if x.neg != yneg {
  1520  			// x - (-y) == x + y
  1521  			// (-x) - y == -(x + y)
  1522  			z.uadd(x, y)
  1523  		} else {
  1524  			// x - y == x - y == -(y - x)
  1525  			// (-x) - (-y) == y - x == -(x - y)
  1526  			if x.ucmp(y) > 0 {
  1527  				z.usub(x, y)
  1528  			} else {
  1529  				z.neg = !z.neg
  1530  				z.usub(y, x)
  1531  			}
  1532  		}
  1533  		return z
  1534  	}
  1535  
  1536  	if x.form == inf && y.form == inf && x.neg == y.neg {
  1537  		// +Inf - +Inf
  1538  		// -Inf - -Inf
  1539  		// value of z is undefined but make sure it's valid
  1540  		z.acc = Exact
  1541  		z.form = zero
  1542  		z.neg = false
  1543  		panic(ErrNaN{"subtraction of infinities with equal signs"})
  1544  	}
  1545  
  1546  	if x.form == zero && y.form == zero {
  1547  		// ±0 - ±0
  1548  		z.acc = Exact
  1549  		z.form = zero
  1550  		z.neg = x.neg && !y.neg // -0 - +0 == -0
  1551  		return z
  1552  	}
  1553  
  1554  	if x.form == inf || y.form == zero {
  1555  		// ±Inf - y
  1556  		// x - ±0
  1557  		return z.Set(x)
  1558  	}
  1559  
  1560  	// ±0 - y
  1561  	// x - ±Inf
  1562  	return z.Neg(y)
  1563  }
  1564  
  1565  // Mul sets z to the rounded product x*y and returns z.
  1566  // Precision, rounding, and accuracy reporting are as for Add.
  1567  // Mul panics with ErrNaN if one operand is zero and the other
  1568  // operand an infinity. The value of z is undefined in that case.
  1569  func (z *Float) Mul(x, y *Float) *Float {
  1570  	if debugFloat {
  1571  		x.validate()
  1572  		y.validate()
  1573  	}
  1574  
  1575  	if z.prec == 0 {
  1576  		z.prec = umax32(x.prec, y.prec)
  1577  	}
  1578  
  1579  	z.neg = x.neg != y.neg
  1580  
  1581  	if x.form == finite && y.form == finite {
  1582  		// x * y (common case)
  1583  		z.umul(x, y)
  1584  		return z
  1585  	}
  1586  
  1587  	z.acc = Exact
  1588  	if x.form == zero && y.form == inf || x.form == inf && y.form == zero {
  1589  		// ±0 * ±Inf
  1590  		// ±Inf * ±0
  1591  		// value of z is undefined but make sure it's valid
  1592  		z.form = zero
  1593  		z.neg = false
  1594  		panic(ErrNaN{"multiplication of zero with infinity"})
  1595  	}
  1596  
  1597  	if x.form == inf || y.form == inf {
  1598  		// ±Inf * y
  1599  		// x * ±Inf
  1600  		z.form = inf
  1601  		return z
  1602  	}
  1603  
  1604  	// ±0 * y
  1605  	// x * ±0
  1606  	z.form = zero
  1607  	return z
  1608  }
  1609  
  1610  // Quo sets z to the rounded quotient x/y and returns z.
  1611  // Precision, rounding, and accuracy reporting are as for Add.
  1612  // Quo panics with ErrNaN if both operands are zero or infinities.
  1613  // The value of z is undefined in that case.
  1614  func (z *Float) Quo(x, y *Float) *Float {
  1615  	if debugFloat {
  1616  		x.validate()
  1617  		y.validate()
  1618  	}
  1619  
  1620  	if z.prec == 0 {
  1621  		z.prec = umax32(x.prec, y.prec)
  1622  	}
  1623  
  1624  	z.neg = x.neg != y.neg
  1625  
  1626  	if x.form == finite && y.form == finite {
  1627  		// x / y (common case)
  1628  		z.uquo(x, y)
  1629  		return z
  1630  	}
  1631  
  1632  	z.acc = Exact
  1633  	if x.form == zero && y.form == zero || x.form == inf && y.form == inf {
  1634  		// ±0 / ±0
  1635  		// ±Inf / ±Inf
  1636  		// value of z is undefined but make sure it's valid
  1637  		z.form = zero
  1638  		z.neg = false
  1639  		panic(ErrNaN{"division of zero by zero or infinity by infinity"})
  1640  	}
  1641  
  1642  	if x.form == zero || y.form == inf {
  1643  		// ±0 / y
  1644  		// x / ±Inf
  1645  		z.form = zero
  1646  		return z
  1647  	}
  1648  
  1649  	// x / ±0
  1650  	// ±Inf / y
  1651  	z.form = inf
  1652  	return z
  1653  }
  1654  
  1655  // Cmp compares x and y and returns:
  1656  //
  1657  //   -1 if x <  y
  1658  //    0 if x == y (incl. -0 == 0, -Inf == -Inf, and +Inf == +Inf)
  1659  //   +1 if x >  y
  1660  //
  1661  func (x *Float) Cmp(y *Float) int {
  1662  	if debugFloat {
  1663  		x.validate()
  1664  		y.validate()
  1665  	}
  1666  
  1667  	mx := x.ord()
  1668  	my := y.ord()
  1669  	switch {
  1670  	case mx < my:
  1671  		return -1
  1672  	case mx > my:
  1673  		return +1
  1674  	}
  1675  	// mx == my
  1676  
  1677  	// only if |mx| == 1 we have to compare the mantissae
  1678  	switch mx {
  1679  	case -1:
  1680  		return y.ucmp(x)
  1681  	case +1:
  1682  		return x.ucmp(y)
  1683  	}
  1684  
  1685  	return 0
  1686  }
  1687  
  1688  // ord classifies x and returns:
  1689  //
  1690  //	-2 if -Inf == x
  1691  //	-1 if -Inf < x < 0
  1692  //	 0 if x == 0 (signed or unsigned)
  1693  //	+1 if 0 < x < +Inf
  1694  //	+2 if x == +Inf
  1695  //
  1696  func (x *Float) ord() int {
  1697  	var m int
  1698  	switch x.form {
  1699  	case finite:
  1700  		m = 1
  1701  	case zero:
  1702  		return 0
  1703  	case inf:
  1704  		m = 2
  1705  	}
  1706  	if x.neg {
  1707  		m = -m
  1708  	}
  1709  	return m
  1710  }
  1711  
  1712  func umax32(x, y uint32) uint32 {
  1713  	if x > y {
  1714  		return x
  1715  	}
  1716  	return y
  1717  }
  1718  

View as plain text