Source file src/math/big/float.go
Documentation: math/big
1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // This file implements multi-precision floating-point numbers. 6 // Like in the GNU MPFR library (https://www.mpfr.org/), operands 7 // can be of mixed precision. Unlike MPFR, the rounding mode is 8 // not specified with each operation, but with each operand. The 9 // rounding mode of the result operand determines the rounding 10 // mode of an operation. This is a from-scratch implementation. 11 12 package big 13 14 import ( 15 "fmt" 16 "math" 17 "math/bits" 18 ) 19 20 const debugFloat = false // enable for debugging 21 22 // A nonzero finite Float represents a multi-precision floating point number 23 // 24 // sign × mantissa × 2**exponent 25 // 26 // with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp. 27 // A Float may also be zero (+0, -0) or infinite (+Inf, -Inf). 28 // All Floats are ordered, and the ordering of two Floats x and y 29 // is defined by x.Cmp(y). 30 // 31 // Each Float value also has a precision, rounding mode, and accuracy. 32 // The precision is the maximum number of mantissa bits available to 33 // represent the value. The rounding mode specifies how a result should 34 // be rounded to fit into the mantissa bits, and accuracy describes the 35 // rounding error with respect to the exact result. 36 // 37 // Unless specified otherwise, all operations (including setters) that 38 // specify a *Float variable for the result (usually via the receiver 39 // with the exception of MantExp), round the numeric result according 40 // to the precision and rounding mode of the result variable. 41 // 42 // If the provided result precision is 0 (see below), it is set to the 43 // precision of the argument with the largest precision value before any 44 // rounding takes place, and the rounding mode remains unchanged. Thus, 45 // uninitialized Floats provided as result arguments will have their 46 // precision set to a reasonable value determined by the operands, and 47 // their mode is the zero value for RoundingMode (ToNearestEven). 48 // 49 // By setting the desired precision to 24 or 53 and using matching rounding 50 // mode (typically ToNearestEven), Float operations produce the same results 51 // as the corresponding float32 or float64 IEEE-754 arithmetic for operands 52 // that correspond to normal (i.e., not denormal) float32 or float64 numbers. 53 // Exponent underflow and overflow lead to a 0 or an Infinity for different 54 // values than IEEE-754 because Float exponents have a much larger range. 55 // 56 // The zero (uninitialized) value for a Float is ready to use and represents 57 // the number +0.0 exactly, with precision 0 and rounding mode ToNearestEven. 58 // 59 // Operations always take pointer arguments (*Float) rather 60 // than Float values, and each unique Float value requires 61 // its own unique *Float pointer. To "copy" a Float value, 62 // an existing (or newly allocated) Float must be set to 63 // a new value using the Float.Set method; shallow copies 64 // of Floats are not supported and may lead to errors. 65 type Float struct { 66 prec uint32 67 mode RoundingMode 68 acc Accuracy 69 form form 70 neg bool 71 mant nat 72 exp int32 73 } 74 75 // An ErrNaN panic is raised by a Float operation that would lead to 76 // a NaN under IEEE-754 rules. An ErrNaN implements the error interface. 77 type ErrNaN struct { 78 msg string 79 } 80 81 func (err ErrNaN) Error() string { 82 return err.msg 83 } 84 85 // NewFloat allocates and returns a new Float set to x, 86 // with precision 53 and rounding mode ToNearestEven. 87 // NewFloat panics with ErrNaN if x is a NaN. 88 func NewFloat(x float64) *Float { 89 if math.IsNaN(x) { 90 panic(ErrNaN{"NewFloat(NaN)"}) 91 } 92 return new(Float).SetFloat64(x) 93 } 94 95 // Exponent and precision limits. 96 const ( 97 MaxExp = math.MaxInt32 // largest supported exponent 98 MinExp = math.MinInt32 // smallest supported exponent 99 MaxPrec = math.MaxUint32 // largest (theoretically) supported precision; likely memory-limited 100 ) 101 102 // Internal representation: The mantissa bits x.mant of a nonzero finite 103 // Float x are stored in a nat slice long enough to hold up to x.prec bits; 104 // the slice may (but doesn't have to) be shorter if the mantissa contains 105 // trailing 0 bits. x.mant is normalized if the msb of x.mant == 1 (i.e., 106 // the msb is shifted all the way "to the left"). Thus, if the mantissa has 107 // trailing 0 bits or x.prec is not a multiple of the Word size _W, 108 // x.mant[0] has trailing zero bits. The msb of the mantissa corresponds 109 // to the value 0.5; the exponent x.exp shifts the binary point as needed. 110 // 111 // A zero or non-finite Float x ignores x.mant and x.exp. 112 // 113 // x form neg mant exp 114 // ---------------------------------------------------------- 115 // ±0 zero sign - - 116 // 0 < |x| < +Inf finite sign mantissa exponent 117 // ±Inf inf sign - - 118 119 // A form value describes the internal representation. 120 type form byte 121 122 // The form value order is relevant - do not change! 123 const ( 124 zero form = iota 125 finite 126 inf 127 ) 128 129 // RoundingMode determines how a Float value is rounded to the 130 // desired precision. Rounding may change the Float value; the 131 // rounding error is described by the Float's Accuracy. 132 type RoundingMode byte 133 134 // These constants define supported rounding modes. 135 const ( 136 ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven 137 ToNearestAway // == IEEE 754-2008 roundTiesToAway 138 ToZero // == IEEE 754-2008 roundTowardZero 139 AwayFromZero // no IEEE 754-2008 equivalent 140 ToNegativeInf // == IEEE 754-2008 roundTowardNegative 141 ToPositiveInf // == IEEE 754-2008 roundTowardPositive 142 ) 143 144 //go:generate stringer -type=RoundingMode 145 146 // Accuracy describes the rounding error produced by the most recent 147 // operation that generated a Float value, relative to the exact value. 148 type Accuracy int8 149 150 // Constants describing the Accuracy of a Float. 151 const ( 152 Below Accuracy = -1 153 Exact Accuracy = 0 154 Above Accuracy = +1 155 ) 156 157 //go:generate stringer -type=Accuracy 158 159 // SetPrec sets z's precision to prec and returns the (possibly) rounded 160 // value of z. Rounding occurs according to z's rounding mode if the mantissa 161 // cannot be represented in prec bits without loss of precision. 162 // SetPrec(0) maps all finite values to ±0; infinite values remain unchanged. 163 // If prec > MaxPrec, it is set to MaxPrec. 164 func (z *Float) SetPrec(prec uint) *Float { 165 z.acc = Exact // optimistically assume no rounding is needed 166 167 // special case 168 if prec == 0 { 169 z.prec = 0 170 if z.form == finite { 171 // truncate z to 0 172 z.acc = makeAcc(z.neg) 173 z.form = zero 174 } 175 return z 176 } 177 178 // general case 179 if prec > MaxPrec { 180 prec = MaxPrec 181 } 182 old := z.prec 183 z.prec = uint32(prec) 184 if z.prec < old { 185 z.round(0) 186 } 187 return z 188 } 189 190 func makeAcc(above bool) Accuracy { 191 if above { 192 return Above 193 } 194 return Below 195 } 196 197 // SetMode sets z's rounding mode to mode and returns an exact z. 198 // z remains unchanged otherwise. 199 // z.SetMode(z.Mode()) is a cheap way to set z's accuracy to Exact. 200 func (z *Float) SetMode(mode RoundingMode) *Float { 201 z.mode = mode 202 z.acc = Exact 203 return z 204 } 205 206 // Prec returns the mantissa precision of x in bits. 207 // The result may be 0 for |x| == 0 and |x| == Inf. 208 func (x *Float) Prec() uint { 209 return uint(x.prec) 210 } 211 212 // MinPrec returns the minimum precision required to represent x exactly 213 // (i.e., the smallest prec before x.SetPrec(prec) would start rounding x). 214 // The result is 0 for |x| == 0 and |x| == Inf. 215 func (x *Float) MinPrec() uint { 216 if x.form != finite { 217 return 0 218 } 219 return uint(len(x.mant))*_W - x.mant.trailingZeroBits() 220 } 221 222 // Mode returns the rounding mode of x. 223 func (x *Float) Mode() RoundingMode { 224 return x.mode 225 } 226 227 // Acc returns the accuracy of x produced by the most recent 228 // operation, unless explicitly documented otherwise by that 229 // operation. 230 func (x *Float) Acc() Accuracy { 231 return x.acc 232 } 233 234 // Sign returns: 235 // 236 // -1 if x < 0 237 // 0 if x is ±0 238 // +1 if x > 0 239 // 240 func (x *Float) Sign() int { 241 if debugFloat { 242 x.validate() 243 } 244 if x.form == zero { 245 return 0 246 } 247 if x.neg { 248 return -1 249 } 250 return 1 251 } 252 253 // MantExp breaks x into its mantissa and exponent components 254 // and returns the exponent. If a non-nil mant argument is 255 // provided its value is set to the mantissa of x, with the 256 // same precision and rounding mode as x. The components 257 // satisfy x == mant × 2**exp, with 0.5 <= |mant| < 1.0. 258 // Calling MantExp with a nil argument is an efficient way to 259 // get the exponent of the receiver. 260 // 261 // Special cases are: 262 // 263 // ( ±0).MantExp(mant) = 0, with mant set to ±0 264 // (±Inf).MantExp(mant) = 0, with mant set to ±Inf 265 // 266 // x and mant may be the same in which case x is set to its 267 // mantissa value. 268 func (x *Float) MantExp(mant *Float) (exp int) { 269 if debugFloat { 270 x.validate() 271 } 272 if x.form == finite { 273 exp = int(x.exp) 274 } 275 if mant != nil { 276 mant.Copy(x) 277 if mant.form == finite { 278 mant.exp = 0 279 } 280 } 281 return 282 } 283 284 func (z *Float) setExpAndRound(exp int64, sbit uint) { 285 if exp < MinExp { 286 // underflow 287 z.acc = makeAcc(z.neg) 288 z.form = zero 289 return 290 } 291 292 if exp > MaxExp { 293 // overflow 294 z.acc = makeAcc(!z.neg) 295 z.form = inf 296 return 297 } 298 299 z.form = finite 300 z.exp = int32(exp) 301 z.round(sbit) 302 } 303 304 // SetMantExp sets z to mant × 2**exp and returns z. 305 // The result z has the same precision and rounding mode 306 // as mant. SetMantExp is an inverse of MantExp but does 307 // not require 0.5 <= |mant| < 1.0. Specifically: 308 // 309 // mant := new(Float) 310 // new(Float).SetMantExp(mant, x.MantExp(mant)).Cmp(x) == 0 311 // 312 // Special cases are: 313 // 314 // z.SetMantExp( ±0, exp) = ±0 315 // z.SetMantExp(±Inf, exp) = ±Inf 316 // 317 // z and mant may be the same in which case z's exponent 318 // is set to exp. 319 func (z *Float) SetMantExp(mant *Float, exp int) *Float { 320 if debugFloat { 321 z.validate() 322 mant.validate() 323 } 324 z.Copy(mant) 325 if z.form != finite { 326 return z 327 } 328 z.setExpAndRound(int64(z.exp)+int64(exp), 0) 329 return z 330 } 331 332 // Signbit reports whether x is negative or negative zero. 333 func (x *Float) Signbit() bool { 334 return x.neg 335 } 336 337 // IsInf reports whether x is +Inf or -Inf. 338 func (x *Float) IsInf() bool { 339 return x.form == inf 340 } 341 342 // IsInt reports whether x is an integer. 343 // ±Inf values are not integers. 344 func (x *Float) IsInt() bool { 345 if debugFloat { 346 x.validate() 347 } 348 // special cases 349 if x.form != finite { 350 return x.form == zero 351 } 352 // x.form == finite 353 if x.exp <= 0 { 354 return false 355 } 356 // x.exp > 0 357 return x.prec <= uint32(x.exp) || x.MinPrec() <= uint(x.exp) // not enough bits for fractional mantissa 358 } 359 360 // debugging support 361 func (x *Float) validate() { 362 if !debugFloat { 363 // avoid performance bugs 364 panic("validate called but debugFloat is not set") 365 } 366 if x.form != finite { 367 return 368 } 369 m := len(x.mant) 370 if m == 0 { 371 panic("nonzero finite number with empty mantissa") 372 } 373 const msb = 1 << (_W - 1) 374 if x.mant[m-1]&msb == 0 { 375 panic(fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.Text('p', 0))) 376 } 377 if x.prec == 0 { 378 panic("zero precision finite number") 379 } 380 } 381 382 // round rounds z according to z.mode to z.prec bits and sets z.acc accordingly. 383 // sbit must be 0 or 1 and summarizes any "sticky bit" information one might 384 // have before calling round. z's mantissa must be normalized (with the msb set) 385 // or empty. 386 // 387 // CAUTION: The rounding modes ToNegativeInf, ToPositiveInf are affected by the 388 // sign of z. For correct rounding, the sign of z must be set correctly before 389 // calling round. 390 func (z *Float) round(sbit uint) { 391 if debugFloat { 392 z.validate() 393 } 394 395 z.acc = Exact 396 if z.form != finite { 397 // ±0 or ±Inf => nothing left to do 398 return 399 } 400 // z.form == finite && len(z.mant) > 0 401 // m > 0 implies z.prec > 0 (checked by validate) 402 403 m := uint32(len(z.mant)) // present mantissa length in words 404 bits := m * _W // present mantissa bits; bits > 0 405 if bits <= z.prec { 406 // mantissa fits => nothing to do 407 return 408 } 409 // bits > z.prec 410 411 // Rounding is based on two bits: the rounding bit (rbit) and the 412 // sticky bit (sbit). The rbit is the bit immediately before the 413 // z.prec leading mantissa bits (the "0.5"). The sbit is set if any 414 // of the bits before the rbit are set (the "0.25", "0.125", etc.): 415 // 416 // rbit sbit => "fractional part" 417 // 418 // 0 0 == 0 419 // 0 1 > 0 , < 0.5 420 // 1 0 == 0.5 421 // 1 1 > 0.5, < 1.0 422 423 // bits > z.prec: mantissa too large => round 424 r := uint(bits - z.prec - 1) // rounding bit position; r >= 0 425 rbit := z.mant.bit(r) & 1 // rounding bit; be safe and ensure it's a single bit 426 // The sticky bit is only needed for rounding ToNearestEven 427 // or when the rounding bit is zero. Avoid computation otherwise. 428 if sbit == 0 && (rbit == 0 || z.mode == ToNearestEven) { 429 sbit = z.mant.sticky(r) 430 } 431 sbit &= 1 // be safe and ensure it's a single bit 432 433 // cut off extra words 434 n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision 435 if m > n { 436 copy(z.mant, z.mant[m-n:]) // move n last words to front 437 z.mant = z.mant[:n] 438 } 439 440 // determine number of trailing zero bits (ntz) and compute lsb mask of mantissa's least-significant word 441 ntz := n*_W - z.prec // 0 <= ntz < _W 442 lsb := Word(1) << ntz 443 444 // round if result is inexact 445 if rbit|sbit != 0 { 446 // Make rounding decision: The result mantissa is truncated ("rounded down") 447 // by default. Decide if we need to increment, or "round up", the (unsigned) 448 // mantissa. 449 inc := false 450 switch z.mode { 451 case ToNegativeInf: 452 inc = z.neg 453 case ToZero: 454 // nothing to do 455 case ToNearestEven: 456 inc = rbit != 0 && (sbit != 0 || z.mant[0]&lsb != 0) 457 case ToNearestAway: 458 inc = rbit != 0 459 case AwayFromZero: 460 inc = true 461 case ToPositiveInf: 462 inc = !z.neg 463 default: 464 panic("unreachable") 465 } 466 467 // A positive result (!z.neg) is Above the exact result if we increment, 468 // and it's Below if we truncate (Exact results require no rounding). 469 // For a negative result (z.neg) it is exactly the opposite. 470 z.acc = makeAcc(inc != z.neg) 471 472 if inc { 473 // add 1 to mantissa 474 if addVW(z.mant, z.mant, lsb) != 0 { 475 // mantissa overflow => adjust exponent 476 if z.exp >= MaxExp { 477 // exponent overflow 478 z.form = inf 479 return 480 } 481 z.exp++ 482 // adjust mantissa: divide by 2 to compensate for exponent adjustment 483 shrVU(z.mant, z.mant, 1) 484 // set msb == carry == 1 from the mantissa overflow above 485 const msb = 1 << (_W - 1) 486 z.mant[n-1] |= msb 487 } 488 } 489 } 490 491 // zero out trailing bits in least-significant word 492 z.mant[0] &^= lsb - 1 493 494 if debugFloat { 495 z.validate() 496 } 497 } 498 499 func (z *Float) setBits64(neg bool, x uint64) *Float { 500 if z.prec == 0 { 501 z.prec = 64 502 } 503 z.acc = Exact 504 z.neg = neg 505 if x == 0 { 506 z.form = zero 507 return z 508 } 509 // x != 0 510 z.form = finite 511 s := bits.LeadingZeros64(x) 512 z.mant = z.mant.setUint64(x << uint(s)) 513 z.exp = int32(64 - s) // always fits 514 if z.prec < 64 { 515 z.round(0) 516 } 517 return z 518 } 519 520 // SetUint64 sets z to the (possibly rounded) value of x and returns z. 521 // If z's precision is 0, it is changed to 64 (and rounding will have 522 // no effect). 523 func (z *Float) SetUint64(x uint64) *Float { 524 return z.setBits64(false, x) 525 } 526 527 // SetInt64 sets z to the (possibly rounded) value of x and returns z. 528 // If z's precision is 0, it is changed to 64 (and rounding will have 529 // no effect). 530 func (z *Float) SetInt64(x int64) *Float { 531 u := x 532 if u < 0 { 533 u = -u 534 } 535 // We cannot simply call z.SetUint64(uint64(u)) and change 536 // the sign afterwards because the sign affects rounding. 537 return z.setBits64(x < 0, uint64(u)) 538 } 539 540 // SetFloat64 sets z to the (possibly rounded) value of x and returns z. 541 // If z's precision is 0, it is changed to 53 (and rounding will have 542 // no effect). SetFloat64 panics with ErrNaN if x is a NaN. 543 func (z *Float) SetFloat64(x float64) *Float { 544 if z.prec == 0 { 545 z.prec = 53 546 } 547 if math.IsNaN(x) { 548 panic(ErrNaN{"Float.SetFloat64(NaN)"}) 549 } 550 z.acc = Exact 551 z.neg = math.Signbit(x) // handle -0, -Inf correctly 552 if x == 0 { 553 z.form = zero 554 return z 555 } 556 if math.IsInf(x, 0) { 557 z.form = inf 558 return z 559 } 560 // normalized x != 0 561 z.form = finite 562 fmant, exp := math.Frexp(x) // get normalized mantissa 563 z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11) 564 z.exp = int32(exp) // always fits 565 if z.prec < 53 { 566 z.round(0) 567 } 568 return z 569 } 570 571 // fnorm normalizes mantissa m by shifting it to the left 572 // such that the msb of the most-significant word (msw) is 1. 573 // It returns the shift amount. It assumes that len(m) != 0. 574 func fnorm(m nat) int64 { 575 if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) { 576 panic("msw of mantissa is 0") 577 } 578 s := nlz(m[len(m)-1]) 579 if s > 0 { 580 c := shlVU(m, m, s) 581 if debugFloat && c != 0 { 582 panic("nlz or shlVU incorrect") 583 } 584 } 585 return int64(s) 586 } 587 588 // SetInt sets z to the (possibly rounded) value of x and returns z. 589 // If z's precision is 0, it is changed to the larger of x.BitLen() 590 // or 64 (and rounding will have no effect). 591 func (z *Float) SetInt(x *Int) *Float { 592 // TODO(gri) can be more efficient if z.prec > 0 593 // but small compared to the size of x, or if there 594 // are many trailing 0's. 595 bits := uint32(x.BitLen()) 596 if z.prec == 0 { 597 z.prec = umax32(bits, 64) 598 } 599 z.acc = Exact 600 z.neg = x.neg 601 if len(x.abs) == 0 { 602 z.form = zero 603 return z 604 } 605 // x != 0 606 z.mant = z.mant.set(x.abs) 607 fnorm(z.mant) 608 z.setExpAndRound(int64(bits), 0) 609 return z 610 } 611 612 // SetRat sets z to the (possibly rounded) value of x and returns z. 613 // If z's precision is 0, it is changed to the largest of a.BitLen(), 614 // b.BitLen(), or 64; with x = a/b. 615 func (z *Float) SetRat(x *Rat) *Float { 616 if x.IsInt() { 617 return z.SetInt(x.Num()) 618 } 619 var a, b Float 620 a.SetInt(x.Num()) 621 b.SetInt(x.Denom()) 622 if z.prec == 0 { 623 z.prec = umax32(a.prec, b.prec) 624 } 625 return z.Quo(&a, &b) 626 } 627 628 // SetInf sets z to the infinite Float -Inf if signbit is 629 // set, or +Inf if signbit is not set, and returns z. The 630 // precision of z is unchanged and the result is always 631 // Exact. 632 func (z *Float) SetInf(signbit bool) *Float { 633 z.acc = Exact 634 z.form = inf 635 z.neg = signbit 636 return z 637 } 638 639 // Set sets z to the (possibly rounded) value of x and returns z. 640 // If z's precision is 0, it is changed to the precision of x 641 // before setting z (and rounding will have no effect). 642 // Rounding is performed according to z's precision and rounding 643 // mode; and z's accuracy reports the result error relative to the 644 // exact (not rounded) result. 645 func (z *Float) Set(x *Float) *Float { 646 if debugFloat { 647 x.validate() 648 } 649 z.acc = Exact 650 if z != x { 651 z.form = x.form 652 z.neg = x.neg 653 if x.form == finite { 654 z.exp = x.exp 655 z.mant = z.mant.set(x.mant) 656 } 657 if z.prec == 0 { 658 z.prec = x.prec 659 } else if z.prec < x.prec { 660 z.round(0) 661 } 662 } 663 return z 664 } 665 666 // Copy sets z to x, with the same precision, rounding mode, and 667 // accuracy as x, and returns z. x is not changed even if z and 668 // x are the same. 669 func (z *Float) Copy(x *Float) *Float { 670 if debugFloat { 671 x.validate() 672 } 673 if z != x { 674 z.prec = x.prec 675 z.mode = x.mode 676 z.acc = x.acc 677 z.form = x.form 678 z.neg = x.neg 679 if z.form == finite { 680 z.mant = z.mant.set(x.mant) 681 z.exp = x.exp 682 } 683 } 684 return z 685 } 686 687 // msb32 returns the 32 most significant bits of x. 688 func msb32(x nat) uint32 { 689 i := len(x) - 1 690 if i < 0 { 691 return 0 692 } 693 if debugFloat && x[i]&(1<<(_W-1)) == 0 { 694 panic("x not normalized") 695 } 696 switch _W { 697 case 32: 698 return uint32(x[i]) 699 case 64: 700 return uint32(x[i] >> 32) 701 } 702 panic("unreachable") 703 } 704 705 // msb64 returns the 64 most significant bits of x. 706 func msb64(x nat) uint64 { 707 i := len(x) - 1 708 if i < 0 { 709 return 0 710 } 711 if debugFloat && x[i]&(1<<(_W-1)) == 0 { 712 panic("x not normalized") 713 } 714 switch _W { 715 case 32: 716 v := uint64(x[i]) << 32 717 if i > 0 { 718 v |= uint64(x[i-1]) 719 } 720 return v 721 case 64: 722 return uint64(x[i]) 723 } 724 panic("unreachable") 725 } 726 727 // Uint64 returns the unsigned integer resulting from truncating x 728 // towards zero. If 0 <= x <= math.MaxUint64, the result is Exact 729 // if x is an integer and Below otherwise. 730 // The result is (0, Above) for x < 0, and (math.MaxUint64, Below) 731 // for x > math.MaxUint64. 732 func (x *Float) Uint64() (uint64, Accuracy) { 733 if debugFloat { 734 x.validate() 735 } 736 737 switch x.form { 738 case finite: 739 if x.neg { 740 return 0, Above 741 } 742 // 0 < x < +Inf 743 if x.exp <= 0 { 744 // 0 < x < 1 745 return 0, Below 746 } 747 // 1 <= x < Inf 748 if x.exp <= 64 { 749 // u = trunc(x) fits into a uint64 750 u := msb64(x.mant) >> (64 - uint32(x.exp)) 751 if x.MinPrec() <= 64 { 752 return u, Exact 753 } 754 return u, Below // x truncated 755 } 756 // x too large 757 return math.MaxUint64, Below 758 759 case zero: 760 return 0, Exact 761 762 case inf: 763 if x.neg { 764 return 0, Above 765 } 766 return math.MaxUint64, Below 767 } 768 769 panic("unreachable") 770 } 771 772 // Int64 returns the integer resulting from truncating x towards zero. 773 // If math.MinInt64 <= x <= math.MaxInt64, the result is Exact if x is 774 // an integer, and Above (x < 0) or Below (x > 0) otherwise. 775 // The result is (math.MinInt64, Above) for x < math.MinInt64, 776 // and (math.MaxInt64, Below) for x > math.MaxInt64. 777 func (x *Float) Int64() (int64, Accuracy) { 778 if debugFloat { 779 x.validate() 780 } 781 782 switch x.form { 783 case finite: 784 // 0 < |x| < +Inf 785 acc := makeAcc(x.neg) 786 if x.exp <= 0 { 787 // 0 < |x| < 1 788 return 0, acc 789 } 790 // x.exp > 0 791 792 // 1 <= |x| < +Inf 793 if x.exp <= 63 { 794 // i = trunc(x) fits into an int64 (excluding math.MinInt64) 795 i := int64(msb64(x.mant) >> (64 - uint32(x.exp))) 796 if x.neg { 797 i = -i 798 } 799 if x.MinPrec() <= uint(x.exp) { 800 return i, Exact 801 } 802 return i, acc // x truncated 803 } 804 if x.neg { 805 // check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64)) 806 if x.exp == 64 && x.MinPrec() == 1 { 807 acc = Exact 808 } 809 return math.MinInt64, acc 810 } 811 // x too large 812 return math.MaxInt64, Below 813 814 case zero: 815 return 0, Exact 816 817 case inf: 818 if x.neg { 819 return math.MinInt64, Above 820 } 821 return math.MaxInt64, Below 822 } 823 824 panic("unreachable") 825 } 826 827 // Float32 returns the float32 value nearest to x. If x is too small to be 828 // represented by a float32 (|x| < math.SmallestNonzeroFloat32), the result 829 // is (0, Below) or (-0, Above), respectively, depending on the sign of x. 830 // If x is too large to be represented by a float32 (|x| > math.MaxFloat32), 831 // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x. 832 func (x *Float) Float32() (float32, Accuracy) { 833 if debugFloat { 834 x.validate() 835 } 836 837 switch x.form { 838 case finite: 839 // 0 < |x| < +Inf 840 841 const ( 842 fbits = 32 // float size 843 mbits = 23 // mantissa size (excluding implicit msb) 844 ebits = fbits - mbits - 1 // 8 exponent size 845 bias = 1<<(ebits-1) - 1 // 127 exponent bias 846 dmin = 1 - bias - mbits // -149 smallest unbiased exponent (denormal) 847 emin = 1 - bias // -126 smallest unbiased exponent (normal) 848 emax = bias // 127 largest unbiased exponent (normal) 849 ) 850 851 // Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float32 mantissa. 852 e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0 853 854 // Compute precision p for float32 mantissa. 855 // If the exponent is too small, we have a denormal number before 856 // rounding and fewer than p mantissa bits of precision available 857 // (the exponent remains fixed but the mantissa gets shifted right). 858 p := mbits + 1 // precision of normal float 859 if e < emin { 860 // recompute precision 861 p = mbits + 1 - emin + int(e) 862 // If p == 0, the mantissa of x is shifted so much to the right 863 // that its msb falls immediately to the right of the float32 864 // mantissa space. In other words, if the smallest denormal is 865 // considered "1.0", for p == 0, the mantissa value m is >= 0.5. 866 // If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal. 867 // If m == 0.5, it is rounded down to even, i.e., 0.0. 868 // If p < 0, the mantissa value m is <= "0.25" which is never rounded up. 869 if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ { 870 // underflow to ±0 871 if x.neg { 872 var z float32 873 return -z, Above 874 } 875 return 0.0, Below 876 } 877 // otherwise, round up 878 // We handle p == 0 explicitly because it's easy and because 879 // Float.round doesn't support rounding to 0 bits of precision. 880 if p == 0 { 881 if x.neg { 882 return -math.SmallestNonzeroFloat32, Below 883 } 884 return math.SmallestNonzeroFloat32, Above 885 } 886 } 887 // p > 0 888 889 // round 890 var r Float 891 r.prec = uint32(p) 892 r.Set(x) 893 e = r.exp - 1 894 895 // Rounding may have caused r to overflow to ±Inf 896 // (rounding never causes underflows to 0). 897 // If the exponent is too large, also overflow to ±Inf. 898 if r.form == inf || e > emax { 899 // overflow 900 if x.neg { 901 return float32(math.Inf(-1)), Below 902 } 903 return float32(math.Inf(+1)), Above 904 } 905 // e <= emax 906 907 // Determine sign, biased exponent, and mantissa. 908 var sign, bexp, mant uint32 909 if x.neg { 910 sign = 1 << (fbits - 1) 911 } 912 913 // Rounding may have caused a denormal number to 914 // become normal. Check again. 915 if e < emin { 916 // denormal number: recompute precision 917 // Since rounding may have at best increased precision 918 // and we have eliminated p <= 0 early, we know p > 0. 919 // bexp == 0 for denormals 920 p = mbits + 1 - emin + int(e) 921 mant = msb32(r.mant) >> uint(fbits-p) 922 } else { 923 // normal number: emin <= e <= emax 924 bexp = uint32(e+bias) << mbits 925 mant = msb32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) 926 } 927 928 return math.Float32frombits(sign | bexp | mant), r.acc 929 930 case zero: 931 if x.neg { 932 var z float32 933 return -z, Exact 934 } 935 return 0.0, Exact 936 937 case inf: 938 if x.neg { 939 return float32(math.Inf(-1)), Exact 940 } 941 return float32(math.Inf(+1)), Exact 942 } 943 944 panic("unreachable") 945 } 946 947 // Float64 returns the float64 value nearest to x. If x is too small to be 948 // represented by a float64 (|x| < math.SmallestNonzeroFloat64), the result 949 // is (0, Below) or (-0, Above), respectively, depending on the sign of x. 950 // If x is too large to be represented by a float64 (|x| > math.MaxFloat64), 951 // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x. 952 func (x *Float) Float64() (float64, Accuracy) { 953 if debugFloat { 954 x.validate() 955 } 956 957 switch x.form { 958 case finite: 959 // 0 < |x| < +Inf 960 961 const ( 962 fbits = 64 // float size 963 mbits = 52 // mantissa size (excluding implicit msb) 964 ebits = fbits - mbits - 1 // 11 exponent size 965 bias = 1<<(ebits-1) - 1 // 1023 exponent bias 966 dmin = 1 - bias - mbits // -1074 smallest unbiased exponent (denormal) 967 emin = 1 - bias // -1022 smallest unbiased exponent (normal) 968 emax = bias // 1023 largest unbiased exponent (normal) 969 ) 970 971 // Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float64 mantissa. 972 e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0 973 974 // Compute precision p for float64 mantissa. 975 // If the exponent is too small, we have a denormal number before 976 // rounding and fewer than p mantissa bits of precision available 977 // (the exponent remains fixed but the mantissa gets shifted right). 978 p := mbits + 1 // precision of normal float 979 if e < emin { 980 // recompute precision 981 p = mbits + 1 - emin + int(e) 982 // If p == 0, the mantissa of x is shifted so much to the right 983 // that its msb falls immediately to the right of the float64 984 // mantissa space. In other words, if the smallest denormal is 985 // considered "1.0", for p == 0, the mantissa value m is >= 0.5. 986 // If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal. 987 // If m == 0.5, it is rounded down to even, i.e., 0.0. 988 // If p < 0, the mantissa value m is <= "0.25" which is never rounded up. 989 if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ { 990 // underflow to ±0 991 if x.neg { 992 var z float64 993 return -z, Above 994 } 995 return 0.0, Below 996 } 997 // otherwise, round up 998 // We handle p == 0 explicitly because it's easy and because 999 // Float.round doesn't support rounding to 0 bits of precision. 1000 if p == 0 { 1001 if x.neg { 1002 return -math.SmallestNonzeroFloat64, Below 1003 } 1004 return math.SmallestNonzeroFloat64, Above 1005 } 1006 } 1007 // p > 0 1008 1009 // round 1010 var r Float 1011 r.prec = uint32(p) 1012 r.Set(x) 1013 e = r.exp - 1 1014 1015 // Rounding may have caused r to overflow to ±Inf 1016 // (rounding never causes underflows to 0). 1017 // If the exponent is too large, also overflow to ±Inf. 1018 if r.form == inf || e > emax { 1019 // overflow 1020 if x.neg { 1021 return math.Inf(-1), Below 1022 } 1023 return math.Inf(+1), Above 1024 } 1025 // e <= emax 1026 1027 // Determine sign, biased exponent, and mantissa. 1028 var sign, bexp, mant uint64 1029 if x.neg { 1030 sign = 1 << (fbits - 1) 1031 } 1032 1033 // Rounding may have caused a denormal number to 1034 // become normal. Check again. 1035 if e < emin { 1036 // denormal number: recompute precision 1037 // Since rounding may have at best increased precision 1038 // and we have eliminated p <= 0 early, we know p > 0. 1039 // bexp == 0 for denormals 1040 p = mbits + 1 - emin + int(e) 1041 mant = msb64(r.mant) >> uint(fbits-p) 1042 } else { 1043 // normal number: emin <= e <= emax 1044 bexp = uint64(e+bias) << mbits 1045 mant = msb64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) 1046 } 1047 1048 return math.Float64frombits(sign | bexp | mant), r.acc 1049 1050 case zero: 1051 if x.neg { 1052 var z float64 1053 return -z, Exact 1054 } 1055 return 0.0, Exact 1056 1057 case inf: 1058 if x.neg { 1059 return math.Inf(-1), Exact 1060 } 1061 return math.Inf(+1), Exact 1062 } 1063 1064 panic("unreachable") 1065 } 1066 1067 // Int returns the result of truncating x towards zero; 1068 // or nil if x is an infinity. 1069 // The result is Exact if x.IsInt(); otherwise it is Below 1070 // for x > 0, and Above for x < 0. 1071 // If a non-nil *Int argument z is provided, Int stores 1072 // the result in z instead of allocating a new Int. 1073 func (x *Float) Int(z *Int) (*Int, Accuracy) { 1074 if debugFloat { 1075 x.validate() 1076 } 1077 1078 if z == nil && x.form <= finite { 1079 z = new(Int) 1080 } 1081 1082 switch x.form { 1083 case finite: 1084 // 0 < |x| < +Inf 1085 acc := makeAcc(x.neg) 1086 if x.exp <= 0 { 1087 // 0 < |x| < 1 1088 return z.SetInt64(0), acc 1089 } 1090 // x.exp > 0 1091 1092 // 1 <= |x| < +Inf 1093 // determine minimum required precision for x 1094 allBits := uint(len(x.mant)) * _W 1095 exp := uint(x.exp) 1096 if x.MinPrec() <= exp { 1097 acc = Exact 1098 } 1099 // shift mantissa as needed 1100 if z == nil { 1101 z = new(Int) 1102 } 1103 z.neg = x.neg 1104 switch { 1105 case exp > allBits: 1106 z.abs = z.abs.shl(x.mant, exp-allBits) 1107 default: 1108 z.abs = z.abs.set(x.mant) 1109 case exp < allBits: 1110 z.abs = z.abs.shr(x.mant, allBits-exp) 1111 } 1112 return z, acc 1113 1114 case zero: 1115 return z.SetInt64(0), Exact 1116 1117 case inf: 1118 return nil, makeAcc(x.neg) 1119 } 1120 1121 panic("unreachable") 1122 } 1123 1124 // Rat returns the rational number corresponding to x; 1125 // or nil if x is an infinity. 1126 // The result is Exact if x is not an Inf. 1127 // If a non-nil *Rat argument z is provided, Rat stores 1128 // the result in z instead of allocating a new Rat. 1129 func (x *Float) Rat(z *Rat) (*Rat, Accuracy) { 1130 if debugFloat { 1131 x.validate() 1132 } 1133 1134 if z == nil && x.form <= finite { 1135 z = new(Rat) 1136 } 1137 1138 switch x.form { 1139 case finite: 1140 // 0 < |x| < +Inf 1141 allBits := int32(len(x.mant)) * _W 1142 // build up numerator and denominator 1143 z.a.neg = x.neg 1144 switch { 1145 case x.exp > allBits: 1146 z.a.abs = z.a.abs.shl(x.mant, uint(x.exp-allBits)) 1147 z.b.abs = z.b.abs[:0] // == 1 (see Rat) 1148 // z already in normal form 1149 default: 1150 z.a.abs = z.a.abs.set(x.mant) 1151 z.b.abs = z.b.abs[:0] // == 1 (see Rat) 1152 // z already in normal form 1153 case x.exp < allBits: 1154 z.a.abs = z.a.abs.set(x.mant) 1155 t := z.b.abs.setUint64(1) 1156 z.b.abs = t.shl(t, uint(allBits-x.exp)) 1157 z.norm() 1158 } 1159 return z, Exact 1160 1161 case zero: 1162 return z.SetInt64(0), Exact 1163 1164 case inf: 1165 return nil, makeAcc(x.neg) 1166 } 1167 1168 panic("unreachable") 1169 } 1170 1171 // Abs sets z to the (possibly rounded) value |x| (the absolute value of x) 1172 // and returns z. 1173 func (z *Float) Abs(x *Float) *Float { 1174 z.Set(x) 1175 z.neg = false 1176 return z 1177 } 1178 1179 // Neg sets z to the (possibly rounded) value of x with its sign negated, 1180 // and returns z. 1181 func (z *Float) Neg(x *Float) *Float { 1182 z.Set(x) 1183 z.neg = !z.neg 1184 return z 1185 } 1186 1187 func validateBinaryOperands(x, y *Float) { 1188 if !debugFloat { 1189 // avoid performance bugs 1190 panic("validateBinaryOperands called but debugFloat is not set") 1191 } 1192 if len(x.mant) == 0 { 1193 panic("empty mantissa for x") 1194 } 1195 if len(y.mant) == 0 { 1196 panic("empty mantissa for y") 1197 } 1198 } 1199 1200 // z = x + y, ignoring signs of x and y for the addition 1201 // but using the sign of z for rounding the result. 1202 // x and y must have a non-empty mantissa and valid exponent. 1203 func (z *Float) uadd(x, y *Float) { 1204 // Note: This implementation requires 2 shifts most of the 1205 // time. It is also inefficient if exponents or precisions 1206 // differ by wide margins. The following article describes 1207 // an efficient (but much more complicated) implementation 1208 // compatible with the internal representation used here: 1209 // 1210 // Vincent Lefèvre: "The Generic Multiple-Precision Floating- 1211 // Point Addition With Exact Rounding (as in the MPFR Library)" 1212 // http://www.vinc17.net/research/papers/rnc6.pdf 1213 1214 if debugFloat { 1215 validateBinaryOperands(x, y) 1216 } 1217 1218 // compute exponents ex, ey for mantissa with "binary point" 1219 // on the right (mantissa.0) - use int64 to avoid overflow 1220 ex := int64(x.exp) - int64(len(x.mant))*_W 1221 ey := int64(y.exp) - int64(len(y.mant))*_W 1222 1223 al := alias(z.mant, x.mant) || alias(z.mant, y.mant) 1224 1225 // TODO(gri) having a combined add-and-shift primitive 1226 // could make this code significantly faster 1227 switch { 1228 case ex < ey: 1229 if al { 1230 t := nat(nil).shl(y.mant, uint(ey-ex)) 1231 z.mant = z.mant.add(x.mant, t) 1232 } else { 1233 z.mant = z.mant.shl(y.mant, uint(ey-ex)) 1234 z.mant = z.mant.add(x.mant, z.mant) 1235 } 1236 default: 1237 // ex == ey, no shift needed 1238 z.mant = z.mant.add(x.mant, y.mant) 1239 case ex > ey: 1240 if al { 1241 t := nat(nil).shl(x.mant, uint(ex-ey)) 1242 z.mant = z.mant.add(t, y.mant) 1243 } else { 1244 z.mant = z.mant.shl(x.mant, uint(ex-ey)) 1245 z.mant = z.mant.add(z.mant, y.mant) 1246 } 1247 ex = ey 1248 } 1249 // len(z.mant) > 0 1250 1251 z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0) 1252 } 1253 1254 // z = x - y for |x| > |y|, ignoring signs of x and y for the subtraction 1255 // but using the sign of z for rounding the result. 1256 // x and y must have a non-empty mantissa and valid exponent. 1257 func (z *Float) usub(x, y *Float) { 1258 // This code is symmetric to uadd. 1259 // We have not factored the common code out because 1260 // eventually uadd (and usub) should be optimized 1261 // by special-casing, and the code will diverge. 1262 1263 if debugFloat { 1264 validateBinaryOperands(x, y) 1265 } 1266 1267 ex := int64(x.exp) - int64(len(x.mant))*_W 1268 ey := int64(y.exp) - int64(len(y.mant))*_W 1269 1270 al := alias(z.mant, x.mant) || alias(z.mant, y.mant) 1271 1272 switch { 1273 case ex < ey: 1274 if al { 1275 t := nat(nil).shl(y.mant, uint(ey-ex)) 1276 z.mant = t.sub(x.mant, t) 1277 } else { 1278 z.mant = z.mant.shl(y.mant, uint(ey-ex)) 1279 z.mant = z.mant.sub(x.mant, z.mant) 1280 } 1281 default: 1282 // ex == ey, no shift needed 1283 z.mant = z.mant.sub(x.mant, y.mant) 1284 case ex > ey: 1285 if al { 1286 t := nat(nil).shl(x.mant, uint(ex-ey)) 1287 z.mant = t.sub(t, y.mant) 1288 } else { 1289 z.mant = z.mant.shl(x.mant, uint(ex-ey)) 1290 z.mant = z.mant.sub(z.mant, y.mant) 1291 } 1292 ex = ey 1293 } 1294 1295 // operands may have canceled each other out 1296 if len(z.mant) == 0 { 1297 z.acc = Exact 1298 z.form = zero 1299 z.neg = false 1300 return 1301 } 1302 // len(z.mant) > 0 1303 1304 z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0) 1305 } 1306 1307 // z = x * y, ignoring signs of x and y for the multiplication 1308 // but using the sign of z for rounding the result. 1309 // x and y must have a non-empty mantissa and valid exponent. 1310 func (z *Float) umul(x, y *Float) { 1311 if debugFloat { 1312 validateBinaryOperands(x, y) 1313 } 1314 1315 // Note: This is doing too much work if the precision 1316 // of z is less than the sum of the precisions of x 1317 // and y which is often the case (e.g., if all floats 1318 // have the same precision). 1319 // TODO(gri) Optimize this for the common case. 1320 1321 e := int64(x.exp) + int64(y.exp) 1322 if x == y { 1323 z.mant = z.mant.sqr(x.mant) 1324 } else { 1325 z.mant = z.mant.mul(x.mant, y.mant) 1326 } 1327 z.setExpAndRound(e-fnorm(z.mant), 0) 1328 } 1329 1330 // z = x / y, ignoring signs of x and y for the division 1331 // but using the sign of z for rounding the result. 1332 // x and y must have a non-empty mantissa and valid exponent. 1333 func (z *Float) uquo(x, y *Float) { 1334 if debugFloat { 1335 validateBinaryOperands(x, y) 1336 } 1337 1338 // mantissa length in words for desired result precision + 1 1339 // (at least one extra bit so we get the rounding bit after 1340 // the division) 1341 n := int(z.prec/_W) + 1 1342 1343 // compute adjusted x.mant such that we get enough result precision 1344 xadj := x.mant 1345 if d := n - len(x.mant) + len(y.mant); d > 0 { 1346 // d extra words needed => add d "0 digits" to x 1347 xadj = make(nat, len(x.mant)+d) 1348 copy(xadj[d:], x.mant) 1349 } 1350 // TODO(gri): If we have too many digits (d < 0), we should be able 1351 // to shorten x for faster division. But we must be extra careful 1352 // with rounding in that case. 1353 1354 // Compute d before division since there may be aliasing of x.mant 1355 // (via xadj) or y.mant with z.mant. 1356 d := len(xadj) - len(y.mant) 1357 1358 // divide 1359 var r nat 1360 z.mant, r = z.mant.div(nil, xadj, y.mant) 1361 e := int64(x.exp) - int64(y.exp) - int64(d-len(z.mant))*_W 1362 1363 // The result is long enough to include (at least) the rounding bit. 1364 // If there's a non-zero remainder, the corresponding fractional part 1365 // (if it were computed), would have a non-zero sticky bit (if it were 1366 // zero, it couldn't have a non-zero remainder). 1367 var sbit uint 1368 if len(r) > 0 { 1369 sbit = 1 1370 } 1371 1372 z.setExpAndRound(e-fnorm(z.mant), sbit) 1373 } 1374 1375 // ucmp returns -1, 0, or +1, depending on whether 1376 // |x| < |y|, |x| == |y|, or |x| > |y|. 1377 // x and y must have a non-empty mantissa and valid exponent. 1378 func (x *Float) ucmp(y *Float) int { 1379 if debugFloat { 1380 validateBinaryOperands(x, y) 1381 } 1382 1383 switch { 1384 case x.exp < y.exp: 1385 return -1 1386 case x.exp > y.exp: 1387 return +1 1388 } 1389 // x.exp == y.exp 1390 1391 // compare mantissas 1392 i := len(x.mant) 1393 j := len(y.mant) 1394 for i > 0 || j > 0 { 1395 var xm, ym Word 1396 if i > 0 { 1397 i-- 1398 xm = x.mant[i] 1399 } 1400 if j > 0 { 1401 j-- 1402 ym = y.mant[j] 1403 } 1404 switch { 1405 case xm < ym: 1406 return -1 1407 case xm > ym: 1408 return +1 1409 } 1410 } 1411 1412 return 0 1413 } 1414 1415 // Handling of sign bit as defined by IEEE 754-2008, section 6.3: 1416 // 1417 // When neither the inputs nor result are NaN, the sign of a product or 1418 // quotient is the exclusive OR of the operands’ signs; the sign of a sum, 1419 // or of a difference x−y regarded as a sum x+(−y), differs from at most 1420 // one of the addends’ signs; and the sign of the result of conversions, 1421 // the quantize operation, the roundToIntegral operations, and the 1422 // roundToIntegralExact (see 5.3.1) is the sign of the first or only operand. 1423 // These rules shall apply even when operands or results are zero or infinite. 1424 // 1425 // When the sum of two operands with opposite signs (or the difference of 1426 // two operands with like signs) is exactly zero, the sign of that sum (or 1427 // difference) shall be +0 in all rounding-direction attributes except 1428 // roundTowardNegative; under that attribute, the sign of an exact zero 1429 // sum (or difference) shall be −0. However, x+x = x−(−x) retains the same 1430 // sign as x even when x is zero. 1431 // 1432 // See also: https://play.golang.org/p/RtH3UCt5IH 1433 1434 // Add sets z to the rounded sum x+y and returns z. If z's precision is 0, 1435 // it is changed to the larger of x's or y's precision before the operation. 1436 // Rounding is performed according to z's precision and rounding mode; and 1437 // z's accuracy reports the result error relative to the exact (not rounded) 1438 // result. Add panics with ErrNaN if x and y are infinities with opposite 1439 // signs. The value of z is undefined in that case. 1440 func (z *Float) Add(x, y *Float) *Float { 1441 if debugFloat { 1442 x.validate() 1443 y.validate() 1444 } 1445 1446 if z.prec == 0 { 1447 z.prec = umax32(x.prec, y.prec) 1448 } 1449 1450 if x.form == finite && y.form == finite { 1451 // x + y (common case) 1452 1453 // Below we set z.neg = x.neg, and when z aliases y this will 1454 // change the y operand's sign. This is fine, because if an 1455 // operand aliases the receiver it'll be overwritten, but we still 1456 // want the original x.neg and y.neg values when we evaluate 1457 // x.neg != y.neg, so we need to save y.neg before setting z.neg. 1458 yneg := y.neg 1459 1460 z.neg = x.neg 1461 if x.neg == yneg { 1462 // x + y == x + y 1463 // (-x) + (-y) == -(x + y) 1464 z.uadd(x, y) 1465 } else { 1466 // x + (-y) == x - y == -(y - x) 1467 // (-x) + y == y - x == -(x - y) 1468 if x.ucmp(y) > 0 { 1469 z.usub(x, y) 1470 } else { 1471 z.neg = !z.neg 1472 z.usub(y, x) 1473 } 1474 } 1475 if z.form == zero && z.mode == ToNegativeInf && z.acc == Exact { 1476 z.neg = true 1477 } 1478 return z 1479 } 1480 1481 if x.form == inf && y.form == inf && x.neg != y.neg { 1482 // +Inf + -Inf 1483 // -Inf + +Inf 1484 // value of z is undefined but make sure it's valid 1485 z.acc = Exact 1486 z.form = zero 1487 z.neg = false 1488 panic(ErrNaN{"addition of infinities with opposite signs"}) 1489 } 1490 1491 if x.form == zero && y.form == zero { 1492 // ±0 + ±0 1493 z.acc = Exact 1494 z.form = zero 1495 z.neg = x.neg && y.neg // -0 + -0 == -0 1496 return z 1497 } 1498 1499 if x.form == inf || y.form == zero { 1500 // ±Inf + y 1501 // x + ±0 1502 return z.Set(x) 1503 } 1504 1505 // ±0 + y 1506 // x + ±Inf 1507 return z.Set(y) 1508 } 1509 1510 // Sub sets z to the rounded difference x-y and returns z. 1511 // Precision, rounding, and accuracy reporting are as for Add. 1512 // Sub panics with ErrNaN if x and y are infinities with equal 1513 // signs. The value of z is undefined in that case. 1514 func (z *Float) Sub(x, y *Float) *Float { 1515 if debugFloat { 1516 x.validate() 1517 y.validate() 1518 } 1519 1520 if z.prec == 0 { 1521 z.prec = umax32(x.prec, y.prec) 1522 } 1523 1524 if x.form == finite && y.form == finite { 1525 // x - y (common case) 1526 yneg := y.neg 1527 z.neg = x.neg 1528 if x.neg != yneg { 1529 // x - (-y) == x + y 1530 // (-x) - y == -(x + y) 1531 z.uadd(x, y) 1532 } else { 1533 // x - y == x - y == -(y - x) 1534 // (-x) - (-y) == y - x == -(x - y) 1535 if x.ucmp(y) > 0 { 1536 z.usub(x, y) 1537 } else { 1538 z.neg = !z.neg 1539 z.usub(y, x) 1540 } 1541 } 1542 if z.form == zero && z.mode == ToNegativeInf && z.acc == Exact { 1543 z.neg = true 1544 } 1545 return z 1546 } 1547 1548 if x.form == inf && y.form == inf && x.neg == y.neg { 1549 // +Inf - +Inf 1550 // -Inf - -Inf 1551 // value of z is undefined but make sure it's valid 1552 z.acc = Exact 1553 z.form = zero 1554 z.neg = false 1555 panic(ErrNaN{"subtraction of infinities with equal signs"}) 1556 } 1557 1558 if x.form == zero && y.form == zero { 1559 // ±0 - ±0 1560 z.acc = Exact 1561 z.form = zero 1562 z.neg = x.neg && !y.neg // -0 - +0 == -0 1563 return z 1564 } 1565 1566 if x.form == inf || y.form == zero { 1567 // ±Inf - y 1568 // x - ±0 1569 return z.Set(x) 1570 } 1571 1572 // ±0 - y 1573 // x - ±Inf 1574 return z.Neg(y) 1575 } 1576 1577 // Mul sets z to the rounded product x*y and returns z. 1578 // Precision, rounding, and accuracy reporting are as for Add. 1579 // Mul panics with ErrNaN if one operand is zero and the other 1580 // operand an infinity. The value of z is undefined in that case. 1581 func (z *Float) Mul(x, y *Float) *Float { 1582 if debugFloat { 1583 x.validate() 1584 y.validate() 1585 } 1586 1587 if z.prec == 0 { 1588 z.prec = umax32(x.prec, y.prec) 1589 } 1590 1591 z.neg = x.neg != y.neg 1592 1593 if x.form == finite && y.form == finite { 1594 // x * y (common case) 1595 z.umul(x, y) 1596 return z 1597 } 1598 1599 z.acc = Exact 1600 if x.form == zero && y.form == inf || x.form == inf && y.form == zero { 1601 // ±0 * ±Inf 1602 // ±Inf * ±0 1603 // value of z is undefined but make sure it's valid 1604 z.form = zero 1605 z.neg = false 1606 panic(ErrNaN{"multiplication of zero with infinity"}) 1607 } 1608 1609 if x.form == inf || y.form == inf { 1610 // ±Inf * y 1611 // x * ±Inf 1612 z.form = inf 1613 return z 1614 } 1615 1616 // ±0 * y 1617 // x * ±0 1618 z.form = zero 1619 return z 1620 } 1621 1622 // Quo sets z to the rounded quotient x/y and returns z. 1623 // Precision, rounding, and accuracy reporting are as for Add. 1624 // Quo panics with ErrNaN if both operands are zero or infinities. 1625 // The value of z is undefined in that case. 1626 func (z *Float) Quo(x, y *Float) *Float { 1627 if debugFloat { 1628 x.validate() 1629 y.validate() 1630 } 1631 1632 if z.prec == 0 { 1633 z.prec = umax32(x.prec, y.prec) 1634 } 1635 1636 z.neg = x.neg != y.neg 1637 1638 if x.form == finite && y.form == finite { 1639 // x / y (common case) 1640 z.uquo(x, y) 1641 return z 1642 } 1643 1644 z.acc = Exact 1645 if x.form == zero && y.form == zero || x.form == inf && y.form == inf { 1646 // ±0 / ±0 1647 // ±Inf / ±Inf 1648 // value of z is undefined but make sure it's valid 1649 z.form = zero 1650 z.neg = false 1651 panic(ErrNaN{"division of zero by zero or infinity by infinity"}) 1652 } 1653 1654 if x.form == zero || y.form == inf { 1655 // ±0 / y 1656 // x / ±Inf 1657 z.form = zero 1658 return z 1659 } 1660 1661 // x / ±0 1662 // ±Inf / y 1663 z.form = inf 1664 return z 1665 } 1666 1667 // Cmp compares x and y and returns: 1668 // 1669 // -1 if x < y 1670 // 0 if x == y (incl. -0 == 0, -Inf == -Inf, and +Inf == +Inf) 1671 // +1 if x > y 1672 // 1673 func (x *Float) Cmp(y *Float) int { 1674 if debugFloat { 1675 x.validate() 1676 y.validate() 1677 } 1678 1679 mx := x.ord() 1680 my := y.ord() 1681 switch { 1682 case mx < my: 1683 return -1 1684 case mx > my: 1685 return +1 1686 } 1687 // mx == my 1688 1689 // only if |mx| == 1 we have to compare the mantissae 1690 switch mx { 1691 case -1: 1692 return y.ucmp(x) 1693 case +1: 1694 return x.ucmp(y) 1695 } 1696 1697 return 0 1698 } 1699 1700 // ord classifies x and returns: 1701 // 1702 // -2 if -Inf == x 1703 // -1 if -Inf < x < 0 1704 // 0 if x == 0 (signed or unsigned) 1705 // +1 if 0 < x < +Inf 1706 // +2 if x == +Inf 1707 // 1708 func (x *Float) ord() int { 1709 var m int 1710 switch x.form { 1711 case finite: 1712 m = 1 1713 case zero: 1714 return 0 1715 case inf: 1716 m = 2 1717 } 1718 if x.neg { 1719 m = -m 1720 } 1721 return m 1722 } 1723 1724 func umax32(x, y uint32) uint32 { 1725 if x > y { 1726 return x 1727 } 1728 return y 1729 } 1730