Source file test/float_lit2.go

     1  // run
     2  
     3  // Check conversion of constant to float32/float64 near min/max boundaries.
     4  
     5  // Copyright 2014 The Go Authors. All rights reserved.
     6  // Use of this source code is governed by a BSD-style
     7  // license that can be found in the LICENSE file.
     8  
     9  package main
    10  
    11  import (
    12  	"fmt"
    13  	"math"
    14  )
    15  
    16  // The largest exact float32 is f₁ = (1+1-1/2²³)×2¹²⁷ = (2-2⁻²³)×2¹²⁷ = 2¹²⁸ - 2¹⁰⁴.
    17  // The next float32 would be f₂ = (1+1)×2¹²⁷ = 1×2¹²⁸, except that exponent is out of range.
    18  // Float32 conversion rounds to the nearest float32, rounding to even mantissa:
    19  // between f₁ and f₂, values closer to f₁ round to f₁ and values closer to f₂ are rejected as out of range.
    20  // f₁ is an odd mantissa, so the halfway point (f₁+f₂)/2 rounds to f₂ and is rejected.
    21  // The halfway point is (f₁+f₂)/2 = 2¹²⁸ - 2¹⁰³.
    22  //
// The same is true of float64, with different constants: s/24/53/ and s/128/1024/.
// That is, the largest float64 is 2¹⁰²⁴ - 2⁹⁷¹ and the halfway point is 2¹⁰²⁴ - 2⁹⁷⁰.
    24  
// Untyped floating-point constants from which the boundary test values are
// built. Everything here is evaluated by the compiler's constant arithmetic,
// so values such as two1024 that do not fit in a float64 are still usable in
// constant expressions.
const (
	// Powers of two. The 1.0 factor makes each result an untyped
	// floating-point constant rather than an integer constant.
	two24   = 1.0 * (1 << 24)
	two53   = 1.0 * (1 << 53)
	two64   = 1.0 * (1 << 64)
	two128  = two64 * two64
	two256  = two128 * two128
	two512  = two256 * two256
	two768  = two512 * two256
	two1024 = two512 * two512

	// ulp32 is 2¹⁰⁴, the gap between the two largest finite float32 values
	// (main checks this at run time). max32 is 2¹²⁸ - 2¹⁰⁴, the largest
	// finite float32.
	ulp32 = two128 / two24
	max32 = two128 - ulp32

	// ulp64 is 2⁹⁷¹, the gap between the two largest finite float64 values
	// (main checks this at run time). max64 is 2¹⁰²⁴ - 2⁹⁷¹, the largest
	// finite float64.
	ulp64 = two1024 / two53
	max64 = two1024 - ulp64
)
    41  
// cvt is the table of constant conversions checked by main.
//
// For each entry, approx holds the result of converting the constant
// expression whose source is repeated in text, and exact holds the value that
// conversion is expected to produce. bits is the expected IEEE 754 bit pattern
// of that value, written out by hand as an independent check on the table
// itself; float32 patterns are stored widened to uint64. main reports a bug
// if exact does not have the pattern bits, or if approx differs from exact.
var cvt = []struct {
	bits   uint64 // keep us honest
	exact  interface{}
	approx interface{}
	text   string
}{
	// float32, positive. The first four values lie within half an ulp of
	// max32-ulp32 (the exact ties round to its even mantissa); the last four
	// lie strictly within half an ulp of max32.
	{0x7f7ffffe, float32(max32 - ulp32), float32(max32 - ulp32 - ulp32/2), "max32 - ulp32 - ulp32/2"},
	{0x7f7ffffe, float32(max32 - ulp32), float32(max32 - ulp32), "max32 - ulp32"},
	{0x7f7ffffe, float32(max32 - ulp32), float32(max32 - ulp32/2), "max32 - ulp32/2"},
	{0x7f7ffffe, float32(max32 - ulp32), float32(max32 - ulp32 + ulp32/2), "max32 - ulp32 + ulp32/2"},
	{0x7f7fffff, float32(max32), float32(max32 - ulp32 + ulp32/2 + ulp32/two64), "max32 - ulp32 + ulp32/2 + ulp32/two64"},
	{0x7f7fffff, float32(max32), float32(max32 - ulp32/2 + ulp32/two64), "max32 - ulp32/2 + ulp32/two64"},
	{0x7f7fffff, float32(max32), float32(max32), "max32"},
	{0x7f7fffff, float32(max32), float32(max32 + ulp32/2 - ulp32/two64), "max32 + ulp32/2 - ulp32/two64"},

	// float32, negative: the same eight cases, negated.
	{0xff7ffffe, float32(-(max32 - ulp32)), float32(-(max32 - ulp32 - ulp32/2)), "-(max32 - ulp32 - ulp32/2)"},
	{0xff7ffffe, float32(-(max32 - ulp32)), float32(-(max32 - ulp32)), "-(max32 - ulp32)"},
	{0xff7ffffe, float32(-(max32 - ulp32)), float32(-(max32 - ulp32/2)), "-(max32 - ulp32/2)"},
	{0xff7ffffe, float32(-(max32 - ulp32)), float32(-(max32 - ulp32 + ulp32/2)), "-(max32 - ulp32 + ulp32/2)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32 + ulp32/2 + ulp32/two64)), "-(max32 - ulp32 + ulp32/2 + ulp32/two64)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32/2 + ulp32/two64)), "-(max32 - ulp32/2 + ulp32/two64)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32)), "-(max32)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 + ulp32/2 - ulp32/two64)), "-(max32 + ulp32/2 - ulp32/two64)"},

	// These are required to work: according to the Go spec, the internal float mantissa must be at least 256 bits,
	// and these expressions can be represented exactly with a 256-bit mantissa.
	{0x7f7fffff, float32(max32), float32(max32 - ulp32 + ulp32/2 + 1), "max32 - ulp32 + ulp32/2 + 1"},
	{0x7f7fffff, float32(max32), float32(max32 - ulp32/2 + 1), "max32 - ulp32/2 + 1"},
	{0x7f7fffff, float32(max32), float32(max32 + ulp32/2 - 1), "max32 + ulp32/2 - 1"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32 + ulp32/2 + 1)), "-(max32 - ulp32 + ulp32/2 + 1)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32/2 + 1)), "-(max32 - ulp32/2 + 1)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 + ulp32/2 - 1)), "-(max32 + ulp32/2 - 1)"},

	// The same six cases with an offset of 2⁻¹²⁸ (1/two128) instead of 1.
	{0x7f7fffff, float32(max32), float32(max32 - ulp32 + ulp32/2 + 1/two128), "max32 - ulp32 + ulp32/2 + 1/two128"},
	{0x7f7fffff, float32(max32), float32(max32 - ulp32/2 + 1/two128), "max32 - ulp32/2 + 1/two128"},
	{0x7f7fffff, float32(max32), float32(max32 + ulp32/2 - 1/two128), "max32 + ulp32/2 - 1/two128"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32 + ulp32/2 + 1/two128)), "-(max32 - ulp32 + ulp32/2 + 1/two128)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32/2 + 1/two128)), "-(max32 - ulp32/2 + 1/two128)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 + ulp32/2 - 1/two128)), "-(max32 + ulp32/2 - 1/two128)"},

	// float64, positive: the float32 boundary cases repeated with max64 and ulp64.
	{0x7feffffffffffffe, float64(max64 - ulp64), float64(max64 - ulp64 - ulp64/2), "max64 - ulp64 - ulp64/2"},
	{0x7feffffffffffffe, float64(max64 - ulp64), float64(max64 - ulp64), "max64 - ulp64"},
	{0x7feffffffffffffe, float64(max64 - ulp64), float64(max64 - ulp64/2), "max64 - ulp64/2"},
	{0x7feffffffffffffe, float64(max64 - ulp64), float64(max64 - ulp64 + ulp64/2), "max64 - ulp64 + ulp64/2"},
	{0x7fefffffffffffff, float64(max64), float64(max64 - ulp64 + ulp64/2 + ulp64/two64), "max64 - ulp64 + ulp64/2 + ulp64/two64"},
	{0x7fefffffffffffff, float64(max64), float64(max64 - ulp64/2 + ulp64/two64), "max64 - ulp64/2 + ulp64/two64"},
	{0x7fefffffffffffff, float64(max64), float64(max64), "max64"},
	{0x7fefffffffffffff, float64(max64), float64(max64 + ulp64/2 - ulp64/two64), "max64 + ulp64/2 - ulp64/two64"},

	// float64, negative: the same eight cases, negated.
	{0xffeffffffffffffe, float64(-(max64 - ulp64)), float64(-(max64 - ulp64 - ulp64/2)), "-(max64 - ulp64 - ulp64/2)"},
	{0xffeffffffffffffe, float64(-(max64 - ulp64)), float64(-(max64 - ulp64)), "-(max64 - ulp64)"},
	{0xffeffffffffffffe, float64(-(max64 - ulp64)), float64(-(max64 - ulp64/2)), "-(max64 - ulp64/2)"},
	{0xffeffffffffffffe, float64(-(max64 - ulp64)), float64(-(max64 - ulp64 + ulp64/2)), "-(max64 - ulp64 + ulp64/2)"},
	{0xffefffffffffffff, float64(-(max64)), float64(-(max64 - ulp64 + ulp64/2 + ulp64/two64)), "-(max64 - ulp64 + ulp64/2 + ulp64/two64)"},
	{0xffefffffffffffff, float64(-(max64)), float64(-(max64 - ulp64/2 + ulp64/two64)), "-(max64 - ulp64/2 + ulp64/two64)"},
	{0xffefffffffffffff, float64(-(max64)), float64(-(max64)), "-(max64)"},
	{0xffefffffffffffff, float64(-(max64)), float64(-(max64 + ulp64/2 - ulp64/two64)), "-(max64 + ulp64/2 - ulp64/two64)"},

	// These are required to work.
	// The mantissas are exactly 256 bits.
	// max64 is just below 2¹⁰²⁴ so the bottom bit we can use is 2⁷⁶⁸.
	{0x7fefffffffffffff, float64(max64), float64(max64 - ulp64 + ulp64/2 + two768), "max64 - ulp64 + ulp64/2 + two768"},
	{0x7fefffffffffffff, float64(max64), float64(max64 - ulp64/2 + two768), "max64 - ulp64/2 + two768"},
	{0x7fefffffffffffff, float64(max64), float64(max64 + ulp64/2 - two768), "max64 + ulp64/2 - two768"},
	{0xffefffffffffffff, float64(-(max64)), float64(-(max64 - ulp64 + ulp64/2 + two768)), "-(max64 - ulp64 + ulp64/2 + two768)"},
	{0xffefffffffffffff, float64(-(max64)), float64(-(max64 - ulp64/2 + two768)), "-(max64 - ulp64/2 + two768)"},
	{0xffefffffffffffff, float64(-(max64)), float64(-(max64 + ulp64/2 - two768)), "-(max64 + ulp64/2 - two768)"},
}
   111  
   112  var bugged = false
   113  
   114  func bug() {
   115  	if !bugged {
   116  		bugged = true
   117  		fmt.Println("BUG")
   118  	}
   119  }
   120  
   121  func main() {
   122  	u64 := math.Float64frombits(0x7fefffffffffffff) - math.Float64frombits(0x7feffffffffffffe)
   123  	if ulp64 != u64 {
   124  		bug()
   125  		fmt.Printf("ulp64=%g, want %g", ulp64, u64)
   126  	}
   127  
   128  	u32 := math.Float32frombits(0x7f7fffff) - math.Float32frombits(0x7f7ffffe)
   129  	if ulp32 != u32 {
   130  		bug()
   131  		fmt.Printf("ulp32=%g, want %g", ulp32, u32)
   132  	}
   133  
   134  	for _, c := range cvt {
   135  		if bits(c.exact) != c.bits {
   136  			bug()
   137  			fmt.Printf("%s: inconsistent table: bits=%#x (%g) but exact=%g (%#x)\n", c.text, c.bits, fromBits(c.bits, c.exact), c.exact, bits(c.exact))
   138  		}
   139  		if c.approx != c.exact || bits(c.approx) != c.bits {
   140  			bug()
   141  			fmt.Printf("%s: have %g (%#x) want %g (%#x)\n", c.text, c.approx, bits(c.approx), c.exact, c.bits)
   142  		}
   143  	}
   144  }
   145  
   146  func bits(x interface{}) interface{} {
   147  	switch x := x.(type) {
   148  	case float32:
   149  		return uint64(math.Float32bits(x))
   150  	case float64:
   151  		return math.Float64bits(x)
   152  	}
   153  	return 0
   154  }
   155  
   156  func fromBits(b uint64, x interface{}) interface{} {
   157  	switch x.(type) {
   158  	case float32:
   159  		return math.Float32frombits(uint32(b))
   160  	case float64:
   161  		return math.Float64frombits(b)
   162  	}
   163  	return "?"
   164  }
   165  

View as plain text