Black Lives Matter. Support the Equal Justice Initiative.

Source file src/runtime/netpoll.go

Documentation: runtime

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build aix darwin dragonfly freebsd js,wasm linux netbsd openbsd solaris windows
     6  
     7  package runtime
     8  
     9  import (
    10  	"runtime/internal/atomic"
    11  	"unsafe"
    12  )
    13  
    14  // Integrated network poller (platform-independent part).
    15  // A particular implementation (epoll/kqueue/port/AIX/Windows)
    16  // must define the following functions:
    17  //
    18  // func netpollinit()
    19  //     Initialize the poller. Only called once.
    20  //
    21  // func netpollopen(fd uintptr, pd *pollDesc) int32
    22  //     Arm edge-triggered notifications for fd. The pd argument is to pass
    23  //     back to netpollready when fd is ready. Return an errno value.
    24  //
    25  // func netpoll(delta int64) gList
    26  //     Poll the network. If delta < 0, block indefinitely. If delta == 0,
    27  //     poll without blocking. If delta > 0, block for up to delta nanoseconds.
    28  //     Return a list of goroutines built by calling netpollready.
    29  //
    30  // func netpollBreak()
    31  //     Wake up the network poller, assumed to be blocked in netpoll.
    32  //
    33  // func netpollIsPollDescriptor(fd uintptr) bool
    34  //     Reports whether fd is a file descriptor used by the poller.
    35  
// Error codes returned by runtime_pollReset and runtime_pollWait.
// These must match the values in internal/poll/fd_poll_runtime.go.
// Within this file they are produced by netpollcheckerr.
const (
	pollNoError        = 0 // no error
	pollErrClosing     = 1 // descriptor is closed (pd.closing is set)
	pollErrTimeout     = 2 // I/O timeout (the deadline for the requested mode is negative)
	pollErrNotPollable = 3 // general error polling descriptor (pd.everr, reported for reads only)
)
    44  
// pollDesc contains 2 binary semaphores, rg and wg, to park reader and writer
// goroutines respectively. The semaphore can be in the following states:
// pdReady - io readiness notification is pending;
//           a goroutine consumes the notification by changing the state to nil.
// pdWait - a goroutine prepares to park on the semaphore, but not yet parked;
//          the goroutine commits to park by changing the state to G pointer,
//          or, alternatively, concurrent io notification changes the state to pdReady,
//          or, alternatively, concurrent timeout/close changes the state to nil.
// G pointer - the goroutine is blocked on the semaphore;
//             io notification or timeout/close changes the state to pdReady or nil respectively
//             and unparks the goroutine.
// nil - none of the above.
//
// The semaphores are stored as uintptr, so "nil" above is the value 0 and
// any value greater than pdWait is interpreted as a G pointer.
const (
	pdReady uintptr = 1 // readiness notification pending
	pdWait  uintptr = 2 // a goroutine is about to park
)
    61  
// pollBlockSize is the target size, in bytes, of each batch of pollDesc
// objects that pollCache.alloc carves out of persistent memory when its
// free list is empty.
const pollBlockSize = 4 * 1024
    63  
// Network poller descriptor.
//
// No heap pointers.
//
//go:notinheap
type pollDesc struct {
	link *pollDesc // in pollcache, protected by pollcache.lock

	// The lock protects pollOpen, pollSetDeadline, pollUnblock and deadlineimpl operations.
	// This fully covers rseq, wseq, rt and wt variables. fd is constant throughout the pollDesc lifetime.
	// pollReset, pollWait, pollWaitCanceled and runtime·netpollready (IO readiness notification)
	// proceed w/o taking the lock. So closing, everr, rg, rd, wg and wd are manipulated
	// in a lock-free way by all operations.
	// NOTE(dvyukov): the following code uses uintptr to store *g (rg/wg),
	// that will blow up when GC starts moving objects.
	lock    mutex // protects the following fields
	fd      uintptr
	closing bool      // set by pollUnblock; makes netpollcheckerr report pollErrClosing
	everr   bool      // marks event scanning error happened
	user    uint32    // user settable cookie
	rseq    uintptr   // protects from stale read timers
	rg      uintptr   // pdReady, pdWait, G waiting for read or nil
	rt      timer     // read deadline timer (set if rt.f != nil)
	rd      int64     // read deadline; 0 means none, < 0 means already expired
	wseq    uintptr   // protects from stale write timers
	wg      uintptr   // pdReady, pdWait, G waiting for write or nil
	wt      timer     // write deadline timer (set if wt.f != nil)
	wd      int64     // write deadline; 0 means none, < 0 means already expired
	self    *pollDesc // storage for indirect interface. See (*pollDesc).makeArg.
}
    94  
// pollCache is a free list of pollDesc objects. Descriptors are handed out
// by alloc and returned by free.
type pollCache struct {
	lock  mutex     // held by alloc and free while they manipulate the list
	first *pollDesc // head of the free list, chained through pollDesc.link
	// PollDesc objects must be type-stable,
	// because we can get ready notification from epoll/kqueue
	// after the descriptor is closed/reused.
	// Stale notifications are detected using seq variable,
	// seq is incremented when deadlines are changed or descriptor is reused.
}
   104  
var (
	// netpollInitLock serializes the one-time call to netpollinit made by
	// netpollGenericInit.
	netpollInitLock mutex
	// netpollInited is set to 1 (atomically) after netpollinit has returned.
	netpollInited uint32

	// pollcache is the free list from which pollOpen takes descriptors and
	// to which pollClose returns them.
	pollcache pollCache
	// netpollWaiters counts goroutines parked on a pollDesc semaphore:
	// netpollblockcommit increments it and netpollgoready decrements it.
	netpollWaiters uint32
)
   112  
// poll_runtime_pollServerInit, which is internal/poll.runtime_pollServerInit,
// makes sure the network poller has been initialized by delegating to
// netpollGenericInit.
//go:linkname poll_runtime_pollServerInit internal/poll.runtime_pollServerInit
func poll_runtime_pollServerInit() {
	netpollGenericInit()
}
   117  
   118  func netpollGenericInit() {
   119  	if atomic.Load(&netpollInited) == 0 {
   120  		lockInit(&netpollInitLock, lockRankNetpollInit)
   121  		lock(&netpollInitLock)
   122  		if netpollInited == 0 {
   123  			netpollinit()
   124  			atomic.Store(&netpollInited, 1)
   125  		}
   126  		unlock(&netpollInitLock)
   127  	}
   128  }
   129  
// netpollinited reports whether netpollGenericInit has completed the
// one-time call to netpollinit (netpollInited is read atomically).
func netpollinited() bool {
	return atomic.Load(&netpollInited) != 0
}
   133  
//go:linkname poll_runtime_isPollServerDescriptor internal/poll.runtime_isPollServerDescriptor

// poll_runtime_isPollServerDescriptor, which is
// internal/poll.runtime_isPollServerDescriptor, reports whether fd is a
// descriptor being used by netpoll. The answer comes from the
// platform-specific netpollIsPollDescriptor.
func poll_runtime_isPollServerDescriptor(fd uintptr) bool {
	return netpollIsPollDescriptor(fd)
}
   141  
   142  //go:linkname poll_runtime_pollOpen internal/poll.runtime_pollOpen
   143  func poll_runtime_pollOpen(fd uintptr) (*pollDesc, int) {
   144  	pd := pollcache.alloc()
   145  	lock(&pd.lock)
   146  	if pd.wg != 0 && pd.wg != pdReady {
   147  		throw("runtime: blocked write on free polldesc")
   148  	}
   149  	if pd.rg != 0 && pd.rg != pdReady {
   150  		throw("runtime: blocked read on free polldesc")
   151  	}
   152  	pd.fd = fd
   153  	pd.closing = false
   154  	pd.everr = false
   155  	pd.rseq++
   156  	pd.rg = 0
   157  	pd.rd = 0
   158  	pd.wseq++
   159  	pd.wg = 0
   160  	pd.wd = 0
   161  	pd.self = pd
   162  	unlock(&pd.lock)
   163  
   164  	var errno int32
   165  	errno = netpollopen(fd, pd)
   166  	return pd, int(errno)
   167  }
   168  
// poll_runtime_pollClose, which is internal/poll.runtime_pollClose,
// unregisters pd.fd from the platform poller via netpollclose and returns
// pd to pollcache.
//
// The descriptor must already have been marked closing by pollUnblock and
// must not have a goroutine parked (or about to park) on either semaphore;
// otherwise the runtime throws. These checks are made without taking
// pd.lock.
//go:linkname poll_runtime_pollClose internal/poll.runtime_pollClose
func poll_runtime_pollClose(pd *pollDesc) {
	if !pd.closing {
		throw("runtime: close polldesc w/o unblock")
	}
	if pd.wg != 0 && pd.wg != pdReady {
		throw("runtime: blocked write on closing polldesc")
	}
	if pd.rg != 0 && pd.rg != pdReady {
		throw("runtime: blocked read on closing polldesc")
	}
	netpollclose(pd.fd)
	pollcache.free(pd)
}
   183  
// free pushes pd onto the head of the cache's free list so that a later
// alloc can hand it out again. The memory itself is never released.
func (c *pollCache) free(pd *pollDesc) {
	lock(&c.lock)
	pd.link = c.first
	c.first = pd
	unlock(&c.lock)
}
   190  
   191  // poll_runtime_pollReset, which is internal/poll.runtime_pollReset,
   192  // prepares a descriptor for polling in mode, which is 'r' or 'w'.
   193  // This returns an error code; the codes are defined above.
   194  //go:linkname poll_runtime_pollReset internal/poll.runtime_pollReset
   195  func poll_runtime_pollReset(pd *pollDesc, mode int) int {
   196  	errcode := netpollcheckerr(pd, int32(mode))
   197  	if errcode != pollNoError {
   198  		return errcode
   199  	}
   200  	if mode == 'r' {
   201  		pd.rg = 0
   202  	} else if mode == 'w' {
   203  		pd.wg = 0
   204  	}
   205  	return pollNoError
   206  }
   207  
   208  // poll_runtime_pollWait, which is internal/poll.runtime_pollWait,
   209  // waits for a descriptor to be ready for reading or writing,
   210  // according to mode, which is 'r' or 'w'.
   211  // This returns an error code; the codes are defined above.
   212  //go:linkname poll_runtime_pollWait internal/poll.runtime_pollWait
   213  func poll_runtime_pollWait(pd *pollDesc, mode int) int {
   214  	errcode := netpollcheckerr(pd, int32(mode))
   215  	if errcode != pollNoError {
   216  		return errcode
   217  	}
   218  	// As for now only Solaris, illumos, and AIX use level-triggered IO.
   219  	if GOOS == "solaris" || GOOS == "illumos" || GOOS == "aix" {
   220  		netpollarm(pd, mode)
   221  	}
   222  	for !netpollblock(pd, int32(mode), false) {
   223  		errcode = netpollcheckerr(pd, int32(mode))
   224  		if errcode != pollNoError {
   225  			return errcode
   226  		}
   227  		// Can happen if timeout has fired and unblocked us,
   228  		// but before we had a chance to run, timeout has been reset.
   229  		// Pretend it has not happened and retry.
   230  	}
   231  	return pollNoError
   232  }
   233  
   234  //go:linkname poll_runtime_pollWaitCanceled internal/poll.runtime_pollWaitCanceled
   235  func poll_runtime_pollWaitCanceled(pd *pollDesc, mode int) {
   236  	// This function is used only on windows after a failed attempt to cancel
   237  	// a pending async IO operation. Wait for ioready, ignore closing or timeouts.
   238  	for !netpollblock(pd, int32(mode), true) {
   239  	}
   240  }
   241  
// poll_runtime_pollSetDeadline, which is internal/poll.runtime_pollSetDeadline,
// updates the read and/or write deadline of pd.
//
// mode is 'r', 'w' or 'r'+'w' and selects which deadline(s) to change.
// A positive d is a delay in nanoseconds from now (nanotime is added to it);
// a zero or negative d is stored as is. A deadline of 0 disarms the timer,
// and a negative deadline additionally unblocks a goroutine currently
// waiting for that mode.
//
// The call is a no-op if pd is already closing.
//go:linkname poll_runtime_pollSetDeadline internal/poll.runtime_pollSetDeadline
func poll_runtime_pollSetDeadline(pd *pollDesc, d int64, mode int) {
	lock(&pd.lock)
	if pd.closing {
		unlock(&pd.lock)
		return
	}
	// Remember the previous deadlines, and whether a single timer was
	// serving both of them, to detect what actually changes below.
	rd0, wd0 := pd.rd, pd.wd
	combo0 := rd0 > 0 && rd0 == wd0
	if d > 0 {
		d += nanotime()
		if d <= 0 {
			// If the user has a deadline in the future, but the delay calculation
			// overflows, then set the deadline to the maximum possible value.
			d = 1<<63 - 1
		}
	}
	if mode == 'r' || mode == 'r'+'w' {
		pd.rd = d
	}
	if mode == 'w' || mode == 'r'+'w' {
		pd.wd = d
	}
	// When both deadlines are equal and positive, the read timer alone
	// serves both directions, using netpollDeadline as its callback.
	combo := pd.rd > 0 && pd.rd == pd.wd
	rtf := netpollReadDeadline
	if combo {
		rtf = netpollDeadline
	}
	if pd.rt.f == nil {
		// No read timer is armed; arm one if a read deadline is set.
		if pd.rd > 0 {
			pd.rt.f = rtf
			// Copy current seq into the timer arg.
			// Timer func will check the seq against current descriptor seq,
			// if they differ the descriptor was reused or timers were reset.
			pd.rt.arg = pd.makeArg()
			pd.rt.seq = pd.rseq
			resettimer(&pd.rt, pd.rd)
		}
	} else if pd.rd != rd0 || combo != combo0 {
		pd.rseq++ // invalidate current timers
		if pd.rd > 0 {
			modtimer(&pd.rt, pd.rd, 0, rtf, pd.makeArg(), pd.rseq)
		} else {
			deltimer(&pd.rt)
			pd.rt.f = nil
		}
	}
	if pd.wt.f == nil {
		// No write timer is armed; arm one unless the read timer covers
		// the write deadline as well (combo).
		if pd.wd > 0 && !combo {
			pd.wt.f = netpollWriteDeadline
			pd.wt.arg = pd.makeArg()
			pd.wt.seq = pd.wseq
			resettimer(&pd.wt, pd.wd)
		}
	} else if pd.wd != wd0 || combo != combo0 {
		pd.wseq++ // invalidate current timers
		if pd.wd > 0 && !combo {
			modtimer(&pd.wt, pd.wd, 0, netpollWriteDeadline, pd.makeArg(), pd.wseq)
		} else {
			deltimer(&pd.wt)
			pd.wt.f = nil
		}
	}
	// If we set the new deadline in the past, unblock currently pending IO if any.
	var rg, wg *g
	if pd.rd < 0 || pd.wd < 0 {
		atomic.StorepNoWB(noescape(unsafe.Pointer(&wg)), nil) // full memory barrier between stores to rd/wd and load of rg/wg in netpollunblock
		if pd.rd < 0 {
			rg = netpollunblock(pd, 'r', false)
		}
		if pd.wd < 0 {
			wg = netpollunblock(pd, 'w', false)
		}
	}
	unlock(&pd.lock)
	// Wake the unblocked goroutines only after pd.lock has been released.
	if rg != nil {
		netpollgoready(rg, 3)
	}
	if wg != nil {
		netpollgoready(wg, 3)
	}
}
   324  
// poll_runtime_pollUnblock, which is internal/poll.runtime_pollUnblock,
// marks pd as closing, wakes any goroutines blocked on it for reading or
// writing, and deletes its deadline timers.
//
// It throws if pd has already been marked closing.
//go:linkname poll_runtime_pollUnblock internal/poll.runtime_pollUnblock
func poll_runtime_pollUnblock(pd *pollDesc) {
	lock(&pd.lock)
	if pd.closing {
		throw("runtime: unblock on closing polldesc")
	}
	pd.closing = true
	// Make any deadline timer that is already firing see a stale seq.
	pd.rseq++
	pd.wseq++
	var rg, wg *g
	atomic.StorepNoWB(noescape(unsafe.Pointer(&rg)), nil) // full memory barrier between store to closing and read of rg/wg in netpollunblock
	rg = netpollunblock(pd, 'r', false)
	wg = netpollunblock(pd, 'w', false)
	if pd.rt.f != nil {
		deltimer(&pd.rt)
		pd.rt.f = nil
	}
	if pd.wt.f != nil {
		deltimer(&pd.wt)
		pd.wt.f = nil
	}
	unlock(&pd.lock)
	// Wake the unblocked goroutines only after pd.lock has been released.
	if rg != nil {
		netpollgoready(rg, 3)
	}
	if wg != nil {
		netpollgoready(wg, 3)
	}
}
   354  
   355  // netpollready is called by the platform-specific netpoll function.
   356  // It declares that the fd associated with pd is ready for I/O.
   357  // The toRun argument is used to build a list of goroutines to return
   358  // from netpoll. The mode argument is 'r', 'w', or 'r'+'w' to indicate
   359  // whether the fd is ready for reading or writing or both.
   360  //
   361  // This may run while the world is stopped, so write barriers are not allowed.
   362  //go:nowritebarrier
   363  func netpollready(toRun *gList, pd *pollDesc, mode int32) {
   364  	var rg, wg *g
   365  	if mode == 'r' || mode == 'r'+'w' {
   366  		rg = netpollunblock(pd, 'r', true)
   367  	}
   368  	if mode == 'w' || mode == 'r'+'w' {
   369  		wg = netpollunblock(pd, 'w', true)
   370  	}
   371  	if rg != nil {
   372  		toRun.push(rg)
   373  	}
   374  	if wg != nil {
   375  		toRun.push(wg)
   376  	}
   377  }
   378  
   379  func netpollcheckerr(pd *pollDesc, mode int32) int {
   380  	if pd.closing {
   381  		return pollErrClosing
   382  	}
   383  	if (mode == 'r' && pd.rd < 0) || (mode == 'w' && pd.wd < 0) {
   384  		return pollErrTimeout
   385  	}
   386  	// Report an event scanning error only on a read event.
   387  	// An error on a write event will be captured in a subsequent
   388  	// write call that is able to report a more specific error.
   389  	if mode == 'r' && pd.everr {
   390  		return pollErrNotPollable
   391  	}
   392  	return pollNoError
   393  }
   394  
   395  func netpollblockcommit(gp *g, gpp unsafe.Pointer) bool {
   396  	r := atomic.Casuintptr((*uintptr)(gpp), pdWait, uintptr(unsafe.Pointer(gp)))
   397  	if r {
   398  		// Bump the count of goroutines waiting for the poller.
   399  		// The scheduler uses this to decide whether to block
   400  		// waiting for the poller if there is nothing else to do.
   401  		atomic.Xadd(&netpollWaiters, 1)
   402  	}
   403  	return r
   404  }
   405  
// netpollgoready decrements netpollWaiters, undoing the increment made in
// netpollblockcommit, and then marks gp runnable with goready.
// traceskip is forwarded to goready incremented by one
// (presumably to account for this wrapper's own frame — confirm against goready).
func netpollgoready(gp *g, traceskip int) {
	atomic.Xadd(&netpollWaiters, -1)
	goready(gp, traceskip+1)
}
   410  
// netpollblock parks the calling goroutine on pd's read semaphore, or on
// its write semaphore when mode is 'w', until it is woken.
//
// returns true if IO is ready, or false if timedout or closed
// waitio - wait only for completed IO, ignore errors
func netpollblock(pd *pollDesc, mode int32, waitio bool) bool {
	gpp := &pd.rg
	if mode == 'w' {
		gpp = &pd.wg
	}

	// set the gpp semaphore to pdWait
	for {
		old := *gpp
		if old == pdReady {
			// A notification is already pending: consume it without parking.
			*gpp = 0
			return true
		}
		if old != 0 {
			// pdWait or a G pointer: someone else is already waiting here.
			throw("runtime: double wait")
		}
		if atomic.Casuintptr(gpp, 0, pdWait) {
			break
		}
	}

	// need to recheck error states after setting gpp to pdWait
	// this is necessary because runtime_pollUnblock/runtime_pollSetDeadline/deadlineimpl
	// do the opposite: store to closing/rd/wd, membarrier, load of rg/wg
	if waitio || netpollcheckerr(pd, mode) == 0 {
		// netpollblockcommit tries to swap pdWait for this goroutine's G pointer.
		gopark(netpollblockcommit, unsafe.Pointer(gpp), waitReasonIOWait, traceEvGoBlockNet, 5)
	}
	// be careful to not lose concurrent pdReady notification
	old := atomic.Xchguintptr(gpp, 0)
	if old > pdWait {
		// Only 0, pdReady or pdWait are expected at this point; a larger
		// value would be a G pointer still stored in the semaphore.
		throw("runtime: corrupted polldesc")
	}
	return old == pdReady
}
   447  
// netpollunblock releases pd's read semaphore, or its write semaphore when
// mode is 'w'.
//
// With ioready set the semaphore is moved to pdReady; otherwise (timeout or
// close) it is moved to nil. It returns the goroutine that was blocked on
// the semaphore so the caller can make it runnable, or nil if no goroutine
// was parked there. The semaphore is left untouched if it is already
// pdReady, or if it is nil and ioready is false.
func netpollunblock(pd *pollDesc, mode int32, ioready bool) *g {
	gpp := &pd.rg
	if mode == 'w' {
		gpp = &pd.wg
	}

	for {
		old := *gpp
		if old == pdReady {
			return nil
		}
		if old == 0 && !ioready {
			// Only set pdReady for ioready. runtime_pollWait
			// will check for timeout/cancel before waiting.
			return nil
		}
		// new stays 0 (nil) for timeout/close.
		var new uintptr
		if ioready {
			new = pdReady
		}
		if atomic.Casuintptr(gpp, old, new) {
			if old == pdWait {
				// The waiter had not committed to parking yet, so there
				// is no goroutine to wake.
				old = 0
			}
			return (*g)(unsafe.Pointer(old))
		}
		// The semaphore changed concurrently; reload it and retry.
	}
}
   476  
// netpolldeadlineimpl is the common body of the deadline timer callbacks
// netpollDeadline, netpollReadDeadline and netpollWriteDeadline.
//
// seq is the sequence number recorded when the timer was armed; read and
// write select which deadline(s) have expired. For each selected direction
// the deadline is set to -1, the timer is marked unset, and a goroutine
// blocked in that direction is woken. A stale seq makes the call a no-op.
func netpolldeadlineimpl(pd *pollDesc, seq uintptr, read, write bool) {
	lock(&pd.lock)
	// Seq arg is seq when the timer was set.
	// If it's stale, ignore the timer event.
	currentSeq := pd.rseq
	if !read {
		currentSeq = pd.wseq
	}
	if seq != currentSeq {
		// The descriptor was reused or timers were reset.
		unlock(&pd.lock)
		return
	}
	var rg *g
	if read {
		if pd.rd <= 0 || pd.rt.f == nil {
			throw("runtime: inconsistent read deadline")
		}
		pd.rd = -1
		atomic.StorepNoWB(unsafe.Pointer(&pd.rt.f), nil) // full memory barrier between store to rd and load of rg in netpollunblock
		rg = netpollunblock(pd, 'r', false)
	}
	var wg *g
	if write {
		// && binds tighter than ||: an unset write timer is an error only
		// when this is not the combined read+write callback, which runs
		// off the read timer.
		if pd.wd <= 0 || pd.wt.f == nil && !read {
			throw("runtime: inconsistent write deadline")
		}
		pd.wd = -1
		atomic.StorepNoWB(unsafe.Pointer(&pd.wt.f), nil) // full memory barrier between store to wd and load of wg in netpollunblock
		wg = netpollunblock(pd, 'w', false)
	}
	unlock(&pd.lock)
	// Wake the unblocked goroutines only after pd.lock has been released.
	if rg != nil {
		netpollgoready(rg, 0)
	}
	if wg != nil {
		netpollgoready(wg, 0)
	}
}
   516  
// netpollDeadline is the timer callback that pollSetDeadline installs on
// the read timer when the read and write deadlines are equal. It expires
// both directions at once. arg is the value built by (*pollDesc).makeArg.
func netpollDeadline(arg interface{}, seq uintptr) {
	netpolldeadlineimpl(arg.(*pollDesc), seq, true, true)
}
   520  
// netpollReadDeadline is the timer callback that pollSetDeadline installs
// on the read timer when only the read deadline is to be expired.
// arg is the value built by (*pollDesc).makeArg.
func netpollReadDeadline(arg interface{}, seq uintptr) {
	netpolldeadlineimpl(arg.(*pollDesc), seq, true, false)
}
   524  
// netpollWriteDeadline is the timer callback that pollSetDeadline installs
// on the write timer. It expires only the write deadline.
// arg is the value built by (*pollDesc).makeArg.
func netpollWriteDeadline(arg interface{}, seq uintptr) {
	netpolldeadlineimpl(arg.(*pollDesc), seq, false, true)
}
   528  
   529  func (c *pollCache) alloc() *pollDesc {
   530  	lock(&c.lock)
   531  	if c.first == nil {
   532  		const pdSize = unsafe.Sizeof(pollDesc{})
   533  		n := pollBlockSize / pdSize
   534  		if n == 0 {
   535  			n = 1
   536  		}
   537  		// Must be in non-GC memory because can be referenced
   538  		// only from epoll/kqueue internals.
   539  		mem := persistentalloc(n*pdSize, 0, &memstats.other_sys)
   540  		for i := uintptr(0); i < n; i++ {
   541  			pd := (*pollDesc)(add(mem, i*pdSize))
   542  			pd.link = c.first
   543  			c.first = pd
   544  		}
   545  	}
   546  	pd := c.first
   547  	c.first = pd.link
   548  	lockInit(&pd.lock, lockRankPollDesc)
   549  	unlock(&c.lock)
   550  	return pd
   551  }
   552  
// makeArg converts pd to an interface{}.
// makeArg does not do any allocation. Normally, such
// a conversion requires an allocation because pointers to
// go:notinheap types (which pollDesc is) must be stored
// in interfaces indirectly. See issue 42076.
//
// The result is used as the timer argument in pollSetDeadline and is
// converted back with arg.(*pollDesc) in the deadline callbacks.
func (pd *pollDesc) makeArg() (i interface{}) {
	// View the named result as its two-word representation and fill
	// both words in by hand.
	x := (*eface)(unsafe.Pointer(&i))
	x._type = pdType
	// The data word points at pd.self, which pollOpen sets to pd.
	x.data = unsafe.Pointer(&pd.self)
	return
}
   564  
var (
	// pdEface is an interface value holding a nil *pollDesc. It exists
	// only so that its type word can be read.
	pdEface interface{} = (*pollDesc)(nil)
	// pdType is the _type word taken from pdEface. makeArg stores it in
	// the interface values it builds by hand.
	pdType *_type = efaceOf(&pdEface)._type
)
   569  

View as plain text