Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

x/net/ipv6: apparent deadlock in TestPacketConnConcurrentReadWriteUnicast on darwin/amd64 #50455

Closed
bcmills opened this issue Jan 5, 2022 · 6 comments
Labels
FrozenDueToAge NeedsInvestigation Someone must examine and confirm this is a valid issue and not a duplicate of an existing one. OS-Darwin release-blocker
Milestone

Comments

@bcmills
Copy link
Contributor

bcmills commented Jan 5, 2022

panic: test timed out after 10m0s

goroutine 130 [running]:
testing.(*M).startAlarm.func1()
	/Users/gopher/workdir/go/src/testing/testing.go:2030 +0x8e
created by time.goFunc
	/Users/gopher/workdir/go/src/time/sleep.go:176 +0x32

goroutine 1 [chan receive]:
testing.(*T).Run(0xc000105a00, {0x11a6718?, 0x11809cf521?}, 0x11a9c78)
	/Users/gopher/workdir/go/src/testing/testing.go:1488 +0x37a
testing.runTests.func1(0xc000105a00?)
	/Users/gopher/workdir/go/src/testing/testing.go:1840 +0x6e
testing.tRunner(0xc000105a00, 0xc000044cd8)
	/Users/gopher/workdir/go/src/testing/testing.go:1440 +0x102
testing.runTests(0xc000134320?, {0x12c4080, 0x17, 0x17}, {0x1352a68?, 0x40?, 0x12c8380?})
	/Users/gopher/workdir/go/src/testing/testing.go:1838 +0x457
testing.(*M).Run(0xc000134320)
	/Users/gopher/workdir/go/src/testing/testing.go:1720 +0x5d9
main.main()
	_testmain.go:97 +0x1aa

goroutine 84 [chan receive]:
testing.(*T).Run(0xc0000ed6c0, {0x119d457?, 0xc0000ef720?}, 0xc0000aac40)
	/Users/gopher/workdir/go/src/testing/testing.go:1488 +0x37a
golang.org/x/net/ipv6_test.TestPacketConnConcurrentReadWriteUnicast(0x0?)
	/Users/gopher/workdir/gopath/src/golang.org/x/net/ipv6/readwrite_test.go:321 +0x19e
testing.tRunner(0xc0000ed6c0, 0x11a9c78)
	/Users/gopher/workdir/go/src/testing/testing.go:1440 +0x102
created by testing.(*T).Run
	/Users/gopher/workdir/go/src/testing/testing.go:1487 +0x35f

goroutine 86 [semacquire]:
sync.runtime_Semacquire(0xc000092ec0?)
	/Users/gopher/workdir/go/src/runtime/sema.go:56 +0x25
sync.(*WaitGroup).Wait(0x119d493?)
	/Users/gopher/workdir/go/src/sync/waitgroup.go:136 +0x52
golang.org/x/net/ipv6_test.testPacketConnConcurrentReadWriteUnicast(0xc0000eda00, 0xc0000cf450, {0xc0000995e0, 0xf, 0xf}, {0x11d5ba0?, 0xc0000d4f00}, 0x0)
	/Users/gopher/workdir/gopath/src/golang.org/x/net/ipv6/readwrite_test.go:508 +0x785
golang.org/x/net/ipv6_test.TestPacketConnConcurrentReadWriteUnicast.func1.1(0xc0000ed860?)
	/Users/gopher/workdir/gopath/src/golang.org/x/net/ipv6/readwrite_test.go:329 +0x75
testing.tRunner(0xc0000eda00, 0xc000092d80)
	/Users/gopher/workdir/go/src/testing/testing.go:1440 +0x102
created by testing.(*T).Run
	/Users/gopher/workdir/go/src/testing/testing.go:1487 +0x35f

goroutine 85 [chan receive]:
testing.(*T).Run(0xc0000ed860, {0x119da1e?, 0x515?}, 0xc000092d80)
	/Users/gopher/workdir/go/src/testing/testing.go:1488 +0x37a
golang.org/x/net/ipv6_test.TestPacketConnConcurrentReadWriteUnicast.func1(0xc0000ed860)
	/Users/gopher/workdir/gopath/src/golang.org/x/net/ipv6/readwrite_test.go:328 +0x225
testing.tRunner(0xc0000ed860, 0xc0000aac40)
	/Users/gopher/workdir/go/src/testing/testing.go:1440 +0x102
created by testing.(*T).Run
	/Users/gopher/workdir/go/src/testing/testing.go:1487 +0x35f

goroutine 126 [IO wait]:
internal/poll.runtime_pollWait(0x1786878, 0x72)
	/Users/gopher/workdir/go/src/runtime/netpoll.go:233 +0x89
internal/poll.(*pollDesc).wait(0xc0000da500?, 0x0?, 0x0)
	/Users/gopher/workdir/go/src/internal/poll/fd_poll_runtime.go:83 +0x32
internal/poll.(*pollDesc).waitRead(...)
	/Users/gopher/workdir/go/src/internal/poll/fd_poll_runtime.go:88
internal/poll.(*FD).RawRead(0xc0000da500, 0xc0000d5260)
	/Users/gopher/workdir/go/src/internal/poll/fd_unix.go:766 +0x145
net.(*rawConn).Read(0xc0000a8160, 0x0?)
	/Users/gopher/workdir/go/src/net/rawconn.go:43 +0x45
golang.org/x/net/internal/socket.(*Conn).recvMsg(0xc0000aac60, 0xc000394698, 0x0)
	/Users/gopher/workdir/gopath/src/golang.org/x/net/internal/socket/rawconn_msg.go:29 +0x202
golang.org/x/net/internal/socket.(*Conn).RecvMsg(...)
	/Users/gopher/workdir/gopath/src/golang.org/x/net/internal/socket/socket.go:247
golang.org/x/net/ipv6.(*payloadHandler).ReadFrom(0xc0000cf460, {0xc0000a4380, 0x80, 0x80})
	/Users/gopher/workdir/gopath/src/golang.org/x/net/ipv6/payload_cmsg.go:32 +0x1fc
golang.org/x/net/ipv6_test.testPacketConnConcurrentReadWriteUnicast.func1()
	/Users/gopher/workdir/gopath/src/golang.org/x/net/ipv6/readwrite_test.go:375 +0xda
created by golang.org/x/net/ipv6_test.testPacketConnConcurrentReadWriteUnicast
	/Users/gopher/workdir/gopath/src/golang.org/x/net/ipv6/readwrite_test.go:505 +0x771
FAIL	golang.org/x/net/ipv6	600.594s

greplogs --dashboard -md -l -e '(?ms)panic: test timed out.*\nsync\.\(\*WaitGroup\)\.Wait.*\n\t.*\ngolang\.org/x/net/ipv6_test.testPacketConnConcurrentReadWriteUnicast' --since=2021-01-01

2022-01-05T16:44:50-5b0dc2d-002283e/darwin-amd64-12_0
2021-11-11T16:01:37-58aab5e-3729a67/darwin-amd64-11_0
2021-10-05T18:39:53-d4b1ae0-7e69c5d/darwin-amd64-11_0
2021-10-01T17:27:29-e81a3d9-3357624/darwin-amd64-10_14
2021-09-27T18:44:47-4e4d966-d4007ae/darwin-amd64-10_14
2021-09-24T15:19:03-3ad01bb-7d57324/darwin-amd64-11_0
2021-08-24T16:55:17-60bc85c-daa55b2/darwin-amd64-10_14
2021-03-15T14:36:23-e18ecbb-7bfe32f/darwin-amd64-10_14

CC @ianlancetaylor

@bcmills bcmills added the OS-Darwin and NeedsInvestigation (Someone must examine and confirm this is a valid issue and not a duplicate of an existing one.) labels Jan 5, 2022
@bcmills bcmills added this to the Backlog milestone Jan 5, 2022
@bcmills
Copy link
Contributor Author

bcmills commented Jan 6, 2022

It is always the reader that gets stuck, but sometimes it is the Batch reader and other times it is the ToFrom reader.

@bcmills
Copy link
Contributor Author

bcmills commented Jan 6, 2022

This output is consistent with what would happen if a writer goroutine errors out before writing.

The t.Errorf output from the writer is likely swallowed by the timeout panic.

@gopherbot
Copy link

Change https://golang.org/cl/376094 mentions this issue: ipv6: shut down the PacketConn on failure in TestPacketConnConcurrentReadWriteUnicast

@bcmills
Copy link
Contributor Author

bcmills commented Jan 6, 2022

A guess: perhaps the writers are failing with ENOBUFS, as also occurred on the darwin builders in #37319.

@gopherbot
Copy link

Change https://golang.org/cl/376095 mentions this issue: ipv6: retry ENOBUFS errors in TestPacketConnConcurrentReadWriteUnicast

@bcmills
Copy link
Contributor Author

bcmills commented Jan 7, 2022

(Marking as release-blocker via #11811.)

@bcmills bcmills modified the milestones: Backlog, Go1.18 Jan 7, 2022
gopherbot pushed a commit to golang/net that referenced this issue Jan 7, 2022
ipv6: shut down the PacketConn on failure in TestPacketConnConcurrentReadWriteUnicast

This avoids a deadlock (observed in golang/go#50455) that may
otherwise swallow the error logs from a failure, which may help us
better diagnose the underlying problem.

For golang/go#50455.

Change-Id: Id73bd9589ae23385a433da0b24840ef945601f63
Reviewed-on: https://go-review.googlesource.com/c/net/+/376094
Trust: Bryan Mills <bcmills@google.com>
Run-TryBot: Bryan Mills <bcmills@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
@bcmills bcmills self-assigned this Jan 7, 2022
@golang golang deleted a comment from gopherbot Feb 24, 2022
@golang golang deleted a comment from gopherbot Feb 24, 2022
@rsc rsc unassigned bcmills Jun 22, 2022
WeiminShang added a commit to WeiminShang/net that referenced this issue Nov 16, 2022
ipv6: shut down the PacketConn on failure in TestPacketConnConcurrentReadWriteUnicast

This avoids a deadlock (observed in golang/go#50455) that may
otherwise swallow the error logs from a failure, which may help us
better diagnose the underlying problem.

For golang/go#50455.

Change-Id: Id73bd9589ae23385a433da0b24840ef945601f63
Reviewed-on: https://go-review.googlesource.com/c/net/+/376094
Trust: Bryan Mills <bcmills@google.com>
Run-TryBot: Bryan Mills <bcmills@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
WeiminShang added a commit to WeiminShang/net that referenced this issue Nov 16, 2022
This change is sheer speculation based on the failures observed in
golang/go#37319.

(A deadlock in the test prevented us from seeing the actual failure
mode of golang/go#50455 up until CL 376094, and it isn't obvious to me
that we should wait for another failure before trying a likely — and
otherwise harmless — fix.)

Fixes golang/go#50455.
(Maybe.)

Change-Id: I7483eb2243832d07cb1f815da196b3978a50c6b3
Reviewed-on: https://go-review.googlesource.com/c/net/+/376095
Trust: Bryan Mills <bcmills@google.com>
Run-TryBot: Bryan Mills <bcmills@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
@golang golang locked and limited conversation to collaborators Jun 22, 2023
Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
FrozenDueToAge NeedsInvestigation Someone must examine and confirm this is a valid issue and not a duplicate of an existing one. OS-Darwin release-blocker
Projects
None yet
Development

No branches or pull requests

2 participants