Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

net: TestAcceptTimeout still flaky #17948

Closed
bradfitz opened this issue Nov 16, 2016 · 11 comments
Closed

net: TestAcceptTimeout still flaky #17948

bradfitz opened this issue Nov 16, 2016 · 11 comments
Labels
NeedsFix The path to resolution is known, but the work has not been done. Testing An issue that has been verified to require only test changes, not just a test failure.
Milestone

Comments

@bradfitz
Copy link
Contributor

TestAcceptTimeout is still flaky:

https://build.golang.org/log/74ef15095b884eff2e47c1d1ed91a7643c2424cb

panic: test timed out after 3m0s

goroutine 984 [running]:
panic(0x12368c0, 0xc420078150)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/runtime/panic.go:531 +0x1cf
testing.startAlarm.func1()
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:1041 +0xf9
created by time.goFunc
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/time/sleep.go:170 +0x44

goroutine 1 [chan receive, 2 minutes]:
testing.tRunner.func1(0xc4200c40e0)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:655 +0x17e
testing.tRunner(0xc4200c40e0, 0xc420051db8)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:681 +0x149
testing.runTests(0xc42008eee0, 0x13bb980, 0xd2, 0xd2, 0x0)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:906 +0x29d
testing.(*M).Run(0xc420193f20, 0x13d9200)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:841 +0xfc
net.TestMain(0xc420051f20)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/net/main_test.go:50 +0x39
main.main()
	net/_test/_testmain.go:506 +0xf7

goroutine 17 [syscall, 2 minutes, locked to thread]:
runtime.goexit()
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/runtime/asm_amd64.s:2184 +0x1

goroutine 848 [chan send, 2 minutes]:
testing.tRunner.func1(0xc42047a9a0)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:674 +0x208
testing.tRunner(0xc42047a9a0, 0x128ef18)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:681 +0x149
created by testing.(*T).Run
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:716 +0x2c7

goroutine 874 [chan send, 2 minutes]:
testing.tRunner.func1(0xc4200c4ee0)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:674 +0x208
testing.tRunner(0xc4200c4ee0, 0x128ef90)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:681 +0x149
created by testing.(*T).Run
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:716 +0x2c7

goroutine 859 [chan send, 2 minutes]:
testing.tRunner.func1(0xc4202a4460)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:674 +0x208
testing.tRunner(0xc4202a4460, 0x128f218)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:681 +0x149
created by testing.(*T).Run
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:716 +0x2c7

goroutine 858 [chan send, 2 minutes]:
testing.tRunner.func1(0xc4202a4380)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:674 +0x208
testing.tRunner(0xc4202a4380, 0x128eef0)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:681 +0x149
created by testing.(*T).Run
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:716 +0x2c7

goroutine 857 [chan send, 2 minutes]:
testing.tRunner.func1(0xc4202a42a0)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:674 +0x208
testing.tRunner(0xc4202a42a0, 0x128ef08)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:681 +0x149
created by testing.(*T).Run
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:716 +0x2c7

goroutine 854 [chan send, 2 minutes]:
testing.tRunner.func1(0xc4202a4000)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:674 +0x208
testing.tRunner(0xc4202a4000, 0x128f230)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:681 +0x149
created by testing.(*T).Run
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:716 +0x2c7

goroutine 856 [chan send, 2 minutes]:
testing.tRunner.func1(0xc4202a41c0)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:674 +0x208
testing.tRunner(0xc4202a41c0, 0x128f238)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:681 +0x149
created by testing.(*T).Run
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:716 +0x2c7

goroutine 868 [chan send, 2 minutes]:
testing.tRunner.func1(0xc4200c4a80)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:674 +0x208
testing.tRunner(0xc4200c4a80, 0x128e860)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:681 +0x149
created by testing.(*T).Run
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:716 +0x2c7

goroutine 866 [semacquire, 2 minutes]:
sync.runtime_Semacquire(0xc4200e678c)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/runtime/sema.go:47 +0x34
sync.(*WaitGroup).Wait(0xc4200e6780)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/sync/waitgroup.go:131 +0x73
net.TestAcceptTimeout(0xc4200c48c0)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/net/timeout_test.go:208 +0x3d6
testing.tRunner(0xc4200c48c0, 0x128e878)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:679 +0x13f
created by testing.(*T).Run
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:716 +0x2c7

goroutine 871 [chan send, 2 minutes]:
testing.tRunner.func1(0xc4200c4c40)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:674 +0x208
testing.tRunner(0xc4200c4c40, 0x128f1f8)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:681 +0x149
created by testing.(*T).Run
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:716 +0x2c7

goroutine 855 [chan send, 2 minutes]:
testing.tRunner.func1(0xc4202a40e0)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:674 +0x208
testing.tRunner(0xc4202a40e0, 0x128f228)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:681 +0x149
created by testing.(*T).Run
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:716 +0x2c7

goroutine 875 [chan send, 2 minutes]:
testing.tRunner.func1(0xc4200c4fc0)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:674 +0x208
testing.tRunner(0xc4200c4fc0, 0x128ef78)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:681 +0x149
created by testing.(*T).Run
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:716 +0x2c7

goroutine 867 [chan send, 2 minutes]:
testing.tRunner.func1(0xc4200c49a0)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:674 +0x208
testing.tRunner(0xc4200c49a0, 0x128e870)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:681 +0x149
created by testing.(*T).Run
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:716 +0x2c7

goroutine 978 [IO wait, 2 minutes]:
net.runtime_pollWait(0x1700900, 0x72, 0xb)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/runtime/netpoll.go:164 +0x59
net.(*pollDesc).wait(0xc4200d0538, 0x72, 0x1398580, 0x1396380)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/net/fd_poll_runtime.go:75 +0x38
net.(*pollDesc).waitRead(0xc4200d0538, 0xc420078550, 0x1)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/net/fd_poll_runtime.go:80 +0x34
net.(*netFD).Read(0xc4200d04d0, 0xc420078550, 0x1, 0x1, 0x0, 0x1398580, 0x1396380)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/net/fd_unix.go:250 +0x1b7
net.(*conn).Read(0xc4200f0038, 0xc420078550, 0x1, 0x1, 0x0, 0x0, 0x0)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/net/net.go:180 +0x70
net.TestAcceptTimeout.func1(0xc4200e6780, 0x1399740, 0xc4200ee040, 0xc4200c48c0)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/net/timeout_test.go:180 +0x205
created by net.TestAcceptTimeout
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/net/timeout_test.go:182 +0x84d

goroutine 941 [chan receive, 2 minutes]:
testing.runTests.func1.1(0xc4200c40e0)
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:905 +0x47
created by testing.runTests.func1
	/var/folders/_m/7l4dhkzx3rg5v97tqbz5wkmr0000gn/T/workdir/go/src/testing/testing.go:905 +0xaf
FAIL	net	180.040s

I'm just going to disable it for now. It's doing more harm than good.

@gopherbot
Copy link

CL https://golang.org/cl/33330 mentions this issue.

@gopherbot
Copy link

CL https://golang.org/cl/33258 mentions this issue.

@bradfitz
Copy link
Contributor Author

bradfitz commented Nov 16, 2016

Nope. Still flaky: freebsd-amd64-gce101 at 81627f0

https://build.golang.org/log/c69b9de990f663a7ea8e00a0f2b589365234aaba

@bradfitz bradfitz reopened this Nov 16, 2016
gopherbot pushed a commit that referenced this issue Nov 16, 2016
It's too flaky and doing more harm than good.

Disable it until it can be made reliable.

Updates #17948
Updates #17927

Change-Id: Iaab7f09a4060da377fcd3ca2262527fef50c558d
Reviewed-on: https://go-review.googlesource.com/33330
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
@mikioh
Copy link
Contributor

mikioh commented Nov 17, 2016

That's a different issue. We fixed two issues: a) context-based canceling based on defer function execution behaves differently than previous versions on Darwin and FreeBSD, b) TCP control segments for active close operation at passive open side don't deliver immediately on Darwin, and now have c) "dial tcp 127.0.0.1:42120: i/o timeout" at active open side.

It's fine to move forward step by step, I'm not sure why the test starts behaving unbridledly during Go 1.8 development cycle, though. Does Go 1.8 contain massive changes to runtime, net or testing packages?

@bradfitz
Copy link
Contributor Author

That's a different issue.

In any case, it's still flaky.

Does Go 1.8 contain massive changes to runtime, net or testing packages?

There have certainly been runtime changes. You would know whether there have been net changes. I don't think there have been notable testing changes.

@bradfitz bradfitz added the Testing An issue that has been verified to require only test changes, not just a test failure. label Nov 17, 2016
@bradfitz bradfitz modified the milestones: Go1.8Maybe, Go1.8 Nov 17, 2016
@quentinmit quentinmit added the NeedsFix The path to resolution is known, but the work has not been done. label Nov 17, 2016
@mikioh mikioh removed their assignment Nov 17, 2016
@bradfitz bradfitz modified the milestones: Go1.9, Go1.8Maybe Dec 8, 2016
@broady broady modified the milestones: Go1.9Maybe, Go1.9 Jul 17, 2017
@bradfitz bradfitz modified the milestones: Go1.9Maybe, Go1.10 Jul 20, 2017
@rsc rsc modified the milestones: Go1.10, Go1.11 Nov 22, 2017
@gopherbot
Copy link

Change https://golang.org/cl/105095 mentions this issue: net, net/internal/socktest: make concurrent socket system calls in test safe

@gopherbot gopherbot modified the milestones: Go1.11, Unplanned May 23, 2018
@bcmills bcmills removed this from the Unplanned milestone Dec 11, 2023
@bcmills bcmills added this to the Go1.23 milestone Dec 11, 2023
@bcmills
Copy link
Contributor

bcmills commented Dec 11, 2023

This test has been sitting in the tree with an unconditional skip for a long time. If it is actually useful we should fix it properly; otherwise we should get rid of it.

@gopherbot
Copy link

Change https://go.dev/cl/548940 mentions this issue: net: unskip and attempt to deflake TestAcceptTimeout

@gopherbot
Copy link

Change https://go.dev/cl/557438 mentions this issue: net: report timeouts more aggressively in Accept in the fake implementation

gopherbot pushed a commit that referenced this issue Jan 23, 2024
…tation

This ensures that if the listener has already timed out when Accept
is called, Accept always returns an error instead of instantaneously
accepting a connection that was already pending.

For #17948.

Change-Id: Iabef7121590df3dcc2fe428429d7c2bc2bcb6cd5
Reviewed-on: https://go-review.googlesource.com/c/go/+/557438
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Bryan Mills <bcmills@google.com>
Reviewed-by: Damien Neil <dneil@google.com>
@bcmills
Copy link
Contributor

bcmills commented Jan 23, 2024

Still flaky, but now with watchflakes logs: #65240

@gopherbot
Copy link

Change https://go.dev/cl/558175 mentions this issue: net: ignore Dial errors in TestAcceptTimeout

gopherbot pushed a commit that referenced this issue Jan 24, 2024
Also use DialContext instead of just Dial so that we can ensure
the call returns before we close the listener.

The Dial in this test is intended to complete before the call to
Accept, but there is no synchronization to ensure that and sometimes
it doesn't happen. That's ok and mostly immaterial to the test, but it
does mean we need to ignore Dial errors (which can happen when the
listener is closed), and we need to be a little more careful about not
dialing a port that may have already been reused by some other test.

Fixes #65240.
Updates #17948.

Change-Id: Ife1b5c3062939441b58f4c096461bf5d7841889b
Reviewed-on: https://go-review.googlesource.com/c/go/+/558175
Reviewed-by: Damien Neil <dneil@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Bryan Mills <bcmills@google.com>
Auto-Submit: Bryan Mills <bcmills@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
ezz-no pushed a commit to ezz-no/go-ezzno that referenced this issue Feb 18, 2024
…tation

This ensures that if the listener has already timed out when Accept
is called, Accept always returns an error instead of instantaneously
accepting a connection that was already pending.

For golang#17948.

Change-Id: Iabef7121590df3dcc2fe428429d7c2bc2bcb6cd5
Reviewed-on: https://go-review.googlesource.com/c/go/+/557438
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Bryan Mills <bcmills@google.com>
Reviewed-by: Damien Neil <dneil@google.com>
ezz-no pushed a commit to ezz-no/go-ezzno that referenced this issue Feb 18, 2024
This test has been unconditionally skipped for over five years.
It may be that whatever was causing it to flake has been fixed.
And if it hasn't been fixed, it isn't providing any value.
Let's unskip it for the Go 1.23 development cycle and see what happens.

Let's also use a separate listener for each test case, so that a
leaked Dial goroutine from one case won't interfere with the other.

Fixes golang#17948 (maybe).

Change-Id: I239f22ca5d5a44388b9aa0ed4d81e451c6342617
Reviewed-on: https://go-review.googlesource.com/c/go/+/548940
Commit-Queue: Bryan Mills <bcmills@google.com>
Auto-Submit: Bryan Mills <bcmills@google.com>
Reviewed-by: Damien Neil <dneil@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
ezz-no pushed a commit to ezz-no/go-ezzno that referenced this issue Feb 18, 2024
Also use DialContext instead of just Dial so that we can ensure
the call returns before we close the listener.

The Dial in this test is intended to complete before the call to
Accept, but there is no synchronization to ensure that and sometimes
it doesn't happen. That's ok and mostly immaterial to the test, but it
does mean we need to ignore Dial errors (which can happen when the
listener is closed), and we need to be a little more careful about not
dialing a port that may have already been reused by some other test.

Fixes golang#65240.
Updates golang#17948.

Change-Id: Ife1b5c3062939441b58f4c096461bf5d7841889b
Reviewed-on: https://go-review.googlesource.com/c/go/+/558175
Reviewed-by: Damien Neil <dneil@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Bryan Mills <bcmills@google.com>
Auto-Submit: Bryan Mills <bcmills@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
NeedsFix The path to resolution is known, but the work has not been done. Testing An issue that has been verified to require only test changes, not just a test failure.
Projects
None yet
Development

No branches or pull requests

7 participants