|
|
Created:
11 years, 5 months ago by Hierro Modified:
11 years, 5 months ago CC:
golang-dev, bradfitz, tad.glines_gmail.com, bketelsen Visibility:
Public. |
Descriptiondatabase/sql: Fix connection leak and potential deadlock
CL 10726044 introduced a race condition which causes connections
to be leaked under certain circumstances. If SetMaxOpenConns is
used, the application eventually deadlocks. Otherwise, the number
of open connections just keep growing indefinitely.
Fixes issue 6593
Patch Set 1 #Patch Set 2 : diff -r 062f1f0ea8ba https://code.google.com/p/go #Patch Set 3 : diff -r 062f1f0ea8ba https://code.google.com/p/go #Patch Set 4 : diff -r 062f1f0ea8ba https://code.google.com/p/go #Patch Set 5 : diff -r 062f1f0ea8ba https://code.google.com/p/go #
Total comments: 5
Patch Set 6 : diff -r 9d0d95344a6c https://code.google.com/p/go #
MessagesTotal messages: 28
Hello golang-dev@googlegroups.com, I'd like you to review this change to https://code.google.com/p/go
Sign in to reply to this message.
No test to demonstrate the problem/fix? What are the certain conditions? On Oct 11, 2013 9:04 AM, <alberto.garcia.hierro@gmail.com> wrote: > Reviewers: golang-dev1, > > Message: > Hello golang-dev@googlegroups.com, > > I'd like you to review this change to > https://code.google.com/p/go > > > Description: > database/sql: Fix connection leak and potential deadlock > > CL 10726044 introduced a race condition which causes connections > to be leaked under certain circumstances. If SetMaxOpenConns is > used, the application eventually deadlocks. Otherwise, the number > of open connections just keep growing indefinitely. > > Please review this at https://codereview.appspot.**com/14611045/<https://codereview.appspot.com/146... > > Affected files (+16, -24 lines): > M src/pkg/database/sql/sql.go > > > Index: src/pkg/database/sql/sql.go > ==============================**==============================**======= > --- a/src/pkg/database/sql/sql.go > +++ b/src/pkg/database/sql/sql.go > @@ -427,6 +427,7 @@ > dsn: dataSourceName, > openerCh: make(chan struct{}, connectionRequestQueueSize), > lastPut: make(map[*driverConn]string), > + maxIdle: defaultMaxIdleConns, > } > db.freeConn = list.New() > db.connRequests = list.New() > @@ -482,19 +483,6 @@ > > const defaultMaxIdleConns = 2 > > -func (db *DB) maxIdleConnsLocked() int { > - n := db.maxIdle > - switch { > - case n == 0: > - // TODO(bradfitz): ask driver, if supported, for its > default preference > - return defaultMaxIdleConns > - case n < 0: > - return 0 > - default: > - return n > - } > -} > - > // SetMaxIdleConns sets the maximum number of connections in the idle > // connection pool. > // > @@ -505,17 +493,21 @@ > func (db *DB) SetMaxIdleConns(n int) { > db.mu.Lock() > defer db.mu.Unlock() > - if n > 0 { > - db.maxIdle = n > - } else { > + switch { > + case n == 0: > + // TODO(bradfitz): ask driver, if supported, for its > default preference > + db.maxIdle = defaultMaxIdleConns > + case n < 0: > // No idle connections. > db.maxIdle = -1 > + default: > + db.maxIdle = n > } > // Make sure maxIdle doesn't exceed maxOpen > - if db.maxOpen > 0 && db.maxIdleConnsLocked() > db.maxOpen { > + if db.maxOpen > 0 && db.maxIdle > db.maxOpen { > db.maxIdle = db.maxOpen > } > - for db.freeConn.Len() > db.maxIdleConnsLocked() { > + for db.freeConn.Len() > db.maxIdle { > dc := db.freeConn.Back().Value.(***driverConn) > dc.listElem = nil > db.freeConn.Remove(db.**freeConn.Back()) > @@ -537,7 +529,7 @@ > if n < 0 { > db.maxOpen = 0 > } > - syncMaxIdle := db.maxOpen > 0 && db.maxIdleConnsLocked() > > db.maxOpen > + syncMaxIdle := db.maxOpen > 0 && db.maxIdle > db.maxOpen > db.mu.Unlock() > if syncMaxIdle { > db.SetMaxIdleConns(n) > @@ -582,7 +574,7 @@ > } > db.pendingOpens-- > if err != nil { > - db.putConnDBLocked(nil, err) > + db.putConnDBLocked(nil, err, false) > return > } > dc := &driverConn{ > @@ -591,7 +583,7 @@ > } > db.addDepLocked(dc, dc) > db.numOpen++ > - db.putConnDBLocked(dc, err) > + db.putConnDBLocked(dc, nil, true) > } > > // connRequest represents one request for a new connection > @@ -753,7 +745,7 @@ > if putConnHook != nil { > putConnHook(db, dc) > } > - added := db.putConnDBLocked(dc, nil) > + added := db.putConnDBLocked(dc, nil, false) > db.mu.Unlock() > > if !added { > @@ -770,7 +762,7 @@ > // If err == nil, then dc must not equal nil. > // If a connRequest was fullfilled or the *driverConn was placed in the > // freeConn list, then true is returned, otherwise false is returned. > -func (db *DB) putConnDBLocked(dc *driverConn, err error) bool { > +func (db *DB) putConnDBLocked(dc *driverConn, err error, force bool) bool > { > if db.connRequests.Len() > 0 { > req := db.connRequests.Front().Value.**(connRequest) > db.connRequests.Remove(db.**connRequests.Front()) > @@ -781,7 +773,7 @@ > req <- dc > } > return true > - } else if err == nil && !db.closed && db.maxIdleConnsLocked() > 0 > && db.maxIdleConnsLocked() > db.freeConn.Len() { > + } else if err == nil && !db.closed && (force || db.maxIdle > > db.freeConn.Len()) { > dc.listElem = db.freeConn.PushFront(dc) > return true > } > > > -- > > ---You received this message because you are subscribed to the Google > Groups "golang-dev" group. > To unsubscribe from this group and stop receiving emails from it, send an > email to golang-dev+unsubscribe@**googlegroups.com<golang-dev%2Bunsubscribe@googlegrou... > . > For more options, visit https://groups.google.com/**groups/opt_out<https://groups.google.com/groups/o... > . >
Sign in to reply to this message.
On 2013/10/11 19:07:44, bradfitz wrote: > No test to demonstrate the problem/fix? I could not reproduce the problem with a small program, but it manifested itself repeatedly in one of our high traffic sites every few days. The timing is pretty sensitive and the window for the leak to occur is very small. What basically happens is: - A new connection is requested while there are no idle connections. A signal is sent via openerCh to open a new one. - The gorutine running connectionOpener() gets the signal and starts opening a new connection. Since that involves sockets, it gets suspended. - In the meantime, maxIdle (2 by default) + 1 connections which were already open but busy are released. This fulfills the connection requested by the first goroutine and makes the idle list full. - The new connection finishes opening, execution resumes at openNewConnection() and it calls putConnDBLocked() without checking the return value, assuming the connection will either fulfill a pending request or be added to the idle queue, but that's not always the case. In this situation, the connection ends up discarded without being closed, but counted (numOpen is incremented). - Eventually, the number of leaked connections reaches MIN(maxOpenConnections, max_connections_the_server_allows) and either deadlocks or stops working bringing the whole database server down. This was a total pain to track down (in part because we have a ORM built on top of database/sql and I started digging there before looking in database/sql), but I think this explanation makes it pretty evident. As for the fix, I saw basically two options: either close and discard the connection or add it to the idle list. I opted for the latter because by the time we realize the connection is not needed anymore is already open and ready to use. It would be a waste to close it when the app will probably need to open a new one in the near future, even when this fix momentarily violates the maxIdle constraint. Regards, Alberto
Sign in to reply to this message.
Thanks for the explanation. Makes sense. The http client code has similar logic for opening new TCP connections to hosts. Please document the new force parameter. And put a subset of this email into a comment near it maybe. On Oct 11, 2013 9:27 AM, <alberto.garcia.hierro@gmail.com> wrote: > On 2013/10/11 19:07:44, bradfitz wrote: > >> No test to demonstrate the problem/fix? >> > > I could not reproduce the problem with a small program, but it > manifested itself repeatedly in one of our high traffic sites every few > days. The timing is pretty sensitive and the window for the leak to > occur is very small. What basically happens is: > > - A new connection is requested while there are no idle connections. A > signal is sent via openerCh to open a new one. > - The gorutine running connectionOpener() gets the signal and starts > opening a new connection. Since that involves sockets, it gets > suspended. > - In the meantime, maxIdle (2 by default) + 1 connections which were > already open but busy are released. This fulfills the connection > requested by the first goroutine and makes the idle list full. > - The new connection finishes opening, execution resumes at > openNewConnection() and it calls putConnDBLocked() without checking the > return value, assuming the connection will either fulfill a pending > request or be added to the idle queue, but that's not always the case. > In this situation, the connection ends up discarded without being > closed, but counted (numOpen is incremented). > - Eventually, the number of leaked connections reaches > MIN(maxOpenConnections, max_connections_the_server_**allows) and either > deadlocks or stops working bringing the whole database server down. > > This was a total pain to track down (in part because we have a ORM built > on top of database/sql and I started digging there before looking in > database/sql), but I think this explanation makes it pretty evident. > > As for the fix, I saw basically two options: either close and discard > the connection or add it to the idle list. I opted for the latter > because by the time we realize the connection is not needed anymore is > already open and ready to use. It would be a waste to close it when the > app will probably need to open a new one in the near future, even when > this fix momentarily violates the maxIdle constraint. > > Regards, > Alberto > > https://codereview.appspot.**com/14611045/<https://codereview.appspot.com/146... >
Sign in to reply to this message.
On 2013/10/11 19:34:42, bradfitz wrote: > Thanks for the explanation. Makes sense. The http client code has similar > logic for opening new TCP connections to hosts. > > Please document the new force parameter. And put a subset of this email > into a comment near it maybe. My pleasure. I'm glad we caught this before the 1.2. I've updated the patch according to your comments. PTAL. Regards, Alberto
Sign in to reply to this message.
It seems that there are still some connection leaks even after applying this patch, although they happen way more slowly than before. I'm currently investigating the issue. Please, don't merge it in the meantime.
Sign in to reply to this message.
On 2013/10/12 09:55:58, Hierro wrote: > It seems that there are still some connection leaks even after applying this > patch, although they happen way more slowly than before. I'm currently > investigating the issue. Please, don't merge it in the meantime. After further investigation I've found and fixed a connection leak in our ORM, so the CL still seems to be correct. I will let our development server run for a few hours while load testing it with ab and then I'll report the results later. Regards, Alberto
Sign in to reply to this message.
Please report back. Time is running short on Go 1.2, but this is why we have rc releases, to encourage people to use it and find bugs. I'm back to work in a few days and will try to write a test for this, unless you beat me to it. I think it's probably possible to instrument the fake driver to control its open-new-connection timing for this particular test. On Sat, Oct 12, 2013 at 12:57 AM, <alberto.garcia.hierro@gmail.com> wrote: > On 2013/10/12 09:55:58, Hierro wrote: > >> It seems that there are still some connection leaks even after >> > applying this > >> patch, although they happen way more slowly than before. I'm currently >> investigating the issue. Please, don't merge it in the meantime. >> > > After further investigation I've found and fixed a connection leak in > our ORM, so the CL still seems to be correct. I will let our development > server run for a few hours while load testing it with ab and then I'll > report the results later. > > Regards, > Alberto > > https://codereview.appspot.**com/14611045/<https://codereview.appspot.com/146... >
Sign in to reply to this message.
On 2013/10/12 21:00:54, bradfitz wrote: > Please report back. Time is running short on Go 1.2, but this is why we > have rc releases, to encourage people to use it and find bugs. > > I'm back to work in a few days and will try to write a test for this, > unless you beat me to it. I think it's probably possible to instrument the > fake driver to control its open-new-connection timing for this particular > test. I just came back home and, unfortunately, it seems I didn't properly start the test before leaving (I was already late and setting up the test in a rush). I'll leave it running over the night and I'll report back tomorrow. On the other hand, I just took a quick look a the fake driver and it seems I could easily make a test for this. It looks like the only significant thing I'd need to add to the the fake driver would be a configurable delay when opening a new connection. Would that be OK? Regards, Alberto
Sign in to reply to this message.
Don't use time.Sleep in the fake driver. Control operation order with channels. On Oct 12, 2013 12:13 PM, <alberto.garcia.hierro@gmail.com> wrote: > On 2013/10/12 21:00:54, bradfitz wrote: > >> Please report back. Time is running short on Go 1.2, but this is why >> > we > >> have rc releases, to encourage people to use it and find bugs. >> > > I'm back to work in a few days and will try to write a test for this, >> unless you beat me to it. I think it's probably possible to >> > instrument the > >> fake driver to control its open-new-connection timing for this >> > particular > >> test. >> > > I just came back home and, unfortunately, it seems I didn't properly > start the test before leaving (I was already late and setting up the > test in a rush). I'll leave it running over the night and I'll report > back tomorrow. > > On the other hand, I just took a quick look a the fake driver and it > seems I could easily make a test for this. It looks like the only > significant thing I'd need to add to the the fake driver would be a > configurable delay when opening a new connection. Would that be OK? > > Regards, > Alberto > > https://codereview.appspot.**com/14611045/<https://codereview.appspot.com/146... >
Sign in to reply to this message.
Good news everyone! The test has been running for 12 hours and db.numOpen matches the number of connections open in the server, so I guess we have a fix for this problem. This morning I also came up with a test for it and, in the process, found another couple of bugs in sql.go and another one in sql_test.go. Please, let me know when you're back to work so I can submit the CLs and get them merged quickly and then we can decide on a final solution for this issue. The current one is probably the optimal when it comes to performance but it makes some tests fail. We would need either to adjust the tests or discard the connection when the idle queue is full while also closing it. Regards, Alberto
Sign in to reply to this message.
Please mail your CLs when they're ready. I'll look Tuesday morning US/Pacific. On Oct 13, 2013 1:00 AM, <alberto.garcia.hierro@gmail.com> wrote: > Good news everyone! The test has been running for 12 hours and > db.numOpen matches the number of connections open in the server, so I > guess we have a fix for this problem. This morning I also came up with a > test for it and, in the process, found another couple of bugs in sql.go > and another one in sql_test.go. > > Please, let me know when you're back to work so I can submit the CLs and > get them merged quickly and then we can decide on a final solution for > this issue. The current one is probably the optimal when it comes to > performance but it makes some tests fail. We would need either to adjust > the tests or discard the connection when the idle queue is full while > also closing it. > > Regards, > Alberto > > https://codereview.appspot.**com/14611045/<https://codereview.appspot.com/146... >
Sign in to reply to this message.
https://codereview.appspot.com/14611045/diff/14001/src/pkg/database/sql/sql.go File src/pkg/database/sql/sql.go (left): https://codereview.appspot.com/14611045/diff/14001/src/pkg/database/sql/sql.g... src/pkg/database/sql/sql.go:485: func (db *DB) maxIdleConnsLocked() int { why is this being removed? can't you fix this more minimally with just the "force" part below? but if you do have to change this, you need to update the docs on the DB.maxIdle field. The values of 0 and -1 have changed, right? https://codereview.appspot.com/14611045/diff/14001/src/pkg/database/sql/sql.go File src/pkg/database/sql/sql.go (right): https://codereview.appspot.com/14611045/diff/14001/src/pkg/database/sql/sql.g... src/pkg/database/sql/sql.go:201: // Used to sygnal the need for new connections typo: signal https://codereview.appspot.com/14611045/diff/14001/src/pkg/database/sql/sql.g... src/pkg/database/sql/sql.go:586: // hierro: Force the connection to be added to the idle list, remove "hierro: ". https://codereview.appspot.com/14611045/diff/14001/src/pkg/database/sql/sql.g... src/pkg/database/sql/sql.go:592: // - The gorutine running connectionOpener() gets the signal and typo: goroutine https://codereview.appspot.com/14611045/diff/14001/src/pkg/database/sql/sql.g... src/pkg/database/sql/sql.go:593: // starts opening a new connection. Since that involves sockets, Change the "Since that" sentence to just "It blocks while connecting."
Sign in to reply to this message.
Hi Brad, Thanks for the review, I'll keep your comments in mind when submitting the changes. I was a bit busy today and I wasn't able to find time to submit the new CLs. Just one small question thought. I made a modification to the tests in closeDB() which uncovers a couple of connection leaks (one is in the test while the other one is very difficult to trigger) and a bug which incorrectly decrements numOpen. Those three fixes are 1 line each one. Should I submit everything in one CL or is it better if I split the fixes in smaller ones? In the latter case, should I submit the change to the test before or after the fixes? Thanks! Regards, Alberto
Sign in to reply to this message.
One CL. You don't have much time, though. Today or tomorrow morning would be best. The next Go 1.2 RC release (which might be the final release?) is coming soon. On Tue, Oct 15, 2013 at 12:04 PM, <alberto.garcia.hierro@gmail.com> wrote: > Hi Brad, > > Thanks for the review, I'll keep your comments in mind when submitting > the changes. I was a bit busy today and I wasn't able to find time to > submit the new CLs. Just one small question thought. I made a > modification to the tests in closeDB() which uncovers a couple of > connection leaks (one is in the test while the other one is very > difficult to trigger) and a bug which incorrectly decrements numOpen. > Those three fixes are 1 line each one. Should I submit everything in one > CL or is it better if I split the fixes in smaller ones? In the latter > case, should I submit the change to the test before or after the fixes? > Thanks! > > Regards, > Alberto > > https://codereview.appspot.**com/14611045/<https://codereview.appspot.com/146... >
Sign in to reply to this message.
Hi Brad, I've just submitted the first CL. Please, take a look at it https://codereview.appspot.com/14642044 Regards, Alberto
Sign in to reply to this message.
[+Tad] Tad, you might be interested in fixing this too, considering it affects your new feature. On Tue, Oct 15, 2013 at 12:33 PM, Brad Fitzpatrick <bradfitz@golang.org>wrote: > One CL. > > You don't have much time, though. Today or tomorrow morning would be > best. The next Go 1.2 RC release (which might be the final release?) is > coming soon. > > > > On Tue, Oct 15, 2013 at 12:04 PM, <alberto.garcia.hierro@gmail.com> wrote: > >> Hi Brad, >> >> Thanks for the review, I'll keep your comments in mind when submitting >> the changes. I was a bit busy today and I wasn't able to find time to >> submit the new CLs. Just one small question thought. I made a >> modification to the tests in closeDB() which uncovers a couple of >> connection leaks (one is in the test while the other one is very >> difficult to trigger) and a bug which incorrectly decrements numOpen. >> Those three fixes are 1 line each one. Should I submit everything in one >> CL or is it better if I split the fixes in smaller ones? In the latter >> case, should I submit the change to the test before or after the fixes? >> Thanks! >> >> Regards, >> Alberto >> >> https://codereview.appspot.**com/14611045/<https://codereview.appspot.com/146... >> > >
Sign in to reply to this message.
While closing a connection right after opening it might seems like a waste, doing so is the simpler solution. The last three lines of openNewConnection could be changed to: if db.putConnDBLocked(dc, err) { db.addDepLocked(dc, dc) db.numOpen++ } else { ci.Close() } If there is a lot of connection churn, then raising max idle seems like the the more appropriate solution. I can look at trying to create a test for this tomorrow if needed. On 2013/10/16 00:24:37, bradfitz wrote: > [+Tad] > > Tad, you might be interested in fixing this too, considering it affects > your new feature. > > > > On Tue, Oct 15, 2013 at 12:33 PM, Brad Fitzpatrick <bradfitz@golang.org>wrote: > > > One CL. > > > > You don't have much time, though. Today or tomorrow morning would be > > best. The next Go 1.2 RC release (which might be the final release?) is > > coming soon. > > > > > > > > On Tue, Oct 15, 2013 at 12:04 PM, <mailto:alberto.garcia.hierro@gmail.com> wrote: > > > >> Hi Brad, > >> > >> Thanks for the review, I'll keep your comments in mind when submitting > >> the changes. I was a bit busy today and I wasn't able to find time to > >> submit the new CLs. Just one small question thought. I made a > >> modification to the tests in closeDB() which uncovers a couple of > >> connection leaks (one is in the test while the other one is very > >> difficult to trigger) and a bug which incorrectly decrements numOpen. > >> Those three fixes are 1 line each one. Should I submit everything in one > >> CL or is it better if I split the fixes in smaller ones? In the latter > >> case, should I submit the change to the test before or after the fixes? > >> Thanks! > >> > >> Regards, > >> Alberto > >> > >> > https://codereview.appspot.**com/14611045/%3Chttps://codereview.appspot.com/1...> > >> > > > >
Sign in to reply to this message.
Sooner the better. The Go 1.2 window is closing soon. On Tue, Oct 15, 2013 at 8:31 PM, <Tad.Glines@gmail.com> wrote: > While closing a connection right after opening it might seems like a > waste, doing so is the simpler solution. > The last three lines of openNewConnection could be changed to: > if db.putConnDBLocked(dc, err) { > db.addDepLocked(dc, dc) > db.numOpen++ > } else { > ci.Close() > } > > If there is a lot of connection churn, then raising max idle seems like > the the more appropriate solution. > > I can look at trying to create a test for this tomorrow if needed. > > > On 2013/10/16 00:24:37, bradfitz wrote: > >> [+Tad] >> > > Tad, you might be interested in fixing this too, considering it >> > affects > >> your new feature. >> > > > > On Tue, Oct 15, 2013 at 12:33 PM, Brad Fitzpatrick >> > <bradfitz@golang.org>wrote: > > > One CL. >> > >> > You don't have much time, though. Today or tomorrow morning would >> > be > >> > best. The next Go 1.2 RC release (which might be the final >> > release?) is > >> > coming soon. >> > >> > >> > >> > On Tue, Oct 15, 2013 at 12:04 PM, >> > <mailto:alberto.garcia.hierro@**gmail.com<alberto.garcia.hierro@gmail.com>> > wrote: > >> > >> >> Hi Brad, >> >> >> >> Thanks for the review, I'll keep your comments in mind when >> > submitting > >> >> the changes. I was a bit busy today and I wasn't able to find time >> > to > >> >> submit the new CLs. Just one small question thought. I made a >> >> modification to the tests in closeDB() which uncovers a couple of >> >> connection leaks (one is in the test while the other one is very >> >> difficult to trigger) and a bug which incorrectly decrements >> > numOpen. > >> >> Those three fixes are 1 line each one. Should I submit everything >> > in one > >> >> CL or is it better if I split the fixes in smaller ones? In the >> > latter > >> >> case, should I submit the change to the test before or after the >> > fixes? > >> >> Thanks! >> >> >> >> Regards, >> >> Alberto >> >> >> >> >> > > https://codereview.appspot.****com/14611045/%3Chttps://codere** > view.appspot.com/14611045/ <http://codereview.appspot.com/14611045/>> > >> >> >> > >> > >> > > > https://codereview.appspot.**com/14611045/<https://codereview.appspot.com/146... >
Sign in to reply to this message.
I've updated the CL with the smallest possible fix, albeit not the most efficient, as well as the test which shows the problem. Please, keep in mind that in order to get the new test to fail you need to apply the CL at https://codereview.appspot.com/14642044/ because it contains the test for leaked connections in closeDB(). PTAL Regards, Alberto
Sign in to reply to this message.
We're keen to see a fix for this applied. Anything we can do to help? On Wednesday, October 16, 2013 8:05:00 AM UTC-4, Alberto GarcĂa Hierro wrote: > > I've updated the CL with the smallest possible fix, albeit not the most > efficient, as well as the test which shows the problem. Please, keep in > mind that in order to get the new test to fail you need to apply the CL > at https://codereview.appspot.com/14642044/ because it contains the test > for leaked connections in closeDB(). PTAL > > Regards, > Alberto > > https://codereview.appspot.com/14611045/ >
Sign in to reply to this message.
On 2013/10/16 13:05:10, bketelsen wrote: > We're keen to see a fix for this applied. Anything we can do to help? > Hi Brian, I wouldn't worry too much, because I don't think Go 1.2 will be released with this bug. It would render database/sql useless for a lot of scenarios. If you want to help, I think you could check that the new test exposes this problem and the fix works as intended. Steps to do that: 1 - hg clpatch 14642044 2 - hg 14611045 3 - cd src/pkg/database/sql 4 - go test, check if all tests pass 5 - edit sql.go and change: if db.putConnDBLocked(dc, err) { db.addDepLocked(dc, dc) db.numOpen++ } else { ci.Close() } to: db.putConnDBLocked(dc, err) 6 - go test -v run Leak and check if TestConnectionLeak fails 7 - report your results Thanks! Regards, Alberto
Sign in to reply to this message.
Small typo in previous email: step 2 should be hg clpatch 14611045 Regards, Alberto
Sign in to reply to this message.
LGTM
Sign in to reply to this message.
*** Submitted as https://code.google.com/p/go/source/detail?r=c67a1d0df694 *** database/sql: Fix connection leak and potential deadlock CL 10726044 introduced a race condition which causes connections to be leaked under certain circumstances. If SetMaxOpenConns is used, the application eventually deadlocks. Otherwise, the number of open connections just keep growing indefinitely. Fixes issue 6593 R=golang-dev, bradfitz, tad.glines, bketelsen CC=golang-dev https://codereview.appspot.com/14611045 Committer: Brad Fitzpatrick <bradfitz@golang.org>
Sign in to reply to this message.
Message was sent while issue was closed.
On 2013/10/16 16:23:02, bradfitz wrote: > *** Submitted as https://code.google.com/p/go/source/detail?r=c67a1d0df694 *** > database/sql: Fix connection leak and potential deadlock A recent change to database/sql seems to have broken the tests if you run with: go test -v -short -cpu 1,2,4,8,16,256 std === RUN TestQuery-2 fatal error: all goroutines are asleep - deadlock! runtime stack: runtime.throw(0x6f0a1b) /build/go.tip/go/src/pkg/runtime/panic.c:464 +0x69 checkdead() /build/go.tip/go/src/pkg/runtime/proc.c:2348 +0xe5 mput(0xc21004d000) /build/go.tip/go/src/pkg/runtime/proc.c:2620 +0x40 stopm() /build/go.tip/go/src/pkg/runtime/proc.c:932 +0xbb findrunnable() /build/go.tip/go/src/pkg/runtime/proc.c:1239 +0x42d schedule() /build/go.tip/go/src/pkg/runtime/proc.c:1322 +0xe3 park0(0xc21008fea0) /build/go.tip/go/src/pkg/runtime/proc.c:1363 +0xd8 runtime.mcall(0x4250c7) /build/go.tip/go/src/pkg/runtime/asm_amd64.s:178 +0x4b goroutine 1 [chan receive]: runtime.park(0x40a5f0, 0xc21017ab30, 0x6ed58d) /build/go.tip/go/src/pkg/runtime/proc.c:1344 +0x66 runtime.chanrecv(0x505c40, 0xc21017aae0, 0x7f1abaaffd18, 0x0, 0x0) /build/go.tip/go/src/pkg/runtime/chan.c:354 +0x50b runtime.chanrecv1(0x505c40, 0xc21017aae0, 0x7f1abaaffe10, 0x1) /build/go.tip/go/src/pkg/runtime/chan.c:446 +0x38 testing.RunTests(0x5b9248, 0x6ee800, 0x26, 0x26, 0x1) /build/go.tip/go/src/pkg/testing/testing.go:470 +0x8d5 testing.Main(0x5b9248, 0x6ee800, 0x26, 0x26, 0x6ec260, ...) /build/go.tip/go/src/pkg/testing/testing.go:401 +0x84 main.main() database/sql/_test/_testmain.go:137 +0x9c runtime.main() /build/go.tip/go/src/pkg/runtime/proc.c:222 +0x11f runtime.goexit() /build/go.tip/go/src/pkg/runtime/proc.c:1396 goroutine 2 [syscall]: runtime.notetsleepg(0x7f1abab1bf60, 0xdf8475800) /build/go.tip/go/src/pkg/runtime/lock_futex.c:190 +0x46 runtime.MHeap_Scavenger() /build/go.tip/go/src/pkg/runtime/mheap.c:463 +0xa3 runtime.goexit() /build/go.tip/go/src/pkg/runtime/proc.c:1396 created by runtime.main /build/go.tip/go/src/pkg/runtime/proc.c:181 goroutine 169 [chan receive]: runtime.park(0x40a5f0, 0xc21017ab90, 0x6ed5b4) /build/go.tip/go/src/pkg/runtime/proc.c:1344 +0x66 runtime.chanrecv(0x505d00, 0xc21017ab40, 0x7f1abab03f80, 0x0, 0x7f1abab03f80) /build/go.tip/go/src/pkg/runtime/chan.c:385 +0x1f2 runtime.chanrecv2(0x505d00, 0xc21017ab40, 0x416fd2) /build/go.tip/go/src/pkg/runtime/chan.c:458 +0x4b database/sql.(*DB).connectionOpener(0xc210157300) /build/go.tip/go/src/pkg/database/sql/sql.go:571 +0x3e runtime.goexit() /build/go.tip/go/src/pkg/runtime/proc.c:1396 created by database/sql.Open /build/go.tip/go/src/pkg/database/sql/sql.go:433 +0x24d goroutine 168 [chan receive]: runtime.park(0x40a5f0, 0xc21017a6b0, 0x6ed5b4) /build/go.tip/go/src/pkg/runtime/proc.c:1344 +0x66 runtime.chanrecv(0x505d00, 0xc21017a660, 0x7f1aba816a30, 0x0, 0x0) /build/go.tip/go/src/pkg/runtime/chan.c:385 +0x1f2 runtime.chanrecv1(0x505d00, 0xc21017a660) /build/go.tip/go/src/pkg/runtime/chan.c:446 +0x38 database/sql.(*fakeDriver).Open(0xc210064300, 0x56d840, 0x3, 0x0, 0x40fe64, ...) /build/go.tip/go/src/pkg/database/sql/fakedb_test.go:153 +0x29c database/sql.(*DB).conn(0xc210157300, 0x41e7c9, 0xc210171e40, 0x30) /build/go.tip/go/src/pkg/database/sql/sql.go:653 +0x202 database/sql.(*DB).exec(0xc210157300, 0x56b6e0, 0x4, 0x0, 0x0, ...) /build/go.tip/go/src/pkg/database/sql/sql.go:856 +0x6a database/sql.(*DB).Exec(0xc210157300, 0x56b6e0, 0x4, 0x0, 0x0, ...) /build/go.tip/go/src/pkg/database/sql/sql.go:847 +0x9a database/sql.newTestDB(0x7f1abab6a688, 0xc210085a20, 0x56eea0, 0x6, 0xc20ffeb540) /build/go.tip/go/src/pkg/database/sql/sql_test.go:47 +0x17a database/sql.TestQuery(0xc210085a20) /build/go.tip/go/src/pkg/database/sql/sql_test.go:160 +0x65 testing.tRunner(0xc210085a20, 0x6ee848) /build/go.tip/go/src/pkg/testing/testing.go:389 +0x8b runtime.goexit() /build/go.tip/go/src/pkg/runtime/proc.c:1396 created by testing.RunTests /build/go.tip/go/src/pkg/testing/testing.go:469 +0x8b2 FAIL database/sql 0.054s
Sign in to reply to this message.
Message was sent while issue was closed.
The problem is with the new test. Changing (in fakedb_test.go): if d.waitCh != nil { d.waitingCh <- struct{}{} <-d.waitCh } to if d.waitCh != nil { d.waitingCh <- struct{}{} <-d.waitCh d.waitCh = nil d.waitingCh = nil } fixes it. On 2013/10/17 11:06:54, albert.strasheim wrote: > On 2013/10/16 16:23:02, bradfitz wrote: > > *** Submitted as https://code.google.com/p/go/source/detail?r=c67a1d0df694 *** > > database/sql: Fix connection leak and potential deadlock > > A recent change to database/sql seems to have broken the tests if you run with: > > go test -v -short -cpu 1,2,4,8,16,256 std > > === RUN TestQuery-2 > fatal error: all goroutines are asleep - deadlock! > > runtime stack: > runtime.throw(0x6f0a1b) > /build/go.tip/go/src/pkg/runtime/panic.c:464 +0x69 > checkdead() > /build/go.tip/go/src/pkg/runtime/proc.c:2348 +0xe5 > mput(0xc21004d000) > /build/go.tip/go/src/pkg/runtime/proc.c:2620 +0x40 > stopm() > /build/go.tip/go/src/pkg/runtime/proc.c:932 +0xbb > findrunnable() > /build/go.tip/go/src/pkg/runtime/proc.c:1239 +0x42d > schedule() > /build/go.tip/go/src/pkg/runtime/proc.c:1322 +0xe3 > park0(0xc21008fea0) > /build/go.tip/go/src/pkg/runtime/proc.c:1363 +0xd8 > runtime.mcall(0x4250c7) > /build/go.tip/go/src/pkg/runtime/asm_amd64.s:178 +0x4b > > goroutine 1 [chan receive]: > runtime.park(0x40a5f0, 0xc21017ab30, 0x6ed58d) > /build/go.tip/go/src/pkg/runtime/proc.c:1344 +0x66 > runtime.chanrecv(0x505c40, 0xc21017aae0, 0x7f1abaaffd18, 0x0, 0x0) > /build/go.tip/go/src/pkg/runtime/chan.c:354 +0x50b > runtime.chanrecv1(0x505c40, 0xc21017aae0, 0x7f1abaaffe10, 0x1) > /build/go.tip/go/src/pkg/runtime/chan.c:446 +0x38 > testing.RunTests(0x5b9248, 0x6ee800, 0x26, 0x26, 0x1) > /build/go.tip/go/src/pkg/testing/testing.go:470 +0x8d5 > testing.Main(0x5b9248, 0x6ee800, 0x26, 0x26, 0x6ec260, ...) > /build/go.tip/go/src/pkg/testing/testing.go:401 +0x84 > main.main() > database/sql/_test/_testmain.go:137 +0x9c > runtime.main() > /build/go.tip/go/src/pkg/runtime/proc.c:222 +0x11f > runtime.goexit() > /build/go.tip/go/src/pkg/runtime/proc.c:1396 > > goroutine 2 [syscall]: > runtime.notetsleepg(0x7f1abab1bf60, 0xdf8475800) > /build/go.tip/go/src/pkg/runtime/lock_futex.c:190 +0x46 > runtime.MHeap_Scavenger() > /build/go.tip/go/src/pkg/runtime/mheap.c:463 +0xa3 > runtime.goexit() > /build/go.tip/go/src/pkg/runtime/proc.c:1396 > created by runtime.main > /build/go.tip/go/src/pkg/runtime/proc.c:181 > > goroutine 169 [chan receive]: > runtime.park(0x40a5f0, 0xc21017ab90, 0x6ed5b4) > /build/go.tip/go/src/pkg/runtime/proc.c:1344 +0x66 > runtime.chanrecv(0x505d00, 0xc21017ab40, 0x7f1abab03f80, 0x0, 0x7f1abab03f80) > /build/go.tip/go/src/pkg/runtime/chan.c:385 +0x1f2 > runtime.chanrecv2(0x505d00, 0xc21017ab40, 0x416fd2) > /build/go.tip/go/src/pkg/runtime/chan.c:458 +0x4b > database/sql.(*DB).connectionOpener(0xc210157300) > /build/go.tip/go/src/pkg/database/sql/sql.go:571 +0x3e > runtime.goexit() > /build/go.tip/go/src/pkg/runtime/proc.c:1396 > created by database/sql.Open > /build/go.tip/go/src/pkg/database/sql/sql.go:433 +0x24d > > goroutine 168 [chan receive]: > runtime.park(0x40a5f0, 0xc21017a6b0, 0x6ed5b4) > /build/go.tip/go/src/pkg/runtime/proc.c:1344 +0x66 > runtime.chanrecv(0x505d00, 0xc21017a660, 0x7f1aba816a30, 0x0, 0x0) > /build/go.tip/go/src/pkg/runtime/chan.c:385 +0x1f2 > runtime.chanrecv1(0x505d00, 0xc21017a660) > /build/go.tip/go/src/pkg/runtime/chan.c:446 +0x38 > database/sql.(*fakeDriver).Open(0xc210064300, 0x56d840, 0x3, 0x0, 0x40fe64, ...) > /build/go.tip/go/src/pkg/database/sql/fakedb_test.go:153 +0x29c > database/sql.(*DB).conn(0xc210157300, 0x41e7c9, 0xc210171e40, 0x30) > /build/go.tip/go/src/pkg/database/sql/sql.go:653 +0x202 > database/sql.(*DB).exec(0xc210157300, 0x56b6e0, 0x4, 0x0, 0x0, ...) > /build/go.tip/go/src/pkg/database/sql/sql.go:856 +0x6a > database/sql.(*DB).Exec(0xc210157300, 0x56b6e0, 0x4, 0x0, 0x0, ...) > /build/go.tip/go/src/pkg/database/sql/sql.go:847 +0x9a > database/sql.newTestDB(0x7f1abab6a688, 0xc210085a20, 0x56eea0, 0x6, > 0xc20ffeb540) > /build/go.tip/go/src/pkg/database/sql/sql_test.go:47 +0x17a > database/sql.TestQuery(0xc210085a20) > /build/go.tip/go/src/pkg/database/sql/sql_test.go:160 +0x65 > testing.tRunner(0xc210085a20, 0x6ee848) > /build/go.tip/go/src/pkg/testing/testing.go:389 +0x8b > runtime.goexit() > /build/go.tip/go/src/pkg/runtime/proc.c:1396 > created by testing.RunTests > /build/go.tip/go/src/pkg/testing/testing.go:469 +0x8b2 > FAIL database/sql 0.054s
Sign in to reply to this message.
Message was sent while issue was closed.
On 2013/10/17 13:28:14, Tad Glines wrote: > The problem is with the new test. > Changing (in fakedb_test.go): > if d.waitCh != nil { > d.waitingCh <- struct{}{} > <-d.waitCh > } > to > if d.waitCh != nil { > d.waitingCh <- struct{}{} > <-d.waitCh > d.waitCh = nil > d.waitingCh = nil > } > > fixes it. Thanks, I'll submit a CL right now. Regards, Alberto
Sign in to reply to this message.
|