Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

x/crypto/poly1305: crash on ARM #17499

Closed
bradfitz opened this issue Oct 18, 2016 · 13 comments
Closed

x/crypto/poly1305: crash on ARM #17499

bradfitz opened this issue Oct 18, 2016 · 13 comments
Milestone

Comments

@bradfitz
Copy link
Contributor

Noticed a trybot flake on ARM:

https://storage.googleapis.com/go-build-log/fbb1da3a/linux-arm_5dfc64f9.log

ok      net 2.690s
runtime: frame vendor/golang_org/x/crypto/poly1305.poly1305_auth_armv6 untyped locals 0x10c31a54+0x40
fatal error: missing stackmap

runtime stack:
runtime.throw(0x3b4134, 0x10)
    /workdir/go/src/runtime/panic.go:587 +0x70
runtime.scanframeworker(0x41486c04, 0x41486c78, 0x108193ac)
    /workdir/go/src/runtime/mgcmark.go:855 +0x56c
runtime.scanstack.func1(0x41486c04, 0x0, 0x41486b01)
    /workdir/go/src/runtime/mgcmark.go:781 +0x54
runtime.gentraceback(0xffffffff, 0x10c31a54, 0x0, 0x10b01860, 0x0, 0x0, 0x7fffffff, 0x41486d38, 0x0, 0x0, ...)
    /workdir/go/src/runtime/traceback.go:378 +0xe04
runtime.scanstack(0x10b01860, 0x108193ac)
    /workdir/go/src/runtime/mgcmark.go:804 +0x1b0
runtime.newstack()
    /workdir/go/src/runtime/stack.go:1035 +0x350
runtime.morestack()
    /workdir/go/src/runtime/asm_arm.s:313 +0x5c

goroutine 5475 [select (scan)]:
vendor/golang_org/x/crypto/poly1305.poly1305_auth_armv6(0x30, 0x30, 0x1d, 0x3fb)
    /workdir/go/src/vendor/golang_org/x/crypto/poly1305/sum_arm.s:367 +0x7c fp=0x10c31a94 sp=0x10c31a54
vendor/golang_org/x/crypto/chacha20poly1305.(*chacha20poly1305).sealGeneric(0xd, 0xd, 0x10adc005, 0x1d, 0x3fb, 0x19fd60, 0x10a0c700, 0x10adc005, 0x0, 0x3fb, ...)
    /workdir/go/src/vendor/golang_org/x/crypto/chacha20poly1305/chacha20poly1305_generic.go:36 +0x350 fp=0x10c31b30 sp=0x10c31a94
created by testing.(*T).Run
    /workdir/go/src/testing/testing.go:657 +0x238

goroutine 1 [chan receive]:
testing.(*T).Run(0x1080b080, 0x3b76c3, 0x15, 0x3d3164, 0x104201)
    /workdir/go/src/testing/testing.go:658 +0x258
testing.RunTests.func1(0x1080b080)
    /workdir/go/src/testing/testing.go:804 +0x54
testing.tRunner(0x1080b080, 0x10845f08)
    /workdir/go/src/testing/testing.go:621 +0x7c
testing.RunTests(0x3d2814, 0x5823e8, 0x191, 0x191, 0x10850070)
    /workdir/go/src/testing/testing.go:810 +0x24c
testing.(*M).Run(0x10830f7c, 0x3b7b8)
    /workdir/go/src/testing/testing.go:754 +0x70
net/http_test.TestMain(0x10845f74)
    /workdir/go/src/net/http/main_test.go:19 +0x1c
main.main()
    net/http/_test/_testmain.go:904 +0xa0

goroutine 17 [syscall, locked to thread]:
runtime.goexit()
    /workdir/go/src/runtime/asm_arm.s:996 +0x4

goroutine 2026 [chan receive]:
testing.(*T).Parallel(0x10a37800)
    /workdir/go/src/testing/testing.go:563 +0x118
net/http_test.setParallel(0x10a37800)
    /workdir/go/src/net/http/main_test.go:87 +0x34
net/http_test.TestServerTimeouts(0x10a37800)
    /workdir/go/src/net/http/serve_test.go:462 +0x20
testing.tRunner(0x10a37800, 0x3d3130)
    /workdir/go/src/testing/testing.go:621 +0x7c
created by testing.(*T).Run
    /workdir/go/src/testing/testing.go:657 +0x238

goroutine 2182 [chan receive]:
testing.(*T).Parallel(0x10b0cd00)
    /workdir/go/src/testing/testing.go:563 +0x118
net/http_test.setParallel(0x10b0cd00)
    /workdir/go/src/net/http/main_test.go:87 +0x34
net/http_test.TestTLSHandshakeTimeout(0x10b0cd00)
    /workdir/go/src/net/http/serve_test.go:1026 +0x20
testing.tRunner(0x10b0cd00, 0x3d319c)
    /workdir/go/src/testing/testing.go:621 +0x7c
created by testing.(*T).Run
    /workdir/go/src/testing/testing.go:657 +0x238

goroutine 5397 [IO wait]:
net.runtime_pollWait(0x41f12ab0, 0x72, 0x0)
    /workdir/go/src/runtime/netpoll.go:164 +0x44
net.(*pollDesc).wait(0x10920bbc, 0x72, 0x0, 0x10bde020)
    /workdir/go/src/net/fd_poll_runtime.go:75 +0x28
net.(*pollDesc).waitRead(0x10920bbc, 0xffffffff, 0x0)
    /workdir/go/src/net/fd_poll_runtime.go:80 +0x24
net.(*netFD).accept(0x10920b80, 0x0, 0x552658, 0x10bde020)
    /workdir/go/src/net/fd_unix.go:422 +0x15c
net.(*TCPListener).accept(0x10a02998, 0x584978, 0x40147258, 0x0)
    /workdir/go/src/net/tcpsock_posix.go:132 +0x20
net.(*TCPListener).Accept(0x10a02998, 0x10a2af48, 0x24faf8, 0x10a2af68, 0x60a38)
    /workdir/go/src/net/tcpsock.go:225 +0x3c
crypto/tls.(*listener).Accept(0x10a09190, 0x3d2940, 0x10bf8000, 0x555140, 0x109dc020)
    /workdir/go/src/crypto/tls/tls.go:52 +0x28
net/http.(*Server).Serve(0x109d08c0, 0x554798, 0x10a09190, 0x0, 0x0)
    /workdir/go/src/net/http/server.go:2345 +0x154
net/http/httptest.(*Server).goServe.func1(0x10920bc0)
    /workdir/go/src/net/http/httptest/server.go:236 +0x5c
created by net/http/httptest.(*Server).goServe
    /workdir/go/src/net/http/httptest/server.go:237 +0x48

goroutine 5593 [runnable]:
vendor/golang_org/x/crypto/chacha20poly1305.(*chacha20poly1305).sealGeneric(0x109dc780, 0x1096a005, 0x0, 0x1ffb, 0x109dc7a0, 0xc, 0xc, 0x1096a005, 0x1009, 0x1ffb, ...)
    /workdir/go/src/vendor/golang_org/x/crypto/chacha20poly1305/chacha20poly1305_generic.go:29 +0x188
vendor/golang_org/x/crypto/chacha20poly1305.(*chacha20poly1305).seal(0x109dc780, 0x1096a005, 0x0, 0x1ffb, 0x109dc7a0, 0xc, 0xc, 0x1096a005, 0x1009, 0x1ffb, ...)
    /workdir/go/src/vendor/golang_org/x/crypto/chacha20poly1305/chacha20poly1305_noasm.go:10 +0x7c
vendor/golang_org/x/crypto/chacha20poly1305.(*chacha20poly1305).Seal(0x109dc780, 0x1096a005, 0x0, 0x1ffb, 0x109dc7a0, 0xc, 0xc, 0x1096a005, 0x1009, 0x1ffb, ...)
    /workdir/go/src/vendor/golang_org/x/crypto/chacha20poly1305/chacha20poly1305.go:51 +0xc0
crypto/tls.(*xorNonceAEAD).Seal(0x109dc7a0, 0x1096a005, 0x0, 0x1ffb, 0x10998120, 0x8, 0x8, 0x1096a005, 0x1009, 0x1ffb, ...)
    /workdir/go/src/crypto/tls/cipher_suites.go:198 +0xcc
crypto/tls.(*halfConn).encrypt(0x109980fc, 0x109dc1a0, 0x0, 0x4000)
    /workdir/go/src/crypto/tls/conn.go:413 +0x4b0
crypto/tls.(*Conn).writeRecordLocked(0x10998000, 0x3d3717, 0x10873300, 0x1009, 0x1300, 0x0, 0x0, 0x0)
    /workdir/go/src/crypto/tls/conn.go:901 +0x278
crypto/tls.(*Conn).Write(0x10998000, 0x10873300, 0x1009, 0x1300, 0x0, 0x0, 0x0)
    /workdir/go/src/crypto/tls/conn.go:1054 +0x160
bufio.(*Writer).Write(0x109482a0, 0x10873300, 0x1009, 0x1300, 0x10998000, 0x3d3717, 0x108b1000)
    /workdir/go/src/bufio/bufio.go:590 +0x140
net/http.(*http2bufferedWriter).Write(0x10bde1a0, 0x10873300, 0x1009, 0x1300, 0x10c41030, 0x1, 0x0)
    /workdir/go/src/net/http/h2_bundle.go:2618 +0x44
net/http.(*http2Framer).endWrite(0x10862d90, 0x0, 0x0)
    /workdir/go/src/net/http/h2_bundle.go:899 +0x8c
net/http.(*http2Framer).WriteDataPadded(0x10862d90, 0x11, 0x0, 0x10a32000, 0x1000, 0x1000, 0x0, 0x0, 0x0, 0x5e33c, ...)
    /workdir/go/src/net/http/h2_bundle.go:1177 +0x1f4
net/http.(*http2Framer).WriteData(0x10862d90, 0x11, 0x34e300, 0x10a32000, 0x1000, 0x1000, 0x22a4a0, 0x10a02501)
    /workdir/go/src/net/http/h2_bundle.go:1146 +0x58
net/http.(*http2writeData).writeFrame(0x109489c0, 0x555230, 0x10a30140, 0x0, 0x0)
    /workdir/go/src/net/http/h2_bundle.go:6928 +0x5c
net/http.(*http2serverConn).writeFrameAsync(0x10a30140, 0x552418, 0x109489c0, 0x10b92380, 0x109206c0)
    /workdir/go/src/net/http/h2_bundle.go:3369 +0x34
created by net/http.(*http2serverConn).startFrameWrite
    /workdir/go/src/net/http/h2_bundle.go:3611 +0x16c

goroutine 5443 [select]:
net/http.(*http2serverConn).serve(0x10a30140)
    /workdir/go/src/net/http/h2_bundle.go:3437 +0x570
net/http.(*http2Server).ServeConn(0x109f5a00, 0x555ed0, 0x10998000, 0x10c33e2c)
    /workdir/go/src/net/http/h2_bundle.go:3102 +0x5ec
net/http.http2ConfigureServer.func1(0x109d08c0, 0x10998000, 0x552d78, 0x109be048)
    /workdir/go/src/net/http/h2_bundle.go:2989 +0x70
net/http.(*conn).serve(0x10bf8000, 0x555140, 0x109dc020)
    /workdir/go/src/net/http/server.go:1622 +0xbd0
created by net/http.(*Server).Serve
    /workdir/go/src/net/http/server.go:2365 +0x394

goroutine 5400 [runnable]:
net.runtime_pollWait(0x41cd8638, 0x72, 0x10ad0800)
    /workdir/go/src/runtime/netpoll.go:164 +0x44
net.(*pollDesc).wait(0x10c4003c, 0x72, 0x553120, 0x5511e4)
    /workdir/go/src/net/fd_poll_runtime.go:75 +0x28
net.(*pollDesc).waitRead(0x10c4003c, 0x10ad0800, 0x400)
    /workdir/go/src/net/fd_poll_runtime.go:80 +0x24
net.(*netFD).Read(0x10c40000, 0x10ad0800, 0x400, 0x400, 0x0, 0x553120, 0x5511e4)
    /workdir/go/src/net/fd_unix.go:246 +0x124
net.(*conn).Read(0x109be008, 0x10ad0800, 0x400, 0x400, 0x0, 0x0, 0x0)
    /workdir/go/src/net/net.go:176 +0x58
crypto/tls.(*block).readFromUntil(0x109dc4c0, 0x41cd8a18, 0x109be008, 0x5, 0x109be008, 0x0)
    /workdir/go/src/crypto/tls/conn.go:481 +0x78
crypto/tls.(*Conn).readRecord(0x10998000, 0x3d3717, 0x10998094, 0x0)
    /workdir/go/src/crypto/tls/conn.go:583 +0x88
crypto/tls.(*Conn).Read(0x10998000, 0x10862db0, 0x9, 0x9, 0x0, 0x0, 0x0)
    /workdir/go/src/crypto/tls/conn.go:1120 +0xd4
io.ReadAtLeast(0x551fb0, 0x10998000, 0x10862db0, 0x9, 0x9, 0x9, 0x0, 0x109f4670, 0x109f460c)
    /workdir/go/src/io/io.go:307 +0x8c
io.ReadFull(0x551fb0, 0x10998000, 0x10862db0, 0x9, 0x9, 0x10a06e10, 0x0, 0x0)
    /workdir/go/src/io/io.go:325 +0x40
net/http.http2readFrameHeader(0x10862db0, 0x9, 0x9, 0x551fb0, 0x10998000, 0x0, 0x0, 0x0, 0x0, 0x10836f40)
    /workdir/go/src/net/http/h2_bundle.go:781 +0x50
net/http.(*http2Framer).ReadFrame(0x10862d90, 0x10c41180, 0x0, 0x1, 0x0)
    /workdir/go/src/net/http/h2_bundle.go:1003 +0x60
net/http.(*http2serverConn).readFrames(0x10a30140)
    /workdir/go/src/net/http/h2_bundle.go:3341 +0x84
created by net/http.(*http2serverConn).serve
    /workdir/go/src/net/http/h2_bundle.go:3431 +0x240

goroutine 5521 [select]:
net/http.(*http2serverConn).writeDataFromHandler(0x10a30140, 0x10b92380, 0x10a32000, 0x1000, 0x1000, 0x0, 0x0, 0x0)
    /workdir/go/src/net/http/h2_bundle.go:3518 +0x278
net/http.(*http2responseWriterState).writeChunk(0x10886370, 0x10a32000, 0x1000, 0x1000, 0x1000, 0x0, 0x0)
    /workdir/go/src/net/http/h2_bundle.go:4637 +0x3e0
net/http.http2chunkWriter.Write(0x10886370, 0x10a32000, 0x1000, 0x1000, 0x1000, 0x0, 0x0)
    /workdir/go/src/net/http/h2_bundle.go:4546 +0x34
bufio.(*Writer).Flush(0x10948460, 0x10c83000, 0x1000)
    /workdir/go/src/bufio/bufio.go:558 +0x60
bufio.(*Writer).WriteString(0x10948460, 0x10c83000, 0x5c000, 0x80001, 0x41d10000, 0x0)
    /workdir/go/src/bufio/bufio.go:661 +0xd0
net/http.(*http2responseWriter).write(0x10a02558, 0x80000, 0x0, 0x0, 0x0, 0x10c60000, 0x80000, 0x398f20, 0x80001, 0x41f05a38)
    /workdir/go/src/net/http/h2_bundle.go:4817 +0x13c
net/http.(*http2responseWriter).WriteString(0x10a02558, 0x10c60000, 0x80000, 0x10a2062c, 0x308701, 0x369a78)
    /workdir/go/src/net/http/h2_bundle.go:4793 +0x44
io.WriteString(0x41d10000, 0x10a02558, 0x10c60000, 0x80000, 0x10a02558, 0x10921900, 0x22b408)
    /workdir/go/src/io/io.go:289 +0x70
net/http_test.testSniffWriteSize.func1(0x554898, 0x10a02558, 0x1093ad00)
    /workdir/go/src/net/http/sniff_test.go:166 +0xb8
net/http.HandlerFunc.ServeHTTP(0x10a02988, 0x554898, 0x10a02558, 0x1093ad00)
    /workdir/go/src/net/http/server.go:1800 +0x34
net/http.serverHandler.ServeHTTP(0x109d08c0, 0x554898, 0x10a02558, 0x1093ad00)
    /workdir/go/src/net/http/server.go:2276 +0x7c
net/http.initNPNRequest.ServeHTTP(0x10998000, 0x109d08c0, 0x554898, 0x10a02558, 0x1093ad00)
    /workdir/go/src/net/http/server.go:2742 +0x5c
net/http.(*initNPNRequest).ServeHTTP(0x109be048, 0x554898, 0x10a02558, 0x1093ad00)
    <autogenerated>:286 +0x6c
net/http.(Handler).ServeHTTP-fm(0x554898, 0x10a02558, 0x1093ad00)
    /workdir/go/src/net/http/h2_bundle.go:4113 +0x3c
net/http.(*http2serverConn).runHandler(0x10a30140, 0x10a02558, 0x1093ad00, 0x109f4680)
    /workdir/go/src/net/http/h2_bundle.go:4347 +0x70
created by net/http.(*http2serverConn).processHeaders
    /workdir/go/src/net/http/h2_bundle.go:4125 +0x4f8

goroutine 5477 [runnable]:
vendor/golang_org/x/crypto/chacha20poly1305.(*chacha20poly1305).openGeneric(0x10a0c740, 0x10bc0005, 0x0, 0x1ffb, 0x10a0c760, 0xc, 0xc, 0x10bc0005, 0x1019, 0x1ffb, ...)
    /workdir/go/src/vendor/golang_org/x/crypto/chacha20poly1305/chacha20poly1305_generic.go:53 +0x170
vendor/golang_org/x/crypto/chacha20poly1305.(*chacha20poly1305).open(0x10a0c740, 0x10bc0005, 0x0, 0x1ffb, 0x10a0c760, 0xc, 0xc, 0x10bc0005, 0x1019, 0x1ffb, ...)
    /workdir/go/src/vendor/golang_org/x/crypto/chacha20poly1305/chacha20poly1305_noasm.go:14 +0x7c
vendor/golang_org/x/crypto/chacha20poly1305.(*chacha20poly1305).Open(0x10a0c740, 0x10bc0005, 0x0, 0x1ffb, 0x10a0c760, 0xc, 0xc, 0x10bc0005, 0x1019, 0x1ffb, ...)
    /workdir/go/src/vendor/golang_org/x/crypto/chacha20poly1305/chacha20poly1305.go:67 +0xc8
crypto/tls.(*xorNonceAEAD).Open(0x10a0c760, 0x10bc0005, 0x0, 0x1ffb, 0x10ffc0b8, 0x8, 0x8, 0x10bc0005, 0x1019, 0x1ffb, ...)
    /workdir/go/src/crypto/tls/cipher_suites.go:210 +0xcc
crypto/tls.(*halfConn).decrypt(0x10ffc094, 0x10a0c020, 0x101e, 0x10a0c020, 0x10948240)
    /workdir/go/src/crypto/tls/conn.go:299 +0x588
crypto/tls.(*Conn).readRecord(0x10ffc000, 0x3d3717, 0x10ffc094, 0x589714)
    /workdir/go/src/crypto/tls/conn.go:640 +0x204
crypto/tls.(*Conn).Read(0x10ffc000, 0x10af1000, 0x1000, 0x1000, 0x0, 0x0, 0x0)
    /workdir/go/src/crypto/tls/conn.go:1120 +0xd4
bufio.(*Reader).fill(0x109a1260)
    /workdir/go/src/bufio/bufio.go:97 +0xf4
bufio.(*Reader).Read(0x109a1260, 0x10bdcbf0, 0x9, 0x9, 0x10826500, 0x5e4dc, 0x10826500)
    /workdir/go/src/bufio/bufio.go:209 +0x15c
io.ReadAtLeast(0x551e90, 0x109a1260, 0x10bdcbf0, 0x9, 0x9, 0x9, 0x10949540, 0x20258, 0x202ec)
    /workdir/go/src/io/io.go:307 +0x8c
io.ReadFull(0x551e90, 0x109a1260, 0x10bdcbf0, 0x9, 0x9, 0x0, 0x10818aa4, 0x10818aa4)
    /workdir/go/src/io/io.go:325 +0x40
net/http.http2readFrameHeader(0x10bdcbf0, 0x9, 0x9, 0x551e90, 0x109a1260, 0x0, 0x0, 0x0, 0x10953000, 0x1000)
    /workdir/go/src/net/http/h2_bundle.go:781 +0x50
net/http.(*http2Framer).ReadFrame(0x10bdcbd0, 0x109dcde0, 0x0, 0x0, 0x0)
    /workdir/go/src/net/http/h2_bundle.go:1003 +0x60
net/http.(*http2clientConnReadLoop).run(0x10833fc0, 0x3d295c, 0x10a2d7c8)
    /workdir/go/src/net/http/h2_bundle.go:6107 +0x60
net/http.(*http2ClientConn).readLoop(0x10b7c000)
    /workdir/go/src/net/http/h2_bundle.go:6036 +0x8c
created by net/http.(*http2Transport).newClientConn
    /workdir/go/src/net/http/h2_bundle.go:5360 +0x5a0
FAIL    net/http    17.660s
2016/10/18 08:47:41 Failed: exit status 1


Error: tests failed: dist test failed: go_test:net/http: exit status 1
@bradfitz bradfitz added this to the Go1.8 milestone Oct 18, 2016
@mundaym
Copy link
Member

mundaym commented Oct 18, 2016

Hmm, looks like this might be down to my suggestion to use NOFRAME,$0 instead of ,$-4. It looks like NOFRAME hasn't been implemented on arm. @cherrymui can you confirm?

@cherrymui
Copy link
Member

@mundaym, yes, NOFRAME is only implemented on ppc64 and s390x, and on amd64 for dropping the frame pointer.
Is there any arm assembly code using NOFRAME? I didn't see any.

@mundaym
Copy link
Member

mundaym commented Oct 18, 2016

Yeah, in this file: x/crypto/poly1305/sum_arm.s

@cherrymui
Copy link
Member

Oh I see. It's in a subrepo.

I am not sure I understand the code: it pushes may registers to stack, without allocating a stack frame. Does the caller allocate frame for it? How could (accidentally) saving LR to stack in poly1305_finish_ext_armv6 cause problem? There must be something I've missed...

@cherrymui
Copy link
Member

To clarify, NOFRAME is no-op.
poly1305_init_ext_armv6 and poly1305_blocks_armv6 do not save LR because they are leaf functions. However, poly1305_finish_ext_armv6 does.

@mundaym
Copy link
Member

mundaym commented Oct 18, 2016

OK, thanks Cherry, so it sounds like this probably isn't a regression due to the NOFRAME thing (although that should be fixed).

In that case I suspect that poly1305_auth_armv6 should contain the NO_LOCAL_POINTERS macro. That also means that arbitrary registers probably shouldn't be pushed onto its stack though (since they might contain pointers). It might work in practice so long as the arguments to poly1305_auth_armv6 are kept alive throughout its lifetime (which they should be).

@cherrymui
Copy link
Member

it sounds like this probably isn't a regression due to the NOFRAME thing (although that should be fixed).

Probably not, depending on whether saving LR causes a problem (not likely).

In that case I suspect that poly1305_auth_armv6 should contain the NO_LOCAL_POINTERS macro.

Yes, I think so.

That also means that arbitrary registers probably shouldn't be pushed onto its stack though (since they might contain pointers). It might work in practice so long as the arguments to poly1305_auth_armv6 are kept alive throughout its lifetime (which they should be).

Pointers are fine because its call tree is NOSPLIT, i.e. once it enters the function it is not preemptible. But I'm worried that the decrements of SP (in many places) may overflow the stack.

That said, I'm not familiar with that code. There may be some thoughts behind it that I missed.

@agl
Copy link
Contributor

agl commented Oct 18, 2016

I know the original author of the assembly code (which was not written with Go in mind) and I suspect that it was correct at the time. I don't know the person who originally converted it to Go however, and I suspect that I'll need some assistance in fixing this.

The only entry point for the functions in sum_arm.s is poly1305_auth_armv6. The other TEXT lines might well be superfluous?

Building a binary and running go tool objdump on it results in this (which I'll comment on inline):

(Recall that the TEXT line is TEXT ·poly1305_auth_armv6(SB), $280-16.)

TEXT golang.org/x/crypto/poly1305.poly1305_auth_armv6(SB) /home/agl/devel/gopkg/src/golang.org/x/crypto/poly1305/sum_arm.s
        sum_arm.s:367   0xd1e48 e59a1008        MOVW 0x8(R10), R1
        sum_arm.s:367   0xd1e4c e24d2f47        SUB $284, R13, R2
        sum_arm.s:367   0xd1e50 e1520001        CMP R1, R2
        sum_arm.s:367   0xd1e54 9a000018        B.LS 0xd1ebc

This is stack check, I assume, and it's checking for 284 bytes of free space. If the stack isn't large enough, it'll end up back here via runtime.morestack_noctxt.

        sum_arm.s:367   0xd1e58 e52de11c        MOVW.W R14, -0x11c(R13)

This has been added by the compiler, although I'm not sure why.

        sum_arm.s:368   0xd1e5c e59d4120        MOVW 0x120(R13), R4
        sum_arm.s:369   0xd1e60 e59d5124        MOVW 0x124(R13), R5
        sum_arm.s:370   0xd1e64 e59d6128        MOVW 0x128(R13), R6
        sum_arm.s:371   0xd1e68 e59d712c        MOVW 0x12c(R13), R7

This is the loading of argument from the stack, but the offsets assume that R13 has already been moved down ~280 bytes, although I don't know when that happened.

        sum_arm.s:373   0xd1e6c e1a0800d        MOVW R13, R8
        sum_arm.s:374   0xd1e70 e3cdd03f        BIC $63, R13, R13 
        sum_arm.s:375   0xd1e74 e24dd040        SUB $64, R13, R13

This will subtract between 64 and 127 bytes from the stack pointer and end up aligning it to 64 bytes. The rest of the code here operates on a datastructure that's ~64 bytes and which will be placed at this point in the stack.

        sum_arm.s:376   0xd1e78 e1a0000d        MOVW R13, R0
        sum_arm.s:377   0xd1e7c e1a01007        MOVW R7, R1
        sum_arm.s:378   0xd1e80 ebfffe93        BL poly1305_init_ext_armv6(SB)

It doesn't help that I don't understand ARM assembly (instructions like MOVM.DB.W seem to be updating the destination address register?) but we should have at least 280-128 bytes of stack to play with, right? So if the remaining functions don't overflow that we should be ok? (Or has R13 already been moved down, thus the masking with ~63 and subtraction of 64 just send us off into space.)

The code does keep pointers into the stack in registers however. But there are no other stack-splitting points after the first. Can the GC preempt arbitrarily, or only at stack splits?

        sum_arm.s:379   0xd1e84 e3d6200f        BIC.S $15, R6, R2
        sum_arm.s:380   0xd1e88 0a000004        B.EQ 0xd1ea0
        sum_arm.s:381   0xd1e8c e1a0000d        MOVW R13, R0
        sum_arm.s:382   0xd1e90 e1a01005        MOVW R5, R1
        sum_arm.s:383   0xd1e94 e0855002        ADD R2, R5, R5
        sum_arm.s:384   0xd1e98 e0466002        SUB R2, R6, R6
        sum_arm.s:385   0xd1e9c ebfffeaa        BL poly1305_blocks_armv6(SB)
        sum_arm.s:388   0xd1ea0 e1a0000d        MOVW R13, R0
        sum_arm.s:389   0xd1ea4 e1a01005        MOVW R5, R1
        sum_arm.s:390   0xd1ea8 e1a02006        MOVW R6, R2
        sum_arm.s:391   0xd1eac e1a03004        MOVW R4, R3
        sum_arm.s:392   0xd1eb0 ebffff46        BL poly1305_finish_ext_armv6(SB)
        sum_arm.s:393   0xd1eb4 e1a0d008        MOVW R8, R13 
        sum_arm.s:394   0xd1eb8 e49df11c        RET #284
        sum_arm.s:367   0xd1ebc e1a0300e        MOVW R14, R3
        sum_arm.s:367   0xd1ec0 ebfe3be8        BL runtime.morestack_noctxt(SB)
        sum_arm.s:367   0xd1ec4 eaffffdf        B golang.org/x/crypto/poly1305.poly1305_auth_armv6(SB)

@cherrymui
Copy link
Member

  sum_arm.s:367   0xd1e58 e52de11c        MOVW.W R14, -0x11c(R13)

This has been added by the compiler, although I'm not sure why.

When you declare frame size in TEXT, the assembler backend actually allocate the frame with this size, by inserting an instruction to decrement SP. (It also saves LR on stack, because it is non-leaf, and merges into a single instruction).

It is very rare that hand written assembly decrement SP, probably only in code that manipulates stack frame.

sum_arm.s:368 0xd1e5c e59d4120 MOVW 0x120(R13), R4
sum_arm.s:369 0xd1e60 e59d5124 MOVW 0x124(R13), R5
sum_arm.s:370 0xd1e64 e59d6128 MOVW 0x128(R13), R6
sum_arm.s:371 0xd1e68 e59d712c MOVW 0x12c(R13), R7
This is the loading of argument from the stack, but the offsets assume that R13 has already been moved down ~280 bytes, although I don't know when that happened.

Because it decremented SP first.

sum_arm.s:373 0xd1e6c e1a0800d MOVW R13, R8
sum_arm.s:374 0xd1e70 e3cdd03f BIC $63, R13, R13
sum_arm.s:375 0xd1e74 e24dd040 SUB $64, R13, R13
This will subtract between 64 and 127 bytes from the stack pointer and end up aligning it to 64 bytes. The rest of the code here operates on a datastructure that's ~64 bytes and which will be placed at this point in the stack.

This, as well as other SUB $n, R13 and MOVM.DB.W's, further decrement SP from the alreadly-decremented-by-280 SP. That may cause overflow, because the stack split check code cannot see them.

Can the GC preempt arbitrarily, or only at stack splits?

Only on preemption points, i.e. stack splits. So this is fine.

@agl
Copy link
Contributor

agl commented Oct 18, 2016

Thank you for that. It seems that this assembly code has been working only because of luck for a while. I can take a shot at fixing but but there's still something that I don't understand:

sum_arm.s:367 0xd1e58 e52de11c MOVW.W R14, -0x11c(R13)

I assumed that this wrote the contents of R14 to the address (R13 - 0x11c). But your comments suggest that it also updates the value of R13? Is that the case? Does it actually set R13 = R13 - 0x11c and then do a write?

(Probably I should try and setup an ARM environment with qemu or something but I currently don't have anything like that.)

@cherrymui
Copy link
Member

Yes, it does both *(R13-0x11c) = R14 and R13 -= 0x11c. That's the .W suffix for. (On other architectures this may be two instructions, one updates SP and one stores LR.)

@gopherbot
Copy link

CL https://golang.org/cl/31443 mentions this issue.

@gopherbot
Copy link

CL https://golang.org/cl/31592 mentions this issue.

gopherbot pushed a commit that referenced this issue Oct 20, 2016
This change updates the vendored copy of x/crypto/poly1305, specifically
to include the following changes:
  3ded668 poly1305: enable assembly for ARM in Go 1.6.
  dec8741 poly1305: fix stack handling in sum_arm.s

Fixes #17499.

Change-Id: I8f152da9599bd15bb976f630b0ef602be05143d3
Reviewed-on: https://go-review.googlesource.com/31592
Run-TryBot: Adam Langley <agl@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
@golang golang locked and limited conversation to collaborators Oct 20, 2017
c-expert-zigbee pushed a commit to c-expert-zigbee/crypto_go that referenced this issue Mar 28, 2022
Up till now, sum_arm.s was working only because of luck. It was written
assuming that it had stack space below the current stack pointer, but Go
decrements the stack pointer in the function prelude, so it was just
writing off the end of the stack.

This change fixes the stack manipulation so that it only writes within
the bounds.

Fixes golang/go#17499.

Change-Id: I1951b3344c21f6bd6ade79da8b96dd1bb68180db
Reviewed-on: https://go-review.googlesource.com/31443
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
c-expert-zigbee added a commit to c-expert-zigbee/crypto_go that referenced this issue Mar 29, 2022
Up till now, sum_arm.s was working only because of luck. It was written
assuming that it had stack space below the current stack pointer, but Go
decrements the stack pointer in the function prelude, so it was just
writing off the end of the stack.

This change fixes the stack manipulation so that it only writes within
the bounds.

Fixes golang/go#17499.

Change-Id: I1951b3344c21f6bd6ade79da8b96dd1bb68180db
Reviewed-on: https://go-review.googlesource.com/31443
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
@rsc rsc unassigned agl Jun 23, 2022
LewiGoddard pushed a commit to LewiGoddard/crypto that referenced this issue Feb 16, 2023
Up till now, sum_arm.s was working only because of luck. It was written
assuming that it had stack space below the current stack pointer, but Go
decrements the stack pointer in the function prelude, so it was just
writing off the end of the stack.

This change fixes the stack manipulation so that it only writes within
the bounds.

Fixes golang/go#17499.

Change-Id: I1951b3344c21f6bd6ade79da8b96dd1bb68180db
Reviewed-on: https://go-review.googlesource.com/31443
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
BiiChris pushed a commit to BiiChris/crypto that referenced this issue Sep 15, 2023
Up till now, sum_arm.s was working only because of luck. It was written
assuming that it had stack space below the current stack pointer, but Go
decrements the stack pointer in the function prelude, so it was just
writing off the end of the stack.

This change fixes the stack manipulation so that it only writes within
the bounds.

Fixes golang/go#17499.

Change-Id: I1951b3344c21f6bd6ade79da8b96dd1bb68180db
Reviewed-on: https://go-review.googlesource.com/31443
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Projects
None yet
Development

No branches or pull requests

5 participants