Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cmd/asm: VMOVDQU global, Y8 misassembled #19518

Closed
pwaller opened this issue Mar 12, 2017 · 1 comment
Closed

cmd/asm: VMOVDQU global, Y8 misassembled #19518

pwaller opened this issue Mar 12, 2017 · 1 comment
Milestone

Comments

@pwaller
Copy link
Contributor

pwaller commented Mar 12, 2017

Version: go1.8 Arch: amd64.

The Go assembler appears to misassemble this example, causing a very subtle failure: reading a global into registers Y0 through Y7 works, but reading it into Y8 or above causes the code to segfault, because the encoded source address is incorrect.

#include "textflag.h"

GLOBL zeros<>(SB),RODATA,$64


TEXT ·testASM(SB),NOSPLIT,$0
	VMOVDQU zeros<>(SB), Y7        // OK
	VMOVDQU zeros<>(SB), Y8        // Causes segfault
	RET

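GNU objdump 2.26 output. Note how objdump correctly resolves the first displacement, 0x55c58, to <zeros>, but the second, 0x55c5000, is incorrect: the resulting address is not mapped at runtime, so the load segfaults. The expected displacement bytes (50 5c 05 00, i.e. 0x55c50) are present in the second instruction but sit one byte later than they should, so the final 00 lands where the RET opcode should be.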

  4f3b80:       c5 fe 6f 3d 58 5c 05    vmovdqu 0x55c58(%rip),%ymm7        # 5497e0 <zeros>
  4f3b87:       00 
  4f3b88:       c5 7e 6f 05 00 50 5c    vmovdqu 0x55c5000(%rip),%ymm8        # 5ab8b90 <runtime.end+0x54c3910>
  4f3b8f:       05 
  4f3b90:       00 cc                   add    %cl,%ah

The next expansion shows the objdump output you get if you load the zeros<> global into each register Y0-Y15 in turn.

TEXT ·testASM(SB),NOSPLIT,$0
	VMOVDQU zeros<>(SB), Y0
	VMOVDQU zeros<>(SB), Y1
	VMOVDQU zeros<>(SB), Y2
	VMOVDQU zeros<>(SB), Y3
	VMOVDQU zeros<>(SB), Y4
	VMOVDQU zeros<>(SB), Y5
	VMOVDQU zeros<>(SB), Y6
	VMOVDQU zeros<>(SB), Y7
	VMOVDQU zeros<>(SB), Y8
	VMOVDQU zeros<>(SB), Y9
	VMOVDQU zeros<>(SB), Y10
	VMOVDQU zeros<>(SB), Y11
	VMOVDQU zeros<>(SB), Y12
	VMOVDQU zeros<>(SB), Y13
	VMOVDQU zeros<>(SB), Y14
	VMOVDQU zeros<>(SB), Y15
	RET
00000000004f3b80 <opt.testASM>:
  4f3b80:       c5 fe 6f 05 58 5c 05    vmovdqu 0x55c58(%rip),%ymm0        # 5497e0 <zeros>
  4f3b87:       00 
  4f3b88:       c5 fe 6f 0d 50 5c 05    vmovdqu 0x55c50(%rip),%ymm1        # 5497e0 <zeros>
  4f3b8f:       00 
  4f3b90:       c5 fe 6f 15 48 5c 05    vmovdqu 0x55c48(%rip),%ymm2        # 5497e0 <zeros>
  4f3b97:       00 
  4f3b98:       c5 fe 6f 1d 40 5c 05    vmovdqu 0x55c40(%rip),%ymm3        # 5497e0 <zeros>
  4f3b9f:       00 
  4f3ba0:       c5 fe 6f 25 38 5c 05    vmovdqu 0x55c38(%rip),%ymm4        # 5497e0 <zeros>
  4f3ba7:       00 
  4f3ba8:       c5 fe 6f 2d 30 5c 05    vmovdqu 0x55c30(%rip),%ymm5        # 5497e0 <zeros>
  4f3baf:       00 
  4f3bb0:       c5 fe 6f 35 28 5c 05    vmovdqu 0x55c28(%rip),%ymm6        # 5497e0 <zeros>
  4f3bb7:       00 
  4f3bb8:       c5 fe 6f 3d 20 5c 05    vmovdqu 0x55c20(%rip),%ymm7        # 5497e0 <zeros>
  4f3bbf:       00 
  4f3bc0:       c5 7e 6f 05 00 18 5c    vmovdqu 0x55c1800(%rip),%ymm8        # 5ab53c8 <runtime.end+0x54c0148>
  4f3bc7:       05 
  4f3bc8:       00 7e 6f                add    %bh,0x6f(%rsi)
  4f3bcb:       0d 00 10 5c 05          or     $0x55c1000,%eax
  4f3bd0:       00 7e 6f                add    %bh,0x6f(%rsi)
  4f3bd3:       15 00 08 5c 05          adc    $0x55c0800,%eax
  4f3bd8:       00 7e 6f                add    %bh,0x6f(%rsi)
  4f3bdb:       1d 00 00 5c 05          sbb    $0x55c0000,%eax
  4f3be0:       00 7e 6f                add    %bh,0x6f(%rsi)
  4f3be3:       25 00 f8 5b 05          and    $0x55bf800,%eax
  4f3be8:       00 7e 6f                add    %bh,0x6f(%rsi)
  4f3beb:       2d 00 f0 5b 05          sub    $0x55bf000,%eax
  4f3bf0:       00 7e 6f                add    %bh,0x6f(%rsi)
  4f3bf3:       35 00 e8 5b 05          xor    $0x55be800,%eax
  4f3bf8:       00 7e 6f                add    %bh,0x6f(%rsi)
  4f3bfb:       3d 00 e0 5b 05          cmp    $0x55be000,%eax
  4f3c00:       00 cc                   add    %cl,%ah
  4f3c02:       cc                      int3

 

The go tool objdump output is provided below for completeness, but it is junk: it does not decode these instructions correctly.

TEXT opt.testASM(SB) opt/opt_amd64.s
        opt_amd64.s:11  0x4f3b80        c5              ?                       
        opt_amd64.s:11  0x4f3b81        fe              ?                       
        opt_amd64.s:11  0x4f3b82        6f              OUTSD DS:0(SI), DX      
        opt_amd64.s:11  0x4f3b83        3d585c0500      CMPL $0x55c58, AX       
        opt_amd64.s:12  0x4f3b88        c5              ?                       
        opt_amd64.s:12  0x4f3b89        7e6f            JLE 0x4f3bfa            
        opt_amd64.s:12  0x4f3b8b        0500505c05      ADDL $0x55c5000, AX     
        opt_amd64.s:13  0x4f3b90        00              ?                       
@josharian josharian added this to the Go1.9 milestone Mar 12, 2017
@gopherbot
Copy link

CL https://golang.org/cl/38138 mentions this issue.

@golang golang locked and limited conversation to collaborators Apr 20, 2018
Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Projects
None yet
Development

No branches or pull requests

3 participants