// Copyright 2018 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package wasm import ( "bytes" "cmd/internal/obj" "cmd/internal/obj/wasm" "cmd/internal/objabi" "cmd/link/internal/ld" "cmd/link/internal/loader" "cmd/link/internal/sym" "encoding/binary" "fmt" "internal/buildcfg" "io" "regexp" ) const ( I32 = 0x7F I64 = 0x7E F32 = 0x7D F64 = 0x7C ) const ( sectionCustom = 0 sectionType = 1 sectionImport = 2 sectionFunction = 3 sectionTable = 4 sectionMemory = 5 sectionGlobal = 6 sectionExport = 7 sectionStart = 8 sectionElement = 9 sectionCode = 10 sectionData = 11 ) // funcValueOffset is the offset between the PC_F value of a function and the index of the function in WebAssembly const funcValueOffset = 0x1000 // TODO(neelance): make function addresses play nice with heap addresses func gentext(ctxt *ld.Link, ldr *loader.Loader) { } type wasmFunc struct { Module string Name string Type uint32 Code []byte } type wasmFuncType struct { Params []byte Results []byte } func readWasmImport(ldr *loader.Loader, s loader.Sym) obj.WasmImport { reportError := func(err error) { panic(fmt.Sprintf("failed to read WASM import in sym %v: %v", s, err)) } data := ldr.Data(s) readUint32 := func() (v uint32) { v = binary.LittleEndian.Uint32(data) data = data[4:] return } readUint64 := func() (v uint64) { v = binary.LittleEndian.Uint64(data) data = data[8:] return } readByte := func() byte { if len(data) == 0 { reportError(io.EOF) } b := data[0] data = data[1:] return b } readString := func() string { n := readUint32() s := string(data[:n]) data = data[n:] return s } var wi obj.WasmImport wi.Module = readString() wi.Name = readString() wi.Params = make([]obj.WasmField, readUint32()) for i := range wi.Params { wi.Params[i].Type = obj.WasmFieldType(readByte()) wi.Params[i].Offset = int64(readUint64()) } wi.Results = make([]obj.WasmField, readUint32()) for i := range wi.Results { wi.Results[i].Type = obj.WasmFieldType(readByte()) wi.Results[i].Offset = int64(readUint64()) } return wi } var wasmFuncTypes = map[string]*wasmFuncType{ "_rt0_wasm_js": {Params: []byte{}}, // "_rt0_wasm_wasip1": {Params: []byte{}}, // "wasm_export__start": {}, // "wasm_export_run": {Params: []byte{I32, I32}}, // argc, argv "wasm_export_resume": {Params: []byte{}}, // "wasm_export_getsp": {Results: []byte{I32}}, // sp "wasm_pc_f_loop": {Params: []byte{}}, // "runtime.wasmDiv": {Params: []byte{I64, I64}, Results: []byte{I64}}, // x, y -> x/y "runtime.wasmTruncS": {Params: []byte{F64}, Results: []byte{I64}}, // x -> int(x) "runtime.wasmTruncU": {Params: []byte{F64}, Results: []byte{I64}}, // x -> uint(x) "gcWriteBarrier": {Params: []byte{I64}, Results: []byte{I64}}, // #bytes -> bufptr "runtime.gcWriteBarrier1": {Results: []byte{I64}}, // -> bufptr "runtime.gcWriteBarrier2": {Results: []byte{I64}}, // -> bufptr "runtime.gcWriteBarrier3": {Results: []byte{I64}}, // -> bufptr "runtime.gcWriteBarrier4": {Results: []byte{I64}}, // -> bufptr "runtime.gcWriteBarrier5": {Results: []byte{I64}}, // -> bufptr "runtime.gcWriteBarrier6": {Results: []byte{I64}}, // -> bufptr "runtime.gcWriteBarrier7": {Results: []byte{I64}}, // -> bufptr "runtime.gcWriteBarrier8": {Results: []byte{I64}}, // -> bufptr "cmpbody": {Params: []byte{I64, I64, I64, I64}, Results: []byte{I64}}, // a, alen, b, blen -> -1/0/1 "memeqbody": {Params: []byte{I64, I64, I64}, Results: []byte{I64}}, // a, b, len -> 0/1 "memcmp": {Params: []byte{I32, I32, I32}, Results: []byte{I32}}, // a, b, len -> <0/0/>0 "memchr": {Params: []byte{I32, I32, I32}, Results: []byte{I32}}, // s, c, len -> index } func assignAddress(ldr *loader.Loader, sect *sym.Section, n int, s loader.Sym, va uint64, isTramp bool) (*sym.Section, int, uint64) { // WebAssembly functions do not live in the same address space as the linear memory. // Instead, WebAssembly automatically assigns indices. Imported functions (section "import") // have indices 0 to n. They are followed by native functions (sections "function" and "code") // with indices n+1 and following. // // The following rules describe how wasm handles function indices and addresses: // PC_F = funcValueOffset + WebAssembly function index (not including the imports) // s.Value = PC = PC_F<<16 + PC_B // // The funcValueOffset is necessary to avoid conflicts with expectations // that the Go runtime has about function addresses. // The field "s.Value" corresponds to the concept of PC at runtime. // However, there is no PC register, only PC_F and PC_B. PC_F denotes the function, // PC_B the resume point inside of that function. The entry of the function has PC_B = 0. ldr.SetSymSect(s, sect) ldr.SetSymValue(s, int64(funcValueOffset+va/ld.MINFUNC)<<16) // va starts at zero va += uint64(ld.MINFUNC) return sect, n, va } type wasmDataSect struct { sect *sym.Section data []byte } var dataSects []wasmDataSect func asmb(ctxt *ld.Link, ldr *loader.Loader) { sections := []*sym.Section{ ldr.SymSect(ldr.Lookup("runtime.rodata", 0)), ldr.SymSect(ldr.Lookup("runtime.typelink", 0)), ldr.SymSect(ldr.Lookup("runtime.itablink", 0)), ldr.SymSect(ldr.Lookup("runtime.symtab", 0)), ldr.SymSect(ldr.Lookup("runtime.pclntab", 0)), ldr.SymSect(ldr.Lookup("runtime.noptrdata", 0)), ldr.SymSect(ldr.Lookup("runtime.data", 0)), } dataSects = make([]wasmDataSect, len(sections)) for i, sect := range sections { data := ld.DatblkBytes(ctxt, int64(sect.Vaddr), int64(sect.Length)) dataSects[i] = wasmDataSect{sect, data} } } // asmb writes the final WebAssembly module binary. // Spec: https://webassembly.github.io/spec/core/binary/modules.html func asmb2(ctxt *ld.Link, ldr *loader.Loader) { types := []*wasmFuncType{ // For normal Go functions, the single parameter is PC_B, // the return value is // 0 if the function returned normally or // 1 if the stack needs to be unwound. {Params: []byte{I32}, Results: []byte{I32}}, } // collect host imports (functions that get imported from the WebAssembly host, usually JavaScript) // we store the import index of each imported function, so the R_WASMIMPORT relocation // can write the correct index after a "call" instruction // these are added as import statements to the top of the WebAssembly binary var hostImports []*wasmFunc hostImportMap := make(map[loader.Sym]int64) for _, fn := range ctxt.Textp { relocs := ldr.Relocs(fn) for ri := 0; ri < relocs.Count(); ri++ { r := relocs.At(ri) if r.Type() == objabi.R_WASMIMPORT { if lsym, ok := ldr.WasmImportSym(fn); ok { wi := readWasmImport(ldr, lsym) hostImportMap[fn] = int64(len(hostImports)) hostImports = append(hostImports, &wasmFunc{ Module: wi.Module, Name: wi.Name, Type: lookupType(&wasmFuncType{ Params: fieldsToTypes(wi.Params), Results: fieldsToTypes(wi.Results), }, &types), }) } else { panic(fmt.Sprintf("missing wasm symbol for %s", ldr.SymName(r.Sym()))) } } } } // collect functions with WebAssembly body var buildid []byte fns := make([]*wasmFunc, len(ctxt.Textp)) for i, fn := range ctxt.Textp { wfn := new(bytes.Buffer) if ldr.SymName(fn) == "go:buildid" { writeUleb128(wfn, 0) // number of sets of locals writeI32Const(wfn, 0) wfn.WriteByte(0x0b) // end buildid = ldr.Data(fn) } else { // Relocations have variable length, handle them here. relocs := ldr.Relocs(fn) P := ldr.Data(fn) off := int32(0) for ri := 0; ri < relocs.Count(); ri++ { r := relocs.At(ri) if r.Siz() == 0 { continue // skip marker relocations } wfn.Write(P[off:r.Off()]) off = r.Off() rs := r.Sym() switch r.Type() { case objabi.R_ADDR: writeSleb128(wfn, ldr.SymValue(rs)+r.Add()) case objabi.R_CALL: writeSleb128(wfn, int64(len(hostImports))+ldr.SymValue(rs)>>16-funcValueOffset) case objabi.R_WASMIMPORT: writeSleb128(wfn, hostImportMap[rs]) default: ldr.Errorf(fn, "bad reloc type %d (%s)", r.Type(), sym.RelocName(ctxt.Arch, r.Type())) continue } } wfn.Write(P[off:]) } typ := uint32(0) if sig, ok := wasmFuncTypes[ldr.SymName(fn)]; ok { typ = lookupType(sig, &types) } name := nameRegexp.ReplaceAllString(ldr.SymName(fn), "_") fns[i] = &wasmFunc{Name: name, Type: typ, Code: wfn.Bytes()} } ctxt.Out.Write([]byte{0x00, 0x61, 0x73, 0x6d}) // magic ctxt.Out.Write([]byte{0x01, 0x00, 0x00, 0x00}) // version // Add any buildid early in the binary: if len(buildid) != 0 { writeBuildID(ctxt, buildid) } writeTypeSec(ctxt, types) writeImportSec(ctxt, hostImports) writeFunctionSec(ctxt, fns) writeTableSec(ctxt, fns) writeMemorySec(ctxt, ldr) writeGlobalSec(ctxt) writeExportSec(ctxt, ldr, len(hostImports)) writeElementSec(ctxt, uint64(len(hostImports)), uint64(len(fns))) writeCodeSec(ctxt, fns) writeDataSec(ctxt) writeProducerSec(ctxt) if !*ld.FlagS { writeNameSec(ctxt, len(hostImports), fns) } } func lookupType(sig *wasmFuncType, types *[]*wasmFuncType) uint32 { for i, t := range *types { if bytes.Equal(sig.Params, t.Params) && bytes.Equal(sig.Results, t.Results) { return uint32(i) } } *types = append(*types, sig) return uint32(len(*types) - 1) } func writeSecHeader(ctxt *ld.Link, id uint8) int64 { ctxt.Out.WriteByte(id) sizeOffset := ctxt.Out.Offset() ctxt.Out.Write(make([]byte, 5)) // placeholder for length return sizeOffset } func writeSecSize(ctxt *ld.Link, sizeOffset int64) { endOffset := ctxt.Out.Offset() ctxt.Out.SeekSet(sizeOffset) writeUleb128FixedLength(ctxt.Out, uint64(endOffset-sizeOffset-5), 5) ctxt.Out.SeekSet(endOffset) } func writeBuildID(ctxt *ld.Link, buildid []byte) { sizeOffset := writeSecHeader(ctxt, sectionCustom) writeName(ctxt.Out, "go:buildid") ctxt.Out.Write(buildid) writeSecSize(ctxt, sizeOffset) } // writeTypeSec writes the section that declares all function types // so they can be referenced by index. func writeTypeSec(ctxt *ld.Link, types []*wasmFuncType) { sizeOffset := writeSecHeader(ctxt, sectionType) writeUleb128(ctxt.Out, uint64(len(types))) for _, t := range types { ctxt.Out.WriteByte(0x60) // functype writeUleb128(ctxt.Out, uint64(len(t.Params))) for _, v := range t.Params { ctxt.Out.WriteByte(byte(v)) } writeUleb128(ctxt.Out, uint64(len(t.Results))) for _, v := range t.Results { ctxt.Out.WriteByte(byte(v)) } } writeSecSize(ctxt, sizeOffset) } // writeImportSec writes the section that lists the functions that get // imported from the WebAssembly host, usually JavaScript. func writeImportSec(ctxt *ld.Link, hostImports []*wasmFunc) { sizeOffset := writeSecHeader(ctxt, sectionImport) writeUleb128(ctxt.Out, uint64(len(hostImports))) // number of imports for _, fn := range hostImports { if fn.Module != "" { writeName(ctxt.Out, fn.Module) } else { writeName(ctxt.Out, wasm.GojsModule) // provided by the import object in wasm_exec.js } writeName(ctxt.Out, fn.Name) ctxt.Out.WriteByte(0x00) // func import writeUleb128(ctxt.Out, uint64(fn.Type)) } writeSecSize(ctxt, sizeOffset) } // writeFunctionSec writes the section that declares the types of functions. // The bodies of these functions will later be provided in the "code" section. func writeFunctionSec(ctxt *ld.Link, fns []*wasmFunc) { sizeOffset := writeSecHeader(ctxt, sectionFunction) writeUleb128(ctxt.Out, uint64(len(fns))) for _, fn := range fns { writeUleb128(ctxt.Out, uint64(fn.Type)) } writeSecSize(ctxt, sizeOffset) } // writeTableSec writes the section that declares tables. Currently there is only a single table // that is used by the CallIndirect operation to dynamically call any function. // The contents of the table get initialized by the "element" section. func writeTableSec(ctxt *ld.Link, fns []*wasmFunc) { sizeOffset := writeSecHeader(ctxt, sectionTable) numElements := uint64(funcValueOffset + len(fns)) writeUleb128(ctxt.Out, 1) // number of tables ctxt.Out.WriteByte(0x70) // type: anyfunc ctxt.Out.WriteByte(0x00) // no max writeUleb128(ctxt.Out, numElements) // min writeSecSize(ctxt, sizeOffset) } // writeMemorySec writes the section that declares linear memories. Currently one linear memory is being used. // Linear memory always starts at address zero. More memory can be requested with the GrowMemory instruction. func writeMemorySec(ctxt *ld.Link, ldr *loader.Loader) { sizeOffset := writeSecHeader(ctxt, sectionMemory) dataSection := ldr.SymSect(ldr.Lookup("runtime.data", 0)) dataEnd := dataSection.Vaddr + dataSection.Length var initialSize = dataEnd + 16<<20 // 16MB, enough for runtime init without growing const wasmPageSize = 64 << 10 // 64KB writeUleb128(ctxt.Out, 1) // number of memories ctxt.Out.WriteByte(0x00) // no maximum memory size writeUleb128(ctxt.Out, initialSize/wasmPageSize) // minimum (initial) memory size writeSecSize(ctxt, sizeOffset) } // writeGlobalSec writes the section that declares global variables. func writeGlobalSec(ctxt *ld.Link) { sizeOffset := writeSecHeader(ctxt, sectionGlobal) globalRegs := []byte{ I32, // 0: SP I64, // 1: CTXT I64, // 2: g I64, // 3: RET0 I64, // 4: RET1 I64, // 5: RET2 I64, // 6: RET3 I32, // 7: PAUSE } writeUleb128(ctxt.Out, uint64(len(globalRegs))) // number of globals for _, typ := range globalRegs { ctxt.Out.WriteByte(typ) ctxt.Out.WriteByte(0x01) // var switch typ { case I32: writeI32Const(ctxt.Out, 0) case I64: writeI64Const(ctxt.Out, 0) } ctxt.Out.WriteByte(0x0b) // end } writeSecSize(ctxt, sizeOffset) } // writeExportSec writes the section that declares exports. // Exports can be accessed by the WebAssembly host, usually JavaScript. // The wasm_export_* functions and the linear memory get exported. func writeExportSec(ctxt *ld.Link, ldr *loader.Loader, lenHostImports int) { sizeOffset := writeSecHeader(ctxt, sectionExport) switch buildcfg.GOOS { case "wasip1": writeUleb128(ctxt.Out, 2) // number of exports s := ldr.Lookup("_rt0_wasm_wasip1", 0) idx := uint32(lenHostImports) + uint32(ldr.SymValue(s)>>16) - funcValueOffset writeName(ctxt.Out, "_start") // the wasi entrypoint ctxt.Out.WriteByte(0x00) // func export writeUleb128(ctxt.Out, uint64(idx)) // funcidx writeName(ctxt.Out, "memory") // memory in wasi ctxt.Out.WriteByte(0x02) // mem export writeUleb128(ctxt.Out, 0) // memidx case "js": writeUleb128(ctxt.Out, 4) // number of exports for _, name := range []string{"run", "resume", "getsp"} { s := ldr.Lookup("wasm_export_"+name, 0) idx := uint32(lenHostImports) + uint32(ldr.SymValue(s)>>16) - funcValueOffset writeName(ctxt.Out, name) // inst.exports.run/resume/getsp in wasm_exec.js ctxt.Out.WriteByte(0x00) // func export writeUleb128(ctxt.Out, uint64(idx)) // funcidx } writeName(ctxt.Out, "mem") // inst.exports.mem in wasm_exec.js ctxt.Out.WriteByte(0x02) // mem export writeUleb128(ctxt.Out, 0) // memidx default: ld.Exitf("internal error: writeExportSec: unrecognized GOOS %s", buildcfg.GOOS) } writeSecSize(ctxt, sizeOffset) } // writeElementSec writes the section that initializes the tables declared by the "table" section. // The table for CallIndirect gets initialized in a very simple way so that each table index (PC_F value) // maps linearly to the function index (numImports + PC_F). func writeElementSec(ctxt *ld.Link, numImports, numFns uint64) { sizeOffset := writeSecHeader(ctxt, sectionElement) writeUleb128(ctxt.Out, 1) // number of element segments writeUleb128(ctxt.Out, 0) // tableidx writeI32Const(ctxt.Out, funcValueOffset) ctxt.Out.WriteByte(0x0b) // end writeUleb128(ctxt.Out, numFns) // number of entries for i := uint64(0); i < numFns; i++ { writeUleb128(ctxt.Out, numImports+i) } writeSecSize(ctxt, sizeOffset) } // writeCodeSec writes the section that provides the function bodies for the functions // declared by the "func" section. func writeCodeSec(ctxt *ld.Link, fns []*wasmFunc) { sizeOffset := writeSecHeader(ctxt, sectionCode) writeUleb128(ctxt.Out, uint64(len(fns))) // number of code entries for _, fn := range fns { writeUleb128(ctxt.Out, uint64(len(fn.Code))) ctxt.Out.Write(fn.Code) } writeSecSize(ctxt, sizeOffset) } // writeDataSec writes the section that provides data that will be used to initialize the linear memory. func writeDataSec(ctxt *ld.Link) { sizeOffset := writeSecHeader(ctxt, sectionData) type dataSegment struct { offset int32 data []byte } // Omit blocks of zeroes and instead emit data segments with offsets skipping the zeroes. // This reduces the size of the WebAssembly binary. We use 8 bytes as an estimate for the // overhead of adding a new segment (same as wasm-opt's memory-packing optimization uses). const segmentOverhead = 8 // Generate at most this many segments. A higher number of segments gets rejected by some WebAssembly runtimes. const maxNumSegments = 100000 var segments []*dataSegment for secIndex, ds := range dataSects { data := ds.data offset := int32(ds.sect.Vaddr) // skip leading zeroes for len(data) > 0 && data[0] == 0 { data = data[1:] offset++ } for len(data) > 0 { dataLen := int32(len(data)) var segmentEnd, zeroEnd int32 if len(segments)+(len(dataSects)-secIndex) == maxNumSegments { segmentEnd = dataLen zeroEnd = dataLen } else { for { // look for beginning of zeroes for segmentEnd < dataLen && data[segmentEnd] != 0 { segmentEnd++ } // look for end of zeroes zeroEnd = segmentEnd for zeroEnd < dataLen && data[zeroEnd] == 0 { zeroEnd++ } // emit segment if omitting zeroes reduces the output size if zeroEnd-segmentEnd >= segmentOverhead || zeroEnd == dataLen { break } segmentEnd = zeroEnd } } segments = append(segments, &dataSegment{ offset: offset, data: data[:segmentEnd], }) data = data[zeroEnd:] offset += zeroEnd } } writeUleb128(ctxt.Out, uint64(len(segments))) // number of data entries for _, seg := range segments { writeUleb128(ctxt.Out, 0) // memidx writeI32Const(ctxt.Out, seg.offset) ctxt.Out.WriteByte(0x0b) // end writeUleb128(ctxt.Out, uint64(len(seg.data))) ctxt.Out.Write(seg.data) } writeSecSize(ctxt, sizeOffset) } // writeProducerSec writes an optional section that reports the source language and compiler version. func writeProducerSec(ctxt *ld.Link) { sizeOffset := writeSecHeader(ctxt, sectionCustom) writeName(ctxt.Out, "producers") writeUleb128(ctxt.Out, 2) // number of fields writeName(ctxt.Out, "language") // field name writeUleb128(ctxt.Out, 1) // number of values writeName(ctxt.Out, "Go") // value: name writeName(ctxt.Out, buildcfg.Version) // value: version writeName(ctxt.Out, "processed-by") // field name writeUleb128(ctxt.Out, 1) // number of values writeName(ctxt.Out, "Go cmd/compile") // value: name writeName(ctxt.Out, buildcfg.Version) // value: version writeSecSize(ctxt, sizeOffset) } var nameRegexp = regexp.MustCompile(`[^\w.]`) // writeNameSec writes an optional section that assigns names to the functions declared by the "func" section. // The names are only used by WebAssembly stack traces, debuggers and decompilers. // TODO(neelance): add symbol table of DATA symbols func writeNameSec(ctxt *ld.Link, firstFnIndex int, fns []*wasmFunc) { sizeOffset := writeSecHeader(ctxt, sectionCustom) writeName(ctxt.Out, "name") sizeOffset2 := writeSecHeader(ctxt, 0x01) // function names writeUleb128(ctxt.Out, uint64(len(fns))) for i, fn := range fns { writeUleb128(ctxt.Out, uint64(firstFnIndex+i)) writeName(ctxt.Out, fn.Name) } writeSecSize(ctxt, sizeOffset2) writeSecSize(ctxt, sizeOffset) } type nameWriter interface { io.ByteWriter io.Writer } func writeI32Const(w io.ByteWriter, v int32) { w.WriteByte(0x41) // i32.const writeSleb128(w, int64(v)) } func writeI64Const(w io.ByteWriter, v int64) { w.WriteByte(0x42) // i64.const writeSleb128(w, v) } func writeName(w nameWriter, name string) { writeUleb128(w, uint64(len(name))) w.Write([]byte(name)) } func writeUleb128(w io.ByteWriter, v uint64) { if v < 128 { w.WriteByte(uint8(v)) return } more := true for more { c := uint8(v & 0x7f) v >>= 7 more = v != 0 if more { c |= 0x80 } w.WriteByte(c) } } func writeUleb128FixedLength(w io.ByteWriter, v uint64, length int) { for i := 0; i < length; i++ { c := uint8(v & 0x7f) v >>= 7 if i < length-1 { c |= 0x80 } w.WriteByte(c) } if v != 0 { panic("writeUleb128FixedLength: length too small") } } func writeSleb128(w io.ByteWriter, v int64) { more := true for more { c := uint8(v & 0x7f) s := uint8(v & 0x40) v >>= 7 more = !((v == 0 && s == 0) || (v == -1 && s != 0)) if more { c |= 0x80 } w.WriteByte(c) } } func fieldsToTypes(fields []obj.WasmField) []byte { b := make([]byte, len(fields)) for i, f := range fields { switch f.Type { case obj.WasmI32, obj.WasmPtr: b[i] = I32 case obj.WasmI64: b[i] = I64 case obj.WasmF32: b[i] = F32 case obj.WasmF64: b[i] = F64 default: panic(fmt.Sprintf("fieldsToTypes: unknown field type: %d", f.Type)) } } return b }