From 55c4e8c349da1f0b59d1ff3601aa202b7c97e178 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Aleksey=20Kliger=20=28=CE=BBgeek=29?= Date: Tue, 16 May 2023 16:10:38 -0400 Subject: [PATCH] [wasm] Webcil-in-WebAssembly (#85932) Define a WebAssembly module wrapper for Webcil assemblies. Contributes to #80807 ### Why In some settings serving `application/octet-stream` data, or files with weird extensions will trigger firewalls or AV tools. But let's assume that if you're interested in deploying a .NET WebAssembly app, you're in an environment that can at least serve WebAssembly modules. ### How Essentially we serve this WebAssembly module: ```wat (module (data "\0f\00\00\00") ;; data segment 0: payload size (data "webcil Payload\cc") ;; data segment 1: webcil payload (memory (import "webcil" "memory") 1) (global (export "webcilVersion") i32 (i32.const 0)) (func (export "getWebcilSize") (param $destPtr i32) (result) local.get $destPtr i32.const 0 i32.const 4 memory.init 0) (func (export "getWebcilPayload") (param $d i32) (param $n i32) (result) local.get $d i32.const 0 local.get $n memory.init 1)) ``` The module exports two WebAssembly functions `getWebcilSize` and `getWebcilPayload` that write some bytes (being the size or payload of the webcil assembly) to the linear memory at a given offset. The module also exports the constant `webcilVersion` to version the wrapper format. 
So a runtime or tool that wants to consume the webcil module can do something like: ```js const wasmModule = new WebAssembly.Module (...); const wasmMemory = new WebAssembly.Memory ({initial: 1}); const wasmInstance = new WebAssembly.Instance(wasmModule, {webcil: {memory: wasmMemory}}); const { getWebcilPayload, webcilVersion, getWebcilSize } = wasmInstance.exports; console.log (`Version ${webcilVersion.value}`); getWebcilSize(0); const size = new Int32Array (wasmMemory.buffer)[0] console.log (`Size ${size}`); console.log (new Uint8Array(wasmMemory.buffer).subarray(0, 20)); getWebcilPayload(4, size); console.log (new Uint8Array(wasmMemory.buffer).subarray(0, 20)); ``` ### How (Part 2) But actually, we will define the wrapper to consist of exactly 2 data segments in the WebAssembly data section: segment 0 is 4 bytes and encodes the webcil payload size; and segment 1 is of variable size and contains the webcil payload. So to load a webcil-in-wasm module, the runtime gets the _raw bytes_ of the WebAssembly module (ie: without instantiating it), and parses it to find the data section, assert that there are 2 segments, ensure they're both passive, and get the data directly from segment 1. --- * Add option to emit webcil inside a wasm module wrapper * [mono][loader] implement a webcil-in-wasm reader * reword WebcilWasmWrapper summary comment * update the Webcil spec to include the WebAssembly wrapper module * Adjust RVA map offsets to account for wasm prefix MonoImage:raw_data is used as a base when applying the RVA map to map virtual addresses to physical offsets in the assembly. With webcil-in-wasm there's an extra wasm prefix before the webcil payload starts, so we need to account for this extra data when creating the mapping. An alternative is to compute the correct offsets as part of generating the webcil, but that would entangle the wasm module and the webcil payload. The current (somewhat hacky approach) keeps them logically separate. 
* Add a note about the rva mapping to the spec * Serve webcil-in-wasm as .wasm * remove old .webcil support from Sdk Pack Tasks * Implement support for webcil in wasm in the managed WebcilReader * align webcil payload to a 4-byte boundary within the wasm module Add padding to data segment 0 to ensure that data segment 1's payload (ie the webcil content itself) is 4-byte aligned * assert that webcil raw data is 4-byte aligned * add 4-byte alignment requirement to the webcil spec * Don't modify MonoImageStorage:raw_data instead just keep track of the webcil offset in the MonoImageStorage. This introduces a situation where MonoImage:raw_data is different from MonoImageStorage:raw_data. The one to use for accessing IL and metadata is MonoImage:raw_data. The storage pointer is just used by the image loading machinery --------- Co-authored-by: Larry Ewing --- docs/design/mono/webcil.md | 84 +++++++- .../src/Webcil/WasmModuleReader.cs | 148 ++++++++++++++ .../src/Webcil/WebcilConverter.cs | 24 ++- .../src/Webcil/WebcilReader.cs | 62 +++++- .../src/Webcil/WebcilWasmWrapper.cs | 226 +++++++++++++++++++++ src/mono/mono/metadata/assembly.c | 14 ++ src/mono/mono/metadata/image.c | 8 +- src/mono/mono/metadata/metadata-internals.h | 8 +- src/mono/mono/metadata/mono-debug.c | 8 + src/mono/mono/metadata/webcil-loader.c | 109 +++++++++- src/mono/mono/metadata/webcil-loader.h | 6 +- src/mono/mono/mini/monovm.c | 13 +- src/mono/mono/utils/CMakeLists.txt | 5 +- src/mono/mono/utils/wasm-module-reader.c | 140 +++++++++++++ src/mono/mono/utils/wasm-module-reader.h | 35 ++++ src/mono/mono/utils/wasm-sections.def | 21 ++ src/mono/sample/wasm/browser-advanced/index.html | 4 +- .../Wasm.Build.Tests/Blazor/BuildPublishTests.cs | 2 +- src/mono/wasm/Wasm.Build.Tests/BuildTestBase.cs | 6 +- src/mono/wasm/build/WasmApp.targets | 2 +- .../debugger/BrowserDebugProxy/MonoSDBHelper.cs | 7 + .../debugger/DebuggerTestSuite/DebuggerTestBase.cs | 2 + .../wasm/debugger/DebuggerTestSuite/MonoJsTests.cs | 2 
+- src/tasks/Common/Utils.cs | 2 + .../ComputeWasmPublishAssets.cs | 2 +- .../ConvertDllsToWebCil.cs | 6 +- .../GenerateWasmBootJson.cs | 2 +- src/tasks/WasmAppBuilder/WasmAppBuilder.cs | 6 +- 28 files changed, 906 insertions(+), 48 deletions(-) create mode 100644 src/libraries/Microsoft.NET.WebAssembly.Webcil/src/Webcil/WasmModuleReader.cs create mode 100644 src/libraries/Microsoft.NET.WebAssembly.Webcil/src/Webcil/WebcilWasmWrapper.cs create mode 100644 src/mono/mono/utils/wasm-module-reader.c create mode 100644 src/mono/mono/utils/wasm-module-reader.h create mode 100644 src/mono/mono/utils/wasm-sections.def diff --git a/docs/design/mono/webcil.md b/docs/design/mono/webcil.md index 3bcb7d6..346d763 100644 --- a/docs/design/mono/webcil.md +++ b/docs/design/mono/webcil.md @@ -2,7 +2,8 @@ ## Version -This is version 0.0 of the Webcil format. +This is version 0.0 of the Webcil payload format. +This is version 0 of the WebAssembly module Webcil wrapper. ## Motivation @@ -10,13 +11,74 @@ When deploying the .NET runtime to the browser using WebAssembly, we have receiv customers that certain users are unable to use their apps because firewalls and anti-virus software may prevent browsers from downloading or caching assemblies with a .DLL extension and PE contents. -This document defines a new container format for ECMA-335 assemblies -that uses the `.webcil` extension and uses a new WebCIL container -format. +This document defines a new container format for ECMA-335 assemblies that uses the `.wasm` extension +and uses a new WebCIL metadata payload format wrapped in a WebAssembly module. 
## Specification +### Webcil WebAssembly module + +Webcil consists of a standard [binary WebAssembly version 1 module](https://webassembly.github.io/spec/core/binary/index.html) containing the following WAT module: + +``` wat +(module + (data "\0f\00\00\00") ;; data segment 0: payload size as a 4 byte LE uint32 + (data "webcil Payload\cc") ;; data segment 1: webcil payload + (memory (import "webcil" "memory") 1) + (global (export "webcilVersion") i32 (i32.const 0)) + (func (export "getWebcilSize") (param $destPtr i32) (result) + local.get $destPtr + i32.const 0 + i32.const 4 + memory.init 0) + (func (export "getWebcilPayload") (param $d i32) (param $n i32) (result) + local.get $d + i32.const 0 + local.get $n + memory.init 1)) +``` + +That is, the module imports linear memory 0 and exports: +* a global `i32` `webcilVersion` encoding the version of the WebAssembly wrapper (currently 0), +* a function `getWebcilSize : i32 -> ()` that writes the size of the Webcil payload to the specified + address in linear memory as a `u32` (that is: 4 LE bytes). +* a function `getWebcilPayload : i32 i32 -> ()` that writes `$n` bytes of the content of the Webcil + payload at the specified address `$d` in linear memory. + +The Webcil payload size and payload content are stored in the data section of the WebAssembly module +as passive data segments 0 and 1, respectively. The module must not contain additional data +segments. The module must store the payload size in data segment 0, and the payload content in data +segment 1. + +The payload content in data segment 1 must be aligned on a 4-byte boundary within the WebAssembly +module. Additional trailing padding may be added to the data segment 0 content to correctly align +data segment 1's content. + +(**Rationale**: With this wrapper it is possible to split the WebAssembly module into a *prefix* +consisting of everything before the data section, the data section, and a *suffix* that consists of +everything after the data section.
The prefix and suffix do not depend on the contents of the +Webcil payload and a tool that generates Webcil files could simply emit the prefix and suffix from +constant data. The data section is the only variable content between different Webcil-encoded .NET +assemblies) + +(**Rationale**: Encoding the payload in the data section in passive data segments with known indices +allows a runtime that does not include a WebAssembly host or a runtime that does not wish to +instantiate the WebAssembly module to extract the payload by traversing the WebAssembly module and +locating the Webcil payload in the data section at segment 1.) + +(**Rationale**: The alignment requirement is due to ECMA-335 metadata requiring certain portions of +the physical layout to be 4-byte aligned, for example ECMA-335 Section II.25.4 and II.25.4.5. +Aligning the Webcil content within the wasm module allows tools that directly examine the wasm +module without instantiating it to properly parse the ECMA-335 metadata in the Webcil payload.) + +(**Note**: the wrapper may be versioned independently of the payload.) + + +### Webcil payload + +The webcil payload contains the ECMA-335 metadata, IL and resources comprising a .NET assembly. + As our starting point we take section II.25.1 "Structure of the runtime file format" from ECMA-335 6th Edition. @@ -40,12 +102,12 @@ A Webcil file follows a similar structure | CLI Data | | | -## Webcil Headers +### Webcil Headers The Webcil headers consist of a Webcil header followed by a sequence of section headers. (All multi-byte integers are in little endian format). -### Webcil Header +#### Webcil Header ``` c struct WebcilHeader { @@ -75,11 +137,11 @@ The next pairs of integers are a subset of the PE Header data directory specifyi of the CLI header, as well as the directory entry for the PE debug directory. 
-### Section header table +#### Section header table Immediately following the Webcil header is a sequence (whose length is given by `coff_sections` above) of section headers giving their virtual address and virtual size, as well as the offset in -the Webcil file and the size in the file. This is a subset of the PE section header that includes +the Webcil payload and the size in the file. This is a subset of the PE section header that includes enough information to correctly interpret the RVAs from the webcil header and from the .NET metadata. Other information (such as the section names) are not included. @@ -92,11 +154,13 @@ struct SectionHeader { }; ``` -### Sections +(**Note**: the `st_raw_data_ptr` member is an offset from the beginning of the Webcil payload, not from the beginning of the WebAssembly wrapper module.) + +#### Sections Immediately following the section table are the sections. These are copied verbatim from the PE file. -## Rationale +### Rationale The intention is to include only the information necessary for the runtime to locate the metadata root, and to resolve the RVA references in the metadata (for locating data declarations and method IL). diff --git a/src/libraries/Microsoft.NET.WebAssembly.Webcil/src/Webcil/WasmModuleReader.cs b/src/libraries/Microsoft.NET.WebAssembly.Webcil/src/Webcil/WasmModuleReader.cs new file mode 100644 index 0000000..a0744c3 --- /dev/null +++ b/src/libraries/Microsoft.NET.WebAssembly.Webcil/src/Webcil/WasmModuleReader.cs @@ -0,0 +1,148 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Collections.Immutable; +using System.IO; +using System.Reflection; +using System.Runtime.InteropServices; +using System.Text; + +namespace Microsoft.NET.WebAssembly.Webcil; + +internal class WasmModuleReader : IDisposable +{ + public enum Section : byte + { + // order matters: enum values must match the WebAssembly spec + Custom, + Type, + Import, + Function, + Table, + Memory, + Global, + Export, + Start, + Element, + Code, + Data, + DataCount, + } + + private readonly BinaryReader _reader; + + private readonly Lazy _isWasmModule; + + public bool IsWasmModule => _isWasmModule.Value; + + public WasmModuleReader(Stream stream) + { + _reader = new BinaryReader(stream, Encoding.UTF8, leaveOpen: true); + _isWasmModule = new Lazy(this.GetIsWasmModule); + } + + + public void Dispose() + { + Dispose(true); + } + + + protected virtual void Dispose(bool disposing) + { + if (disposing) + { + _reader.Dispose(); + } + } + + protected virtual bool VisitSection (Section sec, out bool shouldStop) + { + shouldStop = false; + return true; + } + + private const uint WASM_MAGIC = 0x6d736100u; // "\0asm" + + private bool GetIsWasmModule() + { + _reader.BaseStream.Seek(0, SeekOrigin.Begin); + try + { + uint magic = _reader.ReadUInt32(); + if (magic == WASM_MAGIC) + return true; + } catch (EndOfStreamException) {} + return false; + } + + public bool Visit() + { + if (!IsWasmModule) + return false; + _reader.BaseStream.Seek(4L, SeekOrigin.Begin); // skip magic + + uint version = _reader.ReadUInt32(); + if (version != 1) + return false; + + bool success = true; + while (success) { + success = DoVisitSection (out bool shouldStop); + if (shouldStop) + break; + } + return success; + } + + private bool DoVisitSection(out bool shouldStop) + { + shouldStop = false; + byte code = _reader.ReadByte(); + Section section = (Section)code; + if (!Enum.IsDefined(typeof(Section), section)) + return false; + uint sectionSize = ReadULEB128(); + + long savedPos = 
_reader.BaseStream.Position; + try + { + return VisitSection(section, out shouldStop); + } + finally + { + _reader.BaseStream.Seek(savedPos + (long)sectionSize, SeekOrigin.Begin); + } + } + + protected uint ReadULEB128() + { + uint val = 0; + int shift = 0; + while (true) + { + byte b = _reader.ReadByte(); + val |= (b & 0x7fu) << shift; + if ((b & 0x80u) == 0) break; + shift += 7; + if (shift >= 35) + throw new OverflowException(); + } + return val; + } + + protected bool TryReadPassiveDataSegment (out long segmentLength, out long segmentStart) + { + segmentLength = 0; + segmentStart = 0; + byte code = _reader.ReadByte(); + if (code != 1) + return false; // not passive + segmentLength = ReadULEB128(); + segmentStart = _reader.BaseStream.Position; + // skip over the data + _reader.BaseStream.Seek (segmentLength, SeekOrigin.Current); + return true; + } +} diff --git a/src/libraries/Microsoft.NET.WebAssembly.Webcil/src/Webcil/WebcilConverter.cs b/src/libraries/Microsoft.NET.WebAssembly.Webcil/src/Webcil/WebcilConverter.cs index 28a8d0e..a38af72 100644 --- a/src/libraries/Microsoft.NET.WebAssembly.Webcil/src/Webcil/WebcilConverter.cs +++ b/src/libraries/Microsoft.NET.WebAssembly.Webcil/src/Webcil/WebcilConverter.cs @@ -42,6 +42,8 @@ public class WebcilConverter private string InputPath => _inputPath; + public bool WrapInWebAssembly { get; set; } = true; + private WebcilConverter(string inputPath, string outputPath) { _inputPath = inputPath; @@ -62,6 +64,26 @@ public class WebcilConverter } using var outputStream = File.Open(_outputPath, FileMode.Create, FileAccess.Write); + if (!WrapInWebAssembly) + { + WriteConversionTo(outputStream, inputStream, peInfo, wcInfo); + } + else + { + // if wrapping in WASM, write the webcil payload to memory because we need to discover the length + + // webcil is about the same size as the PE file + using var memoryStream = new MemoryStream(checked((int)inputStream.Length)); + WriteConversionTo(memoryStream, inputStream, peInfo, wcInfo); 
+ memoryStream.Flush(); + var wrapper = new WebcilWasmWrapper(memoryStream); + memoryStream.Seek(0, SeekOrigin.Begin); + wrapper.WriteWasmWrappedWebcil(outputStream); + } + } + + public void WriteConversionTo(Stream outputStream, FileStream inputStream, PEFileInfo peInfo, WCFileInfo wcInfo) + { WriteHeader(outputStream, wcInfo.Header); WriteSectionHeaders(outputStream, wcInfo.SectionHeaders); CopySections(outputStream, inputStream, peInfo.SectionHeaders); @@ -210,7 +232,7 @@ public class WebcilConverter } #endif - private static void CopySections(FileStream outStream, FileStream inputStream, ImmutableArray peSections) + private static void CopySections(Stream outStream, FileStream inputStream, ImmutableArray peSections) { // endianness: ok, we're just copying from one stream to another foreach (var peHeader in peSections) diff --git a/src/libraries/Microsoft.NET.WebAssembly.Webcil/src/Webcil/WebcilReader.cs b/src/libraries/Microsoft.NET.WebAssembly.Webcil/src/Webcil/WebcilReader.cs index 39d5813..c6e2493 100644 --- a/src/libraries/Microsoft.NET.WebAssembly.Webcil/src/Webcil/WebcilReader.cs +++ b/src/libraries/Microsoft.NET.WebAssembly.Webcil/src/Webcil/WebcilReader.cs @@ -27,6 +27,8 @@ public sealed partial class WebcilReader : IDisposable private string? 
InputPath { get; } + private readonly long _webcilInWasmOffset; + public WebcilReader(Stream stream) { this._stream = stream; @@ -34,6 +36,10 @@ public sealed partial class WebcilReader : IDisposable { throw new ArgumentException("Stream must be readable and seekable", nameof(stream)); } + if (TryReadWasmWrapper(out var webcilInWasmOffset)) { + _webcilInWasmOffset = webcilInWasmOffset; + _stream.Seek(_webcilInWasmOffset, SeekOrigin.Begin); + } if (!ReadHeader()) { throw new BadImageFormatException("Stream does not contain a valid Webcil file", nameof(stream)); @@ -181,7 +187,7 @@ public sealed partial class WebcilReader : IDisposable public CodeViewDebugDirectoryData ReadCodeViewDebugDirectoryData(DebugDirectoryEntry entry) { - var pos = entry.DataPointer; + var pos = entry.DataPointer + _webcilInWasmOffset; var buffer = new byte[entry.DataSize]; if (_stream.Seek(pos, SeekOrigin.Begin) != pos) { @@ -227,7 +233,7 @@ public sealed partial class WebcilReader : IDisposable public MetadataReaderProvider ReadEmbeddedPortablePdbDebugDirectoryData(DebugDirectoryEntry entry) { - var pos = entry.DataPointer; + var pos = entry.DataPointer + _webcilInWasmOffset; var buffer = new byte[entry.DataSize]; if (_stream.Seek(pos, SeekOrigin.Begin) != pos) { @@ -289,7 +295,7 @@ public sealed partial class WebcilReader : IDisposable throw new ArgumentException($"expected debug directory entry type {nameof(DebugDirectoryEntryType.PdbChecksum)}", nameof(entry)); } - var pos = entry.DataPointer; + var pos = entry.DataPointer + _webcilInWasmOffset; var buffer = new byte[entry.DataSize]; if (_stream.Seek(pos, SeekOrigin.Begin) != pos) { @@ -330,7 +336,7 @@ public sealed partial class WebcilReader : IDisposable { if (rva >= section.VirtualAddress && rva < section.VirtualAddress + section.VirtualSize) { - return section.PointerToRawData + (rva - section.VirtualAddress); + return section.PointerToRawData + (rva - section.VirtualAddress) + _webcilInWasmOffset; } } throw new 
BadImageFormatException("RVA not found in any section", nameof(_stream)); @@ -342,7 +348,7 @@ public sealed partial class WebcilReader : IDisposable { var sections = ImmutableArray.CreateBuilder(_header.coff_sections); var buffer = new byte[Marshal.SizeOf()]; - _stream.Seek(SectionDirectoryOffset, SeekOrigin.Begin); + _stream.Seek(SectionDirectoryOffset + _webcilInWasmOffset, SeekOrigin.Begin); for (int i = 0; i < _header.coff_sections; i++) { if (_stream.Read(buffer, 0, buffer.Length) != buffer.Length) @@ -362,4 +368,50 @@ public sealed partial class WebcilReader : IDisposable { _stream.Dispose(); } + + private bool TryReadWasmWrapper(out long webcilInWasmOffset) + { + webcilInWasmOffset = 0; + using var reader = new WasmWrapperModuleReader(_stream); + if (!reader.IsWasmModule) + return false; + if (!reader.Visit()) + return false; + if (!reader.HasWebcil) + return false; + webcilInWasmOffset = reader.WebcilPayloadOffset; + return true; + } + + private sealed class WasmWrapperModuleReader : WasmModuleReader + { + internal bool HasWebcil {get; private set;} + internal long WebcilPayloadOffset {get; private set; } + public WasmWrapperModuleReader(Stream stream) : base (stream) + { + } + + protected override bool VisitSection (WasmModuleReader.Section sec, out bool shouldStop) + { + shouldStop = false; + if (sec != WasmModuleReader.Section.Data) + return true; + shouldStop = true; + + uint numSegments = ReadULEB128(); + if (numSegments != 2) + return false; + + // skip the first segment + if (!TryReadPassiveDataSegment (out long _, out long _)) + return false; + + if (!TryReadPassiveDataSegment (out long _, out long segmentStart)) + return false; + + HasWebcil = true; + WebcilPayloadOffset = segmentStart; + return true; + } + } } diff --git a/src/libraries/Microsoft.NET.WebAssembly.Webcil/src/Webcil/WebcilWasmWrapper.cs b/src/libraries/Microsoft.NET.WebAssembly.Webcil/src/Webcil/WebcilWasmWrapper.cs new file mode 100644 index 0000000..3f85604 --- /dev/null +++ 
b/src/libraries/Microsoft.NET.WebAssembly.Webcil/src/Webcil/WebcilWasmWrapper.cs @@ -0,0 +1,226 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.IO; +using System.Collections.Immutable; +using System.Reflection.PortableExecutable; +using System.Runtime.InteropServices; + +namespace Microsoft.NET.WebAssembly.Webcil; + +// +// Emits a simple WebAssembly wrapper module around a given webcil payload. +// +// The entire wasm module is going to be unchanging, except for the data section which has 2 passive +// segments. segment 0 is 4 bytes and contains the length of the webcil payload. segment 1 is of a +// variable size and contains the webcil payload. +// +// The unchanging parts are stored as a "prefix" and "suffix" which contain the bytes for the following +// WAT module, split into the parts that come before the data section, and the bytes that come after: +// +// (module +// (data "\0f\00\00\00") ;; data segment 0: payload size as a 4 byte LE uint32 +// (data "webcil Payload\cc") ;; data segment 1: webcil payload +// (memory (import "webcil" "memory") 1) +// (global (export "webcilVersion") i32 (i32.const 0)) +// (func (export "getWebcilSize") (param $destPtr i32) (result) +// local.get $destPtr +// i32.const 0 +// i32.const 4 +// memory.init 0) +// (func (export "getWebcilPayload") (param $d i32) (param $n i32) (result) +// local.get $d +// i32.const 0 +// local.get $n +// memory.init 1)) +public class WebcilWasmWrapper +{ + private readonly Stream _webcilPayloadStream; + private readonly uint _webcilPayloadSize; + + public WebcilWasmWrapper(Stream webcilPayloadStream) + { + _webcilPayloadStream = webcilPayloadStream; + long len = webcilPayloadStream.Length; + if (len > (long)uint.MaxValue) + throw new InvalidOperationException("webcil payload too large"); + _webcilPayloadSize = (uint)len; + } + + public void WriteWasmWrappedWebcil(Stream 
outputStream) + { + WriteWasmHeader(outputStream); + using (var writer = new BinaryWriter(outputStream, System.Text.Encoding.UTF8, leaveOpen: true)) + { + WriteDataSection(writer); + } + WriteWasmSuffix(outputStream); + } + + // + // Everything from the above wat module before the data section + // + // extracted by wasm-reader -s wrapper.wasm + private static +#if NET7_0_OR_GREATER + ReadOnlyMemory +#else + byte[] +#endif + s_wasmWrapperPrefix = new byte[] { + 0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x0a, 0x02, 0x60, 0x01, 0x7f, 0x00, 0x60, 0x02, 0x7f, 0x7f, 0x00, 0x02, 0x12, 0x01, 0x06, 0x77, 0x65, 0x62, 0x63, 0x69, 0x6c, 0x06, 0x6d, + 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x02, 0x00, 0x01, 0x03, 0x03, 0x02, 0x00, 0x01, 0x06, 0x0b, 0x02, 0x7f, 0x00, 0x41, 0x00, 0x0b, 0x7f, 0x00, 0x41, 0x00, 0x0b, 0x07, 0x41, 0x04, 0x0d, 0x77, 0x65, + 0x62, 0x63, 0x69, 0x6c, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x03, 0x00, 0x0a, 0x77, 0x65, 0x62, 0x63, 0x69, 0x6c, 0x53, 0x69, 0x7a, 0x65, 0x03, 0x01, 0x0d, 0x67, 0x65, 0x74, 0x57, 0x65, + 0x62, 0x63, 0x69, 0x6c, 0x53, 0x69, 0x7a, 0x65, 0x00, 0x00, 0x10, 0x67, 0x65, 0x74, 0x57, 0x65, 0x62, 0x63, 0x69, 0x6c, 0x50, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x00, 0x01, 0x0c, 0x01, 0x02, + 0x0a, 0x1b, 0x02, 0x0c, 0x00, 0x20, 0x00, 0x41, 0x00, 0x41, 0x04, 0xfc, 0x08, 0x00, 0x00, 0x0b, 0x0c, 0x00, 0x20, 0x00, 0x41, 0x00, 0x20, 0x01, 0xfc, 0x08, 0x01, 0x00, 0x0b, + }; + // + // Everything from the above wat module after the data section + // + // extracted by wasm-reader -s wrapper.wasm + private static +#if NET7_0_OR_GREATER + ReadOnlyMemory +#else + byte[] +#endif + s_wasmWrapperSuffix = new byte[] { + 0x00, 0x1b, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x02, 0x14, 0x02, 0x00, 0x01, 0x00, 0x07, 0x64, 0x65, 0x73, 0x74, 0x50, 0x74, 0x72, 0x01, 0x02, 0x00, 0x01, 0x64, 0x01, 0x01, 0x6e, + }; + + private static void WriteWasmHeader(Stream outputStream) + { +#if NET7_0_OR_GREATER + outputStream.Write(s_wasmWrapperPrefix.Span); +#else + 
outputStream.Write(s_wasmWrapperPrefix, 0, s_wasmWrapperPrefix.Length); +#endif + } + + private static void WriteWasmSuffix(Stream outputStream) + { +#if NET7_0_OR_GREATER + outputStream.Write(s_wasmWrapperSuffix.Span); +#else + outputStream.Write(s_wasmWrapperSuffix, 0, s_wasmWrapperSuffix.Length); +#endif + } + + // 1 byte to encode "passive" data segment + private const uint SegmentCodeSize = 1; + + // Align the payload start to a 4-byte boundary within the wrapper. If the runtime reads the + // payload directly, instead of by instantiating the wasm module, we don't want the WebAssembly + // prefix to push some of the values inside the image to odd byte offsets as the runtime assumes + // the image will be aligned. + // + // There are requirements in ECMA-335 (Section II.25.4) that fat method headers and method data + // sections be 4-byte aligned. + private const uint WebcilPayloadInternalAlignment = 4; + + private void WriteDataSection(BinaryWriter writer) + { + + uint dataSectionSize = 0; + // uleb128 encoding of number of segments + dataSectionSize += 1; // there's always 2 segments which encodes to 1 byte + // compute the segment 0 size: + // segment 0 has 1 byte segment code, 1 byte of size and at least 4 bytes of payload + uint segment0MinimumSize = SegmentCodeSize + 1 + 4; + dataSectionSize += segment0MinimumSize; + + // encode webcil size as a uleb128 + byte[] ulebWebcilPayloadSize = ULEB128Encode(_webcilPayloadSize); + + // compute the segment 1 size: + // segment 1 has 1 byte segment code, a uleb128 encoding of the webcilPayloadSize, and the payload + // don't count the size of the payload yet + checked + { + dataSectionSize += SegmentCodeSize + (uint)ulebWebcilPayloadSize.Length; + } + + // at this point the data section size includes everything except the data section code, the data section size and the webcil payload itself + // and any extra padding that we may want to add to segment 0.
+ // So we can compute the offset of the payload within the wasm module. + byte[] putativeULEBDataSectionSize = ULEB128Encode(dataSectionSize + _webcilPayloadSize); + uint payloadOffset = (uint)s_wasmWrapperPrefix.Length + 1 + (uint)putativeULEBDataSectionSize.Length + dataSectionSize ; + + uint paddingSize = PadTo(payloadOffset, WebcilPayloadInternalAlignment); + + if (paddingSize > 0) + { + checked + { + dataSectionSize += paddingSize; + } + } + + checked + { + dataSectionSize += _webcilPayloadSize; + } + + byte[] ulebSectionSize = ULEB128Encode(dataSectionSize); + + if (putativeULEBDataSectionSize.Length != ulebSectionSize.Length) + throw new InvalidOperationException ("adding padding would cause data section's encoded length to chane"); // TODO: fixme: there's upto one extra byte to encode the section length - take away a padding byte. + writer.Write((byte)11); // section Data + writer.Write(ulebSectionSize, 0, ulebSectionSize.Length); + + writer.Write((byte)2); // number of segments + + // write segment 0 + writer.Write((byte)1); // passive segment + if (paddingSize + 4 > 127) { + throw new InvalidOperationException ("padding would cause segment 0 to need a multi-byte ULEB128 size encoding"); + } + writer.Write((byte)(4 + paddingSize)); // segment size: 4 plus any padding + writer.Write((uint)_webcilPayloadSize); // payload is an unsigned 32 bit number + for (int i = 0; i < paddingSize; i++) + writer.Write((byte)0); + + // write segment 1 + writer.Write((byte)1); // passive segment + writer.Write(ulebWebcilPayloadSize, 0, ulebWebcilPayloadSize.Length); // segment size: _webcilPayloadSize + if (writer.BaseStream.Position % WebcilPayloadInternalAlignment != 0) { + throw new Exception ($"predited offset {payloadOffset}, actual position {writer.BaseStream.Position}"); + } + _webcilPayloadStream.CopyTo(writer.BaseStream); // payload is the entire webcil content + } + + private static byte[] ULEB128Encode(uint value) + { + uint n = value; + int len = 0; + do + { + n 
>>= 7; + len++; + } while (n != 0); + byte[] arr = new byte[len]; + int i = 0; + n = value; + do + { + byte b = (byte)(n & 0x7f); + n >>= 7; + if (n != 0) + b |= 0x80; + arr[i++] = b; + } while (n != 0); + return arr; + } + + private static uint PadTo (uint value, uint align) + { + uint newValue = AlignTo(value, align); + return newValue - value; + } + + private static uint AlignTo (uint value, uint align) + { + return (value + (align - 1)) & ~(align - 1); + } +} diff --git a/src/mono/mono/metadata/assembly.c b/src/mono/mono/metadata/assembly.c index c2caede..6758877 100644 --- a/src/mono/mono/metadata/assembly.c +++ b/src/mono/mono/metadata/assembly.c @@ -44,6 +44,7 @@ #include #include #include +#include #ifndef HOST_WIN32 #include @@ -1465,6 +1466,13 @@ bundled_assembly_match (const char *bundled_name, const char *name) if (bprefix == nprefix && strncmp (bundled_name, name, bprefix) == 0) return TRUE; } + /* if they want a .dll and we have the matching .wasm webcil-in-wasm, return it */ + if (g_str_has_suffix (bundled_name, MONO_WEBCIL_IN_WASM_EXTENSION) && g_str_has_suffix (name, ".dll")) { + size_t bprefix = strlen (bundled_name) - strlen (MONO_WEBCIL_IN_WASM_EXTENSION); + size_t nprefix = strlen (name) - strlen (".dll"); + if (bprefix == nprefix && strncmp (bundled_name, name, bprefix) == 0) + return TRUE; + } return FALSE; #endif } @@ -2737,6 +2745,12 @@ mono_assembly_load_corlib (void) corlib = mono_assembly_request_open (corlib_name, &req, &status); g_free (corlib_name); } + if (!corlib) { + /* Maybe its in a bundle */ + char *corlib_name = g_strdup_printf ("%s%s", MONO_ASSEMBLY_CORLIB_NAME, MONO_WEBCIL_IN_WASM_EXTENSION); + corlib = mono_assembly_request_open (corlib_name, &req, &status); + g_free (corlib_name); + } #endif g_assert (corlib); diff --git a/src/mono/mono/metadata/image.c b/src/mono/mono/metadata/image.c index 2eef023..6e07a5d 100644 --- a/src/mono/mono/metadata/image.c +++ b/src/mono/mono/metadata/image.c @@ -958,8 +958,9 @@ 
mono_has_pdb_checksum (char *raw_data, uint32_t raw_data_len) int32_t ret = try_load_pe_cli_header (raw_data, raw_data_len, &cli_header); #ifdef ENABLE_WEBCIL + int32_t webcil_section_adjustment = 0; if (ret == -1) { - ret = mono_webcil_load_cli_header (raw_data, raw_data_len, 0, &cli_header); + ret = mono_webcil_load_cli_header (raw_data, raw_data_len, 0, &cli_header, &webcil_section_adjustment); is_pe = FALSE; } #endif @@ -992,7 +993,7 @@ mono_has_pdb_checksum (char *raw_data, uint32_t raw_data_len) } #ifdef ENABLE_WEBCIL else { - ret = mono_webcil_load_section_table (raw_data, raw_data_len, ret, &t); + ret = mono_webcil_load_section_table (raw_data, raw_data_len, ret, webcil_section_adjustment, &t); if (ret == -1) return FALSE; } @@ -1348,7 +1349,7 @@ mono_image_storage_dtor (gpointer self) } } if (storage->raw_data_allocated) { - g_free (storage->raw_data); + g_free (storage->raw_data_handle); } g_free (storage->key); @@ -1429,6 +1430,7 @@ mono_image_storage_new_raw_data (char *datac, guint32 data_len, gboolean raw_dat storage->raw_data = datac; storage->raw_data_len = data_len; storage->raw_data_allocated = !!raw_data_allocated; + storage->raw_data_handle = datac; storage->key = key; MonoImageStorage *other_storage = NULL; diff --git a/src/mono/mono/metadata/metadata-internals.h b/src/mono/mono/metadata/metadata-internals.h index 009861f..e073ab2 100644 --- a/src/mono/mono/metadata/metadata-internals.h +++ b/src/mono/mono/metadata/metadata-internals.h @@ -282,6 +282,12 @@ typedef struct { /* Module entry point is _CorDllMain. */ guint8 has_entry_point : 1; #endif +#ifdef ENABLE_WEBCIL + /* set to a non-zero value when we load a webcil-in-wasm image. + * Note that in that case MonoImage:raw_data is not equal to MonoImageStorage:raw_data + */ + int32_t webcil_section_adjustment; +#endif } MonoImageStorage; struct _MonoImage { @@ -297,7 +303,7 @@ struct _MonoImage { MonoImageStorage *storage; - /* Aliases storage->raw_data when storage is non-NULL. 
Otherwise NULL. */ + /* Points into storage->raw_data when storage is non-NULL. Otherwise NULL. */ char *raw_data; guint32 raw_data_len; diff --git a/src/mono/mono/metadata/mono-debug.c b/src/mono/mono/metadata/mono-debug.c index 993a6fd..c415e3b 100644 --- a/src/mono/mono/metadata/mono-debug.c +++ b/src/mono/mono/metadata/mono-debug.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #if NO_UNALIGNED_ACCESS @@ -1111,6 +1112,13 @@ bsymfile_match (BundledSymfile *bsymfile, const char *assembly_name) && !strcmp (bsymfile->aname + n, ".dll")) return TRUE; } + p = strstr (assembly_name, MONO_WEBCIL_IN_WASM_EXTENSION); + if (p && *(p + strlen(MONO_WEBCIL_IN_WASM_EXTENSION)) == 0) { + size_t n = p - assembly_name; + if (!strncmp (bsymfile->aname, assembly_name, n) + && !strcmp (bsymfile->aname + n, ".dll")) + return TRUE; + } #endif return FALSE; } diff --git a/src/mono/mono/metadata/webcil-loader.c b/src/mono/mono/metadata/webcil-loader.c index 1323c0f..c96523a 100644 --- a/src/mono/mono/metadata/webcil-loader.c +++ b/src/mono/mono/metadata/webcil-loader.c @@ -8,6 +8,8 @@ #include "mono/metadata/metadata-internals.h" #include "mono/metadata/webcil-loader.h" +#include "mono/utils/mono-logger-internals.h" +#include "mono/utils/wasm-module-reader.h" /* keep in sync with webcil-writer */ enum { @@ -35,12 +37,23 @@ typedef struct MonoWebCilHeader { } MonoWebCilHeader; static gboolean +find_webcil_in_wasm (const uint8_t *ptr, const uint8_t *boundp, const uint8_t **webcil_payload_start); + +static gboolean webcil_image_match (MonoImage *image) { + gboolean success = FALSE; if (image->raw_data_len >= sizeof (MonoWebCilHeader)) { - return image->raw_data[0] == 'W' && image->raw_data[1] == 'b' && image->raw_data[2] == 'I' && image->raw_data[3] == 'L'; + success = image->raw_data[0] == 'W' && image->raw_data[1] == 'b' && image->raw_data[2] == 'I' && image->raw_data[3] == 'L'; + + if (!success && mono_wasm_module_is_wasm ((const uint8_t*)image->raw_data, (const 
uint8_t*)image->raw_data + image->raw_data_len)) { + /* if it's a WebAssembly module, assume it's webcil-in-wasm and + * optimistically return TRUE + */ + success = TRUE; + } } - return FALSE; + return success; } /* @@ -49,9 +62,23 @@ webcil_image_match (MonoImage *image) * most of MonoDotNetHeader is unused and left uninitialized (assumed zero); */ static int32_t -do_load_header (const char *raw_data, uint32_t raw_data_len, int32_t offset, MonoDotNetHeader *header) +do_load_header (const char *raw_data, uint32_t raw_data_len, int32_t offset, MonoDotNetHeader *header, int32_t *raw_data_rva_map_wasm_bump) { MonoWebCilHeader wcheader; + const uint8_t *raw_data_bound = (const uint8_t*)raw_data + raw_data_len; + *raw_data_rva_map_wasm_bump = 0; + if (mono_wasm_module_is_wasm ((const uint8_t*)raw_data, raw_data_bound)) { + /* assume it's webcil wrapped in wasm */ + const uint8_t *webcil_segment_start = NULL; + if (!find_webcil_in_wasm ((const uint8_t*)raw_data, raw_data_bound, &webcil_segment_start)) + return -1; + // HACK: adjust all the rva physical offsets by this amount + int32_t offset_adjustment = (int32_t)(webcil_segment_start - (const uint8_t*)raw_data); + *raw_data_rva_map_wasm_bump = offset_adjustment; + // skip to the beginning of the webcil payload + offset += offset_adjustment; + } + if (offset + sizeof (MonoWebCilHeader) > raw_data_len) return -1; memcpy (&wcheader, raw_data + offset, sizeof (wcheader)); @@ -72,7 +99,7 @@ do_load_header (const char *raw_data, uint32_t raw_data_len, int32_t offset, Mon } int32_t -mono_webcil_load_section_table (const char *raw_data, uint32_t raw_data_len, int32_t offset, MonoSectionTable *t) +mono_webcil_load_section_table (const char *raw_data, uint32_t raw_data_len, int32_t offset, int32_t webcil_section_adjustment, MonoSectionTable *t) { /* WebCIL section table entries are a subset of a PE section * header. Initialize just the parts we have. 
@@ -87,7 +114,7 @@ mono_webcil_load_section_table (const char *raw_data, uint32_t raw_data_len, int t->st_virtual_size = GUINT32_FROM_LE (st [0]); t->st_virtual_address = GUINT32_FROM_LE (st [1]); t->st_raw_data_size = GUINT32_FROM_LE (st [2]); - t->st_raw_data_ptr = GUINT32_FROM_LE (st [3]); + t->st_raw_data_ptr = GUINT32_FROM_LE (st [3]) + (uint32_t)webcil_section_adjustment; offset += sizeof(st); return offset; } @@ -99,14 +126,30 @@ webcil_image_load_pe_data (MonoImage *image) MonoCLIImageInfo *iinfo; MonoDotNetHeader *header; int32_t offset = 0; + int32_t webcil_section_adjustment = 0; int top; iinfo = image->image_info; header = &iinfo->cli_header; - offset = do_load_header (image->raw_data, image->raw_data_len, offset, header); + offset = do_load_header (image->raw_data, image->raw_data_len, offset, header, &webcil_section_adjustment); if (offset == -1) goto invalid_image; + /* HACK! RVAs and debug table entry pointers are from the beginning of the webcil payload. adjust MonoImage:raw_data to point to it */ + g_assert (image->ref_count == 1); + // NOTE: image->storage->raw_data could be shared if we loaded this image multiple times (for different ALCs, for example) + // Do not adjust image->storage->raw_data. 
+#ifdef ENABLE_WEBCIL + int32_t old_adjustment; + old_adjustment = mono_atomic_cas_i32 ((volatile gint32*)&image->storage->webcil_section_adjustment, webcil_section_adjustment, 0); + g_assert (old_adjustment == 0 || old_adjustment == webcil_section_adjustment); +#endif + mono_trace (G_LOG_LEVEL_DEBUG, MONO_TRACE_ASSEMBLY, "Adjusting offset image %s [%p].", image->name, image); + image->raw_data += webcil_section_adjustment; + image->raw_data_len -= webcil_section_adjustment; + offset -= webcil_section_adjustment; + // parts of ecma-335 loading depend on 4-byte alignment of the image + g_assertf (((intptr_t)image->raw_data) % 4 == 0, "webcil image %s [%p] raw data %p not 4 byte aligned\n", image->name, image, image->raw_data); top = iinfo->cli_header.coff.coff_sections; @@ -116,7 +159,7 @@ webcil_image_load_pe_data (MonoImage *image) for (int i = 0; i < top; i++) { MonoSectionTable *t = &iinfo->cli_section_tables [i]; - offset = mono_webcil_load_section_table (image->raw_data, image->raw_data_len, offset, t); + offset = mono_webcil_load_section_table (image->raw_data, image->raw_data_len, offset, /*webcil_section_adjustment*/ 0, t); if (offset == -1) goto invalid_image; } @@ -164,7 +207,55 @@ mono_webcil_loader_install (void) } int32_t -mono_webcil_load_cli_header (const char *raw_data, uint32_t raw_data_len, int32_t offset, MonoDotNetHeader *header) +mono_webcil_load_cli_header (const char *raw_data, uint32_t raw_data_len, int32_t offset, MonoDotNetHeader *header, int32_t *webcil_section_adjustment) +{ + return do_load_header (raw_data, raw_data_len, offset, header, webcil_section_adjustment); +} + +struct webcil_in_wasm_ud { - return do_load_header (raw_data, raw_data_len, offset, header); + const uint8_t *data_segment_1_start; +}; + +static gboolean +webcil_in_wasm_section_visitor (uint8_t sec_code, const uint8_t *sec_content, uint32_t sec_length, gpointer user_data, gboolean *should_stop) +{ + *should_stop = FALSE; + if (sec_code != 
MONO_WASM_MODULE_DATA_SECTION) + return TRUE; + struct webcil_in_wasm_ud *data = (struct webcil_in_wasm_ud *)user_data; + + *should_stop = TRUE; // we don't care about the sections after the data section + const uint8_t *ptr = sec_content; + const uint8_t *boundp = sec_content + sec_length; + + uint32_t num_segments = 0; + if (!mono_wasm_module_decode_uleb128 (ptr, boundp, &ptr, &num_segments)) + return FALSE; + + if (num_segments != 2) + return FALSE; + + // skip over data segment 0, it's the webcil payload length as a u32 plus padding - we don't care about it + uint32_t passive_segment_len = 0; + const uint8_t *passive_segment_start = NULL; + if (!mono_wasm_module_decode_passive_data_segment (ptr, boundp, &ptr, &passive_segment_len, &passive_segment_start)) + return FALSE; + // data segment 1 is the actual webcil payload. + if (!mono_wasm_module_decode_passive_data_segment (ptr, boundp, &ptr, &passive_segment_len, &passive_segment_start)) + return FALSE; + data->data_segment_1_start = passive_segment_start; + return TRUE; +} + +static gboolean +find_webcil_in_wasm (const uint8_t *ptr, const uint8_t *boundp, const uint8_t **webcil_payload_start) +{ + struct webcil_in_wasm_ud user_data = {0,}; + MonoWasmModuleVisitor visitor = {0,}; + visitor.section_visitor = &webcil_in_wasm_section_visitor; + if (!mono_wasm_module_visit (ptr, boundp, &visitor, &user_data) || user_data.data_segment_1_start == NULL) + return FALSE; /* visit failed, or no data section was seen so there is no webcil payload to point at */ + *webcil_payload_start = user_data.data_segment_1_start; + return TRUE; } diff --git a/src/mono/mono/metadata/webcil-loader.h b/src/mono/mono/metadata/webcil-loader.h index c95c2c5..daf6217 100644 --- a/src/mono/mono/metadata/webcil-loader.h +++ b/src/mono/mono/metadata/webcil-loader.h @@ -5,13 +5,15 @@ #ifndef _MONO_METADATA_WEBCIL_LOADER_H #define _MONO_METADATA_WEBCIL_LOADER_H +#define MONO_WEBCIL_IN_WASM_EXTENSION ".wasm" + void mono_webcil_loader_install (void); int32_t -mono_webcil_load_cli_header (const char *raw_data, uint32_t raw_data_len, int32_t offset, MonoDotNetHeader *header);
+mono_webcil_load_cli_header (const char *raw_data, uint32_t raw_data_len, int32_t offset, MonoDotNetHeader *header, int32_t *webcil_section_adjustment); int32_t -mono_webcil_load_section_table (const char *raw_data, uint32_t raw_data_len, int32_t offset, MonoSectionTable *t); +mono_webcil_load_section_table (const char *raw_data, uint32_t raw_data_len, int32_t offset, int32_t webcil_section_adjustment, MonoSectionTable *t); #endif /*_MONO_METADATA_WEBCIL_LOADER_H*/ diff --git a/src/mono/mono/mini/monovm.c b/src/mono/mono/mini/monovm.c index 3730d6b..6c55982 100644 --- a/src/mono/mono/mini/monovm.c +++ b/src/mono/mono/mini/monovm.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -140,7 +141,7 @@ mono_core_preload_hook (MonoAssemblyLoadContext *alc, MonoAssemblyName *aname, c n -= strlen(".dll"); char *fullpath2 = g_malloc (n + strlen(".webcil") + 1); g_strlcpy (fullpath2, fullpath, n + 1); - g_strlcpy (fullpath2 + n, ".webcil", 8); + g_strlcpy (fullpath2 + n, ".webcil", strlen(".webcil") + 1); if (g_file_test (fullpath2, G_FILE_TEST_IS_REGULAR)) { MonoImageOpenStatus status; result = mono_assembly_request_open (fullpath2, &req, &status); @@ -148,6 +149,16 @@ mono_core_preload_hook (MonoAssemblyLoadContext *alc, MonoAssemblyName *aname, c g_free (fullpath2); if (result) break; + char *fullpath3 = g_malloc (n + strlen(MONO_WEBCIL_IN_WASM_EXTENSION) + 1); + g_strlcpy (fullpath3, fullpath, n + 1); + g_strlcpy (fullpath3 + n, MONO_WEBCIL_IN_WASM_EXTENSION, strlen(MONO_WEBCIL_IN_WASM_EXTENSION) + 1); + if (g_file_test (fullpath3, G_FILE_TEST_IS_REGULAR)) { + MonoImageOpenStatus status; + result = mono_assembly_request_open (fullpath3, &req, &status); + } + g_free (fullpath3); + if (result) + break; } #endif } diff --git a/src/mono/mono/utils/CMakeLists.txt b/src/mono/mono/utils/CMakeLists.txt index e26ec96..efbfa3c 100644 --- a/src/mono/mono/utils/CMakeLists.txt +++ b/src/mono/mono/utils/CMakeLists.txt @@ -184,7 +184,10 @@ 
set(utils_common_sources options.h options-def.h options.c - ftnptr.h) + ftnptr.h + wasm-module-reader.h + wasm-module-reader.c + ) if(MONO_CROSS_COMPILE) set(utils_arch_sources mach-support-unknown.c) diff --git a/src/mono/mono/utils/wasm-module-reader.c b/src/mono/mono/utils/wasm-module-reader.c new file mode 100644 index 0000000..0f078f9 --- /dev/null +++ b/src/mono/mono/utils/wasm-module-reader.c @@ -0,0 +1,140 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include + +#include "mono/metadata/mono-endian.h" + +#include "wasm-module-reader.h" + +#define WASM_MODULE_SECTION(ident,str) str, +static const char * +wasm_module_section_names[] = { +#include "wasm-sections.def" +#undef WASM_MODULE_SECTION +}; + +static const char * +mono_wasm_module_section_get_name (int section) +{ + g_assert (section >= 0 && section < MONO_WASM_MODULE_NUM_SECTIONS); /* index 0 is the "custom" section and has a name too */ + return wasm_module_section_names[section]; +} + +static gboolean +bc_read8 (const uint8_t *ptr, const uint8_t *boundp, const uint8_t **endp, uint8_t *out) +{ + if (ptr < boundp) { + *out = *ptr; + *endp = ptr + 1; + return TRUE; + } + return FALSE; +} + +static gboolean +bc_read32 (const uint8_t *ptr, const uint8_t *boundp, const uint8_t **endp, uint32_t *out) +{ + if (ptr + 3 < boundp) { + *out = read32 (ptr); + *endp = ptr + 4; + return TRUE; + } + return FALSE; +} + +static gboolean +bc_read_uleb128 (const uint8_t *ptr, const uint8_t *boundp, const uint8_t **endp, uint32_t *out) +{ + uint32_t val = 0; + unsigned int shift = 0; + while (1) { + uint8_t b; + if (!bc_read8 (ptr, boundp, &ptr, &b)) + return FALSE; + val |= (uint32_t)(b & 0x7f) << shift; /* shift in unsigned arithmetic: the promoted int would overflow at shift == 28 */ + if ((b & 0x80) == 0) break; + shift += 7; + if (shift >= 35) return FALSE; /* more than 5 bytes cannot encode a u32: fail the parse instead of aborting on malformed input */ + } + *out = val; + *endp = ptr; + + return TRUE; +} + +gboolean +mono_wasm_module_decode_uleb128 (const uint8_t *ptr, const uint8_t *boundp, const uint8_t **endp,
uint32_t *out) +{ + return bc_read_uleb128 (ptr, boundp, endp, out); +} + +static gboolean +visit_section (const uint8_t *ptr, const uint8_t *boundp, const uint8_t **endp, MonoWasmModuleVisitor *visitor, gpointer user_data, gboolean *should_stop) +{ + uint8_t code = 0; + uint32_t sec_size = 0; + if (!bc_read8 (ptr, boundp, &ptr, &code)) + return FALSE; + if (!bc_read_uleb128 (ptr, boundp, &ptr, &sec_size) || sec_size > (size_t)(boundp - ptr)) + return FALSE; /* bad size encoding, or the declared section size runs past the end of the buffer */ + + *should_stop = FALSE; + gboolean success = visitor->section_visitor (code, ptr, sec_size, user_data, should_stop); + *endp = ptr + sec_size; // advance past the section payload + return success; +} + +/* + * return TRUE if successfully visited, FALSE if there was a problem + */ +gboolean +mono_wasm_module_visit (const uint8_t *ptr, const uint8_t *boundp, MonoWasmModuleVisitor *visitor, gpointer user_data) +{ + if (!mono_wasm_module_is_wasm (ptr, boundp)) + return FALSE; + + ptr += 4; + + uint32_t version = 0; + if (!bc_read32 (ptr, boundp, &ptr, &version)) + return FALSE; + if (version != 1) + return FALSE; + + gboolean success = TRUE; + + gboolean stop = FALSE; + while (success && !stop && ptr < boundp) { + success = visit_section (ptr, boundp, &ptr, visitor, user_data, &stop); + } + + return success; +} + +gboolean +mono_wasm_module_is_wasm (const uint8_t *ptr, const uint8_t *boundp) +{ + const uint32_t wasm_magic = 0x6d736100u; // "\0asm" + uint32_t magic = 0; + if (!bc_read32 (ptr, boundp, &ptr, &magic)) + return FALSE; + return magic == wasm_magic; +} + +gboolean +mono_wasm_module_decode_passive_data_segment (const uint8_t *ptr, const uint8_t *boundp, const uint8_t **endp, uint32_t *data_len, const uint8_t **data_start) +{ + uint8_t code = 0; + if (!bc_read8 (ptr, boundp, &ptr, &code)) + return FALSE; + if (code != 1) + return FALSE; // not a passive segment + uint32_t len = 0; + if (!bc_read_uleb128 (ptr, boundp, &ptr, &len) || len > (size_t)(boundp - ptr)) + return FALSE; /* bad length encoding, or the segment payload would extend past boundp */ + *data_start = ptr; + *data_len = len; + *endp = ptr + len; + return TRUE; +} diff --git
a/src/mono/mono/utils/wasm-module-reader.h b/src/mono/mono/utils/wasm-module-reader.h new file mode 100644 index 0000000..1bf7fa6 --- /dev/null +++ b/src/mono/mono/utils/wasm-module-reader.h @@ -0,0 +1,35 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef __MONO_WASM_MODULE_READER_H__ +#define __MONO_WASM_MODULE_READER_H__ + +#include + +typedef struct MonoWasmModuleVisitor +{ + /* return TRUE for success, set *should_stop to stop visitation */ + gboolean (*section_visitor) (uint8_t sec_code, const uint8_t *sec_content, uint32_t sec_length, gpointer user_data, gboolean *should_stop); +} MonoWasmModuleVisitor; + +#define WASM_MODULE_SECTION(ident,str) MONO_WASM_MODULE_ ## ident ## _SECTION, +enum { +#include "wasm-sections.def" + MONO_WASM_MODULE_NUM_SECTIONS, +}; +#undef WASM_MODULE_SECTION + +gboolean +mono_wasm_module_decode_uleb128 (const uint8_t *ptr, const uint8_t *boundp, const uint8_t **endp, uint32_t *out); + +gboolean +mono_wasm_module_is_wasm (const uint8_t *ptr, const uint8_t *boundp); + +gboolean +mono_wasm_module_visit (const uint8_t *ptr, const uint8_t *boundp, MonoWasmModuleVisitor *visitor, gpointer user_data); + +/* returns FALSE if the data segment is not passive */ +gboolean +mono_wasm_module_decode_passive_data_segment (const uint8_t *ptr, const uint8_t *boundp, const uint8_t **endp, uint32_t *data_len, const uint8_t **data_start); + +#endif /* __MONO_WASM_MODULE_READER_H__*/ diff --git a/src/mono/mono/utils/wasm-sections.def b/src/mono/mono/utils/wasm-sections.def new file mode 100644 index 0000000..dd55c71 --- /dev/null +++ b/src/mono/mono/utils/wasm-sections.def @@ -0,0 +1,21 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#ifndef WASM_MODULE_SECTION +#error "define WASM_MODULE_SECTION(ident,str) before including this header" +#endif + +// order matters and must match the section codes from the WebAssembly spec +WASM_MODULE_SECTION(CUSTOM, "custom") +WASM_MODULE_SECTION(TYPE, "type") +WASM_MODULE_SECTION(IMPORT, "import") +WASM_MODULE_SECTION(FUNCTION, "function") +WASM_MODULE_SECTION(TABLE, "table") +WASM_MODULE_SECTION(MEMORY, "memory") +WASM_MODULE_SECTION(GLOBAL, "global") +WASM_MODULE_SECTION(EXPORT, "export") +WASM_MODULE_SECTION(START, "start") +WASM_MODULE_SECTION(ELEMENT, "element") +WASM_MODULE_SECTION(CODE, "code") +WASM_MODULE_SECTION(DATA, "data") +WASM_MODULE_SECTION(DATACOUNT, "data count") \ No newline at end of file diff --git a/src/mono/sample/wasm/browser-advanced/index.html b/src/mono/sample/wasm/browser-advanced/index.html index 179a035..b0fd27b 100644 --- a/src/mono/sample/wasm/browser-advanced/index.html +++ b/src/mono/sample/wasm/browser-advanced/index.html @@ -16,7 +16,7 @@ - + @@ -24,4 +24,4 @@ Answer to the Ultimate Question of Life, the Universe, and Everything is : - \ No newline at end of file + diff --git a/src/mono/wasm/Wasm.Build.Tests/Blazor/BuildPublishTests.cs b/src/mono/wasm/Wasm.Build.Tests/Blazor/BuildPublishTests.cs index 939ed44..665735a 100644 --- a/src/mono/wasm/Wasm.Build.Tests/Blazor/BuildPublishTests.cs +++ b/src/mono/wasm/Wasm.Build.Tests/Blazor/BuildPublishTests.cs @@ -197,7 +197,7 @@ public class BuildPublishTests : BuildTestBase .ExecuteWithCapturedOutput("new razorclasslib") .EnsureSuccessful(); - string razorClassLibraryFileName = UseWebcil ? "RazorClassLibrary.webcil" : "RazorClassLibrary.dll"; + string razorClassLibraryFileName = UseWebcil ? 
$"RazorClassLibrary{WebcilInWasmExtension}" : "RazorClassLibrary.dll"; AddItemsPropertiesToProject(wasmProjectFile, extraItems: @$" diff --git a/src/mono/wasm/Wasm.Build.Tests/BuildTestBase.cs b/src/mono/wasm/Wasm.Build.Tests/BuildTestBase.cs index 8a0a8d3..e7c948e 100644 --- a/src/mono/wasm/Wasm.Build.Tests/BuildTestBase.cs +++ b/src/mono/wasm/Wasm.Build.Tests/BuildTestBase.cs @@ -62,6 +62,8 @@ namespace Wasm.Build.Tests Path.Combine(BuildEnvironment.TestDataPath, "nuget8.config"); // for now - we are still using net7, but with // targetFramework == "net7.0" ? "nuget7.config" : "nuget8.config"); + public const string WebcilInWasmExtension = ".wasm"; + static BuildTestBase() { try @@ -80,7 +82,7 @@ namespace Wasm.Build.Tests Console.WriteLine($"=============================================================================================="); Console.WriteLine($"=============== Running with {(s_buildEnv.IsWorkload ? "Workloads" : "No workloads")} ==============="); if (UseWebcil) - Console.WriteLine($"=============== Using .webcil ==============="); + Console.WriteLine($"=============== Using webcil-in-wasm ==============="); Console.WriteLine($"=============================================================================================="); Console.WriteLine(""); } @@ -685,7 +687,7 @@ namespace Wasm.Build.Tests string managedDir = Path.Combine(bundleDir, "managed"); string bundledMainAppAssembly = - useWebcil ? $"{projectName}.webcil" : $"{projectName}.dll"; + useWebcil ? $"{projectName}{WebcilInWasmExtension}" : $"{projectName}.dll"; AssertFilesExist(managedDir, new[] { bundledMainAppAssembly }); bool is_debug = config == "Debug"; diff --git a/src/mono/wasm/build/WasmApp.targets b/src/mono/wasm/build/WasmApp.targets index 3411b31..465c0a1 100644 --- a/src/mono/wasm/build/WasmApp.targets +++ b/src/mono/wasm/build/WasmApp.targets @@ -71,7 +71,7 @@ - $(WasmEnableLegacyJsInterop) - Include support for legacy JS interop. Defaults to true. 
- $(WasmEnableExceptionHandling) - Enable support for the WASM post MVP Exception Handling runtime extension. - $(WasmEnableSIMD) - Enable support for the WASM post MVP SIMD runtime extension. - - $(WasmEnableWebcil) - Enable conversion of assembly .dlls to .webcil + - $(WasmEnableWebcil) - Enable conversion of assembly .dlls to Webcil wrapped in .wasm - $(WasmIncludeFullIcuData) - Loads full ICU data (icudt.dat). Defaults to false. Only applicable when InvariantGlobalization=false. - $(WasmIcuDataFileName) - Name/path of ICU globalization file loaded to app. Only when InvariantGloblization=false and WasmIncludeFullIcuData=false. - $(WasmAllowUndefinedSymbols) - Controls whether undefined symbols are allowed or not, diff --git a/src/mono/wasm/debugger/BrowserDebugProxy/MonoSDBHelper.cs b/src/mono/wasm/debugger/BrowserDebugProxy/MonoSDBHelper.cs index 9e4538ff..4278f98 100644 --- a/src/mono/wasm/debugger/BrowserDebugProxy/MonoSDBHelper.cs +++ b/src/mono/wasm/debugger/BrowserDebugProxy/MonoSDBHelper.cs @@ -787,6 +787,8 @@ namespace Microsoft.WebAssembly.Diagnostics } internal sealed partial class MonoSDBHelper { + public const string WebcilInWasmExtension = ".wasm"; + private static int debuggerObjectId; private static int cmdId = 1; //cmdId == 0 is used by events which come from runtime private const int MINOR_VERSION = 61; @@ -1219,6 +1221,11 @@ namespace Microsoft.WebAssembly.Diagnostics string baseName = result.Substring(0, result.Length - 7); result = baseName + ".dll"; } + if (result.EndsWith(WebcilInWasmExtension)) { + /* don't leak webcil .wasm names to the debugger - work in terms of the original .dlls */ + string baseName = result.Substring(0, result.Length - WebcilInWasmExtension.Length); + result = baseName + ".dll"; + } return result; } diff --git a/src/mono/wasm/debugger/DebuggerTestSuite/DebuggerTestBase.cs b/src/mono/wasm/debugger/DebuggerTestSuite/DebuggerTestBase.cs index c5119a2..59f890e 100644 --- 
a/src/mono/wasm/debugger/DebuggerTestSuite/DebuggerTestBase.cs +++ b/src/mono/wasm/debugger/DebuggerTestSuite/DebuggerTestBase.cs @@ -24,6 +24,8 @@ namespace DebuggerTests DebuggerTestFirefox #endif { + public const string WebcilInWasmExtension = ".wasm"; + public DebuggerTests(ITestOutputHelper testOutput, string locale = "en-US", string driver = "debugger-driver.html") : base(testOutput, locale, driver) {} diff --git a/src/mono/wasm/debugger/DebuggerTestSuite/MonoJsTests.cs b/src/mono/wasm/debugger/DebuggerTestSuite/MonoJsTests.cs index 139daae..46e054e 100644 --- a/src/mono/wasm/debugger/DebuggerTestSuite/MonoJsTests.cs +++ b/src/mono/wasm/debugger/DebuggerTestSuite/MonoJsTests.cs @@ -136,7 +136,7 @@ namespace DebuggerTests : await Task.FromResult(ProtocolEventHandlerReturn.KeepHandler); }); - byte[] bytes = File.Exists(asm_path) ? File.ReadAllBytes(asm_path) : File.ReadAllBytes(Path.ChangeExtension(asm_path, ".webcil")); // hack! + byte[] bytes = File.Exists(asm_path) ? File.ReadAllBytes(asm_path) : File.ReadAllBytes(Path.ChangeExtension(asm_path, WebcilInWasmExtension)); // hack! 
string asm_base64 = Convert.ToBase64String(bytes); string pdb_base64 = String.Empty; diff --git a/src/tasks/Common/Utils.cs b/src/tasks/Common/Utils.cs index 89f2bfe..2f1cbd9 100644 --- a/src/tasks/Common/Utils.cs +++ b/src/tasks/Common/Utils.cs @@ -16,6 +16,8 @@ using Microsoft.Build.Utilities; internal static class Utils { + public const string WebcilInWasmExtension = ".wasm"; /* extension given to Webcil assemblies wrapped in a WebAssembly module; const so it cannot be reassigned at runtime */ + private static readonly object s_SyncObj = new object(); public static string GetEmbeddedResource(string file) diff --git a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/ComputeWasmPublishAssets.cs b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/ComputeWasmPublishAssets.cs index 87467d8..60c1afd 100644 --- a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/ComputeWasmPublishAssets.cs +++ b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/ComputeWasmPublishAssets.cs @@ -589,7 +589,7 @@ public class ComputeWasmPublishAssets : Task } var extension = candidate.GetMetadata("Extension"); - if (string.Equals(extension, ".dll", StringComparison.Ordinal) || string.Equals(extension, ".webcil", StringComparison.Ordinal)) + if (string.Equals(extension, ".dll", StringComparison.Ordinal) || string.Equals(extension, Utils.WebcilInWasmExtension, StringComparison.Ordinal)) { var culture = candidate.GetMetadata("Culture"); var inferredCulture = candidate.GetMetadata("DestinationSubDirectory").Replace("\\", "/").Trim('/'); diff --git a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/ConvertDllsToWebCil.cs b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/ConvertDllsToWebCil.cs index 17cc800..413fcdf 100644 --- a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/ConvertDllsToWebCil.cs +++ b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/ConvertDllsToWebCil.cs @@ -66,7 +66,7 @@ public class ConvertDllsToWebCil : Task if (!Directory.Exists(candicatePath)) Directory.CreateDirectory(candicatePath); - var finalWebcil = Path.Combine(candicatePath,
Path.GetFileNameWithoutExtension(filePath) + ".webcil"); + var finalWebcil = Path.Combine(candicatePath, Path.GetFileNameWithoutExtension(filePath) + Utils.WebcilInWasmExtension); if (Utils.CopyIfDifferent(tmpWebcil, finalWebcil, useHash: true)) Log.LogMessage(MessageImportance.Low, $"Generated {finalWebcil} ."); else @@ -75,13 +75,13 @@ public class ConvertDllsToWebCil : Task _fileWrites.Add(finalWebcil); var webcilItem = new TaskItem(finalWebcil, candidate.CloneCustomMetadata()); - webcilItem.SetMetadata("RelativePath", Path.ChangeExtension(candidate.GetMetadata("RelativePath"), ".webcil")); + webcilItem.SetMetadata("RelativePath", Path.ChangeExtension(candidate.GetMetadata("RelativePath"), Utils.WebcilInWasmExtension)); webcilItem.SetMetadata("OriginalItemSpec", finalWebcil); if (webcilItem.GetMetadata("AssetTraitName") == "Culture") { string relatedAsset = webcilItem.GetMetadata("RelatedAsset"); - relatedAsset = Path.ChangeExtension(relatedAsset, ".webcil"); + relatedAsset = Path.ChangeExtension(relatedAsset, Utils.WebcilInWasmExtension); webcilItem.SetMetadata("RelatedAsset", relatedAsset); Log.LogMessage(MessageImportance.Low, $"Changing related asset of {webcilItem} to {relatedAsset}."); } diff --git a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/GenerateWasmBootJson.cs b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/GenerateWasmBootJson.cs index 0e8dc24..6a21c28 100644 --- a/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/GenerateWasmBootJson.cs +++ b/src/tasks/Microsoft.NET.Sdk.WebAssembly.Pack.Tasks/GenerateWasmBootJson.cs @@ -174,7 +174,7 @@ public class GenerateWasmBootJson : Task } else if (string.Equals("symbol", assetTraitValue, StringComparison.OrdinalIgnoreCase)) { - if (TryGetLazyLoadedAssembly($"{fileName}.dll", out _) || TryGetLazyLoadedAssembly($"{fileName}.webcil", out _)) + if (TryGetLazyLoadedAssembly($"{fileName}.dll", out _) || TryGetLazyLoadedAssembly($"{fileName}{Utils.WebcilInWasmExtension}", out _)) { 
Log.LogMessage(MessageImportance.Low, "Candidate '{0}' is defined as a lazy loaded symbols file.", resource.ItemSpec); resourceData.lazyAssembly ??= new ResourceHashesByNameDictionary(); diff --git a/src/tasks/WasmAppBuilder/WasmAppBuilder.cs b/src/tasks/WasmAppBuilder/WasmAppBuilder.cs index fca7a68..8f18345 100644 --- a/src/tasks/WasmAppBuilder/WasmAppBuilder.cs +++ b/src/tasks/WasmAppBuilder/WasmAppBuilder.cs @@ -187,7 +187,7 @@ public class WasmAppBuilder : WasmAppBuilderBaseTask var tmpWebcil = Path.GetTempFileName(); var webcilWriter = Microsoft.WebAssembly.Build.Tasks.WebcilConverter.FromPortableExecutable(inputPath: assembly, outputPath: tmpWebcil, logger: Log); webcilWriter.ConvertToWebcil(); - var finalWebcil = Path.Combine(asmRootPath, Path.ChangeExtension(Path.GetFileName(assembly), ".webcil")); + var finalWebcil = Path.Combine(asmRootPath, Path.ChangeExtension(Path.GetFileName(assembly), Utils.WebcilInWasmExtension)); if (Utils.CopyIfDifferent(tmpWebcil, finalWebcil, useHash: true)) Log.LogMessage(MessageImportance.Low, $"Generated {finalWebcil} ."); else @@ -259,7 +259,7 @@ public class WasmAppBuilder : WasmAppBuilderBaseTask { if (UseWebcil) { - assemblyPath = Path.Combine(asmRootPath, Path.ChangeExtension(Path.GetFileName(assembly), ".webcil")); + assemblyPath = Path.Combine(asmRootPath, Path.ChangeExtension(Path.GetFileName(assembly), Utils.WebcilInWasmExtension)); // For the hash, read the bytes from the webcil file, not the dll file. 
bytes = File.ReadAllBytes(assemblyPath); } @@ -287,7 +287,7 @@ public class WasmAppBuilder : WasmAppBuilderBaseTask var tmpWebcil = Path.GetTempFileName(); var webcilWriter = Microsoft.WebAssembly.Build.Tasks.WebcilConverter.FromPortableExecutable(inputPath: args.fullPath, outputPath: tmpWebcil, logger: Log); webcilWriter.ConvertToWebcil(); - var finalWebcil = Path.Combine(directory, Path.ChangeExtension(name, ".webcil")); + var finalWebcil = Path.Combine(directory, Path.ChangeExtension(name, Utils.WebcilInWasmExtension)); if (Utils.CopyIfDifferent(tmpWebcil, finalWebcil, useHash: true)) Log.LogMessage(MessageImportance.Low, $"Generated {finalWebcil} ."); else -- 2.7.4