-
Notifications
You must be signed in to change notification settings - Fork 154
Interpreter work #1765
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Interpreter work #1765
Changes from 21 commits
fadf033
f20deb4
2e2fcf4
1fc8b00
8661add
1e5e657
d8ebc0c
de3d2de
ab19281
8fa3db8
0582374
23c7b9f
3f2607b
b2cfffb
2644a09
f879ad4
7a97c94
4b8516e
256c7e7
04b8010
07a709c
89d8bfc
e8b284e
44d0a2d
47dceb4
20bd0d9
668dfd0
14f1f5b
1d40718
9e3952f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -279,6 +279,10 @@ class Lisp { | |
| std::atomic<T_sp> _AllObjectFiles; | ||
| std::atomic<T_sp> _AllCodeBlocks; | ||
| std::atomic<T_sp> _AllBytecodeModules; | ||
| // Every GFBytecodeSimpleFun ever made (atomically pushed list whose elements are the GFBytecodeSimpleFuns themselves). | ||
| // Walked by arena_post_load_regenerate_trampolines after a snapshot load | ||
| // so the dispatch trampoline for each generic function gets re-attached. | ||
| std::atomic<T_sp> _AllGFBytecodeFuns; | ||
|
Member
Author
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
maybe should be a List_sp |
||
| SimpleFun_sp _UnboundCellFunctionEntryPoint; | ||
| T_sp _TerminalIO; | ||
| List_sp _ActiveThreads; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,65 @@ | ||
| /* | ||
| * sampling_profiler.h — CPU-time sampling profiler. | ||
| * | ||
| * At rate `N` Hz, an ITIMER_PROF timer delivers SIGPROF to an arbitrary | ||
| * running thread. The handler walks the frame-pointer chain via the | ||
| * ucontext registers and appends a sample (timestamp, thread id, depth, | ||
| * optional bytecode-VM pc, variable-length PC array) to a per-process | ||
| * bump-allocated ring. | ||
| * | ||
| * Separate from src/core/profiler.cc's RangePush/RangePop instrumentation. | ||
| * That profiler measures user-annotated regions; this one periodically | ||
| * snapshots whatever code is running. | ||
| * | ||
| * See Phase 4 / Phase 5 for post-mortem symbolication and flame-graph | ||
| * output — this header covers the recording side only. | ||
| */ | ||
| #pragma once | ||
|
|
||
| #include <cstdint> | ||
| #include <cstddef> | ||
|
|
||
| namespace core { | ||
|
|
||
| // Fixed-size header that starts each variable-length sample record. In the ring buffer a SampleHeader is followed | ||
| // immediately by `depth` × uint64_t native PCs, so one record spans sizeof(SampleHeader) + depth * sizeof(uint64_t) bytes. | ||
| struct SampleHeader { | ||
| uint64_t timestamp_ns; // CLOCK_MONOTONIC reading at signal delivery (nanoseconds, going by the _ns suffix) | ||
| uint64_t vm_pc; // bytecode VM's _pc at sample time, or 0 when there is none to record | ||
| uint32_t thread_id; // Linux tid / macOS port id, truncated to fit 32 bits | ||
| uint32_t depth; // number of uint64_t PCs that trail this header (0 if the stack walk failed) | ||
| }; | ||
|
|
||
| // Start the profiler. | ||
| // rate_hz : sampling rate in Hz (e.g. 97). Clamped to [1, 10000]. | ||
| // max_depth : per-sample stack-depth cap. Clamped to [1, 8192]. | ||
| // buffer_bytes : ring buffer size (0 = default 256 MiB). | ||
|
Member
Author
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we could just have the default be the actual number for 256 MiB rather than giving 0 a special meaning |
||
| // Returns true once sampling has been started. Fails if the profiler is already running or the | ||
| // OS timer/signal setup fails (the failure case presumably returns false — confirm in the implementation). | ||
| bool sampling_profiler_start(unsigned rate_hz, | ||
| unsigned max_depth, | ||
| size_t buffer_bytes); | ||
|
|
||
| // Stop sampling. The captured samples stay in the buffer afterwards; call | ||
| // sampling_profiler_save to write them out, or sampling_profiler_reset to discard them. | ||
| void sampling_profiler_stop(); | ||
|
|
||
| // Returns true while a profile session is active (presumably from a successful start until the matching stop — confirm). | ||
| bool sampling_profiler_running(); | ||
|
|
||
| // Discard all captured samples and reset the bump pointer of the bump-allocated sample buffer. | ||
| void sampling_profiler_reset(); | ||
|
|
||
| // Write the ring buffer contents to the file at `path` in collapsed-stacks format | ||
| // (one stack per line, frames semicolon-separated, with a trailing ' <count>'), ready | ||
| // to feed Brendan Gregg's flamegraph.pl. Addresses are symbolicated on the fly using | ||
| // the arena side table, ObjectFile lookup, bytecode-module scan, and | ||
| // dladdr. Returns true on success, false on I/O error. | ||
| bool sampling_profiler_save(const char* path); | ||
|
|
||
| // Diagnostic counters. Going by the names: samples recorded, samples dropped, and sample-buffer bytes in use — exact semantics (e.g. what counts as "dropped") should be confirmed in the implementation. | ||
| size_t sampling_profiler_samples_recorded(); | ||
| size_t sampling_profiler_samples_dropped(); | ||
| size_t sampling_profiler_bytes_used(); | ||
|
|
||
| } // namespace core | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -78,10 +78,39 @@ extern int global_debug_virtual_machine; | |
| #define VM_RESET_COUNTERS(vm) | ||
| #endif | ||
|
|
||
| // ---------- Dynamic-environment records for the bytecode interpreter ---------- | ||
| // The bytecode VM establishes dynamic environments (tagbody, catch, | ||
| // special-bind, progv, unwind-protect) by pushing records onto a side stack | ||
| // instead of recursing into bytecode_vm. The entering opcodes push; the | ||
| // matching exit opcodes pop; an outer try/catch(Unwind&) in bytecode_vm walks | ||
| // the stack to run cleanups / resume at a saved pc on non-local exits. | ||
| // | ||
| // Currently only the type and the stack exist — no opcodes are migrated yet. | ||
|
Member
Author
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This entire parallel structure of dynenvs is pointless duplication. Even if we do want to keep the parallel stack of dynenvs, which I don't think we do, the kind, frame, slots, and all are apparent from the existing dynenv classes (TagbodyDynEnv_O etc.). A separate enum for what are types, padding bytes (??), slots that are punned, this is all really unacceptable. Only the bytecode specific bits like the sp mark and target pc should really need recording anyway, and we could probably just add them to the dynenv classes or make new ones. Adding to dynenv classes might also remove the need for VMFrameDynEnv which would be good. |
||
| enum class VMDynKind : uint8_t { | ||
| Tagbody = 1, // value 1; from `entry` opcode (not yet migrated). Numbering starts at 1, so no enumerator has value 0 | ||
| Catch, // value 2; from `catch_8/16` (not yet migrated) | ||
| SpecialBind, // value 3; from `special_bind` (one record per bound cell) | ||
| Progv, // value 4; from `progv` (one record covers N bindings) | ||
| UnwindProtect, // value 5; from `protect` | ||
| }; | ||
|
|
||
| struct VMDynRecord { | ||
| VMDynKind kind; | ||
| uint8_t _pad[7]; | ||
| void* frame; // native frame address (__builtin_frame_address) when the record was established | ||
| core::T_O* slot0; // kind-specific GC-managed object: tag / cell / cleanup closure, depending on `kind` | ||
| core::T_O* slot1; // kind-specific GC-managed object: the old binding value, for kinds that save one | ||
| core::T_O** sp_mark; // stack pointer at establishment | ||
| core::T_O** fp_mark; // frame pointer at establishment | ||
| unsigned char* target_pc; // pc to resume at; documented only for Tagbody/Catch records | ||
| core::T_O* dynenv_mark; // saved head of my_thread->dynEnvStackGet() at establishment | ||
| }; | ||
|
|
||
| struct VirtualMachine { | ||
| // Stack size is kind of arbitrary, and really we should make it | ||
| // grow and etc. | ||
| static constexpr size_t MaxStackWords = 65536; | ||
| static constexpr size_t MaxDynRecords = 4096; | ||
| bool _Running; | ||
| core::T_O** _stackBottom = nullptr; | ||
| size_t _stackBytes; | ||
|
|
@@ -101,6 +130,13 @@ struct VirtualMachine { | |
| core::T_O** _literals; | ||
| unsigned char* _pc; | ||
|
|
||
| // Dynamic-environment record stack. Root-allocated so GC scans the | ||
| // T_O*/T_O** slots conservatively. _dynRecordTop points one past the last | ||
|
Member
Author
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
All of the slots in the parallel dynenv structures already exist in the dynenv classes, which are already reachable from the control stack or from the thread local state, so GC should not be a concern here |
||
| // live record, so an empty stack has _dynRecordTop == _dynRecordBottom. | ||
| VMDynRecord* _dynRecordBottom = nullptr; | ||
| VMDynRecord* _dynRecordLimit = nullptr; | ||
| VMDynRecord* _dynRecordTop = nullptr; | ||
|
|
||
| void error(); | ||
|
|
||
| void enable_guards(); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -84,6 +84,13 @@ class ObjectFile_O : public LibraryBase_O { | |
| size_t _Size; | ||
| size_t _ObjectId; | ||
| JITDylib_sp _TheJITDylib; | ||
| // If true, this ObjectFile is transient arena-init scaffolding (shared | ||
| // trampoline / stub template) that must not be serialized into snapshots. | ||
| // The ObjectFile is still registered in _AllObjectFiles normally — our | ||
| // link layer plugin looks it up by name during materialization, so it must | ||
|
Member
Author
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Our link layer plugin looks it up by name during materialization, not LLVM. And it does so for reasons trampolines don't need - so that we can look up DWARF from an instruction pointer. Since trampolines are for bytecode functions, we can use the bytecode debug info mechanisms instead of using DWARF at all. So I don't think trampolines need to go in _AllObjectFiles. EDIT: Okay, so actually these changes use the trampoline to get closure etc, for some reason, so we do need DWARF. However to figure out if a PC is in an arena we can use |
||
| // stay findable at runtime. The snapshot save walker checks this flag and | ||
| // skips any ObjectFile with it set. | ||
| bool _TransientSkipSnapshot = false; | ||
| // | ||
| // Code data | ||
| void* _TextSectionStart; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,43 @@ | ||
| #pragma once | ||
|
|
||
| /* | ||
| File: trampoline.h | ||
|
Member
Author
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
filename is incorrect |
||
| */ | ||
|
|
||
| /* | ||
| Copyright (c) 2014, Christian E. Schafmeister | ||
|
|
||
| CLASP is free software; you can redistribute it and/or | ||
| modify it under the terms of the GNU Library General Public | ||
| License as published by the Free Software Foundation; either | ||
| version 2 of the License, or (at your option) any later version. | ||
|
|
||
| See directory 'clasp/licenses' for full details. | ||
|
|
||
| The above copyright notice and this permission notice shall be included in | ||
| all copies or substantial portions of the Software. | ||
|
|
||
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
| THE SOFTWARE. | ||
| */ | ||
| /* -^- */ | ||
|
|
||
| #include <clasp/core/common.h> | ||
|
|
||
| namespace llvmo { | ||
|
|
||
|
|
||
| core::Pointer_mv cmp__compile_trampoline(core::T_sp name); | ||
|
|
||
| // Per-generic-function (GF) trampoline. Returns the address of an arena slot that | ||
| // tail-calls GFBytecodeEntryPoint::entry_point_n. Each GF gets its own unique | ||
| // address so that flame charts and backtraces show that GF's name, instead of every GF | ||
| // appearing under the single shared static entry_point_n symbol. | ||
| core::Pointer_sp cmp__compile_gf_trampoline(core::T_sp name); | ||
|
|
||
| }; // namespace llvmo | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"list of cons cells" indicates that this is a list whose elements are conses, but actually they are GFBytecodeSimpleFuns as suggested by the name.