diff --git a/Cargo.lock b/Cargo.lock
index ab64acd84..c12df3919 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1182,6 +1182,7 @@ dependencies = [
 name = "hash-vm"
 version = "0.1.0"
 dependencies = [
+ "hash-abi",
  "hash-reporting",
  "hash-source",
  "hash-utils",
diff --git a/compiler/hash-abi/src/lib.rs b/compiler/hash-abi/src/lib.rs
index 3aa620b81..b1b34b754 100644
--- a/compiler/hash-abi/src/lib.rs
+++ b/compiler/hash-abi/src/lib.rs
@@ -43,7 +43,7 @@ impl CallingConvention {
     }
 }
 
-new_store_key!(pub FnAbiId);
+new_store_key!(pub FnAbiId, derives = Debug);
 
 /// Defines ABI specific information about a function.
 ///
diff --git a/compiler/hash-codegen-vm/src/translation/mod.rs b/compiler/hash-codegen-vm/src/translation/mod.rs
index d4477eaf8..2acffb11a 100644
--- a/compiler/hash-codegen-vm/src/translation/mod.rs
+++ b/compiler/hash-codegen-vm/src/translation/mod.rs
@@ -18,7 +18,7 @@ use hash_codegen::{
 };
 use hash_ir::IrCtx;
 use hash_pipeline::settings::CompilerSettings;
-use hash_vm::bytecode_builder::BytecodeBuilder;
+use hash_vm::builder::BytecodeBuilder;
 
 use crate::ctx::Ctx;
 
diff --git a/compiler/hash-ir/src/ir.rs b/compiler/hash-ir/src/ir.rs
index ab2044e60..d89e71e5d 100644
--- a/compiler/hash-ir/src/ir.rs
+++ b/compiler/hash-ir/src/ir.rs
@@ -839,6 +839,13 @@ impl BasicBlockData {
         self.statements.is_empty()
             && self.terminator.as_ref().is_some_and(|t| t.kind == TerminatorKind::Unreachable)
     }
+
+    /// Get the size of the [BasicBlockData].
+    ///
+    /// This is the number of statements in the block plus the terminator.
+    pub fn size(&self) -> usize {
+        self.statements.len() + usize::from(self.terminator.is_some())
+    }
 }
 
 index_vec::define_index_type! {
@@ -1032,6 +1039,42 @@ impl Body {
     pub fn source(&self) -> SourceId {
         self.origin.source()
     }
+
+    /// Get the size of the [Body].
+    ///
+    /// This is the total number of statements and terminators across all
+    /// basic blocks of the body.
+    pub fn size(&self) -> usize {
+        let stats = self.stats();
+        stats.statements as usize + stats.terminators as usize
+    }
+
+    /// Get the statistics of the [Body].
+    ///
+    /// Statements and terminators are tallied separately, so a terminator is
+    /// never counted as a statement.
+    pub fn stats(&self) -> BodyStats {
+        let mut stats = BodyStats::default();
+
+        for block in self.basic_blocks.blocks.iter() {
+            stats.basic_blocks += 1;
+            stats.statements += block.statements.len() as u32;
+            stats.terminators += u32::from(block.terminator.is_some());
+        }
+
+        stats
+    }
+}
+
+/// The statistics of the body.
+#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
+pub struct BodyStats {
+    /// The number of basic blocks in the body.
+    pub basic_blocks: u32,
+    /// The number of statements in the body, excluding terminators.
+    pub statements: u32,
+    /// The number of terminators in the body.
+    pub terminators: u32,
 }
 
 /// This struct contains additional metadata about the body that was lowered,
diff --git a/compiler/hash-vm/Cargo.toml b/compiler/hash-vm/Cargo.toml
index 5a3ffc3db..60ac658c2 100644
--- a/compiler/hash-vm/Cargo.toml
+++ b/compiler/hash-vm/Cargo.toml
@@ -11,3 +11,4 @@ doctest = false
 hash-reporting = { workspace = true }
 hash-source = { workspace = true }
 hash-utils = { workspace = true }
+hash-abi = { workspace = true }
diff --git a/compiler/hash-vm/src/builder/func.rs b/compiler/hash-vm/src/builder/func.rs
new file mode 100644
index 000000000..18fbc1010
--- /dev/null
+++ b/compiler/hash-vm/src/builder/func.rs
@@ -0,0 +1,66 @@
+//! Function builder related logic for the Hash VM.
+
+use hash_abi::FnAbiId;
+use hash_utils::index_vec::IndexVec;
+
+use crate::bytecode::{Instruction, op::LabelOffset};
+
+/// Incrementally builds the body of a single function for the Hash VM.
+#[derive(Debug)]
+pub struct FunctionBuilder {
+    /// The ABI of the function, this is used to generate
+    /// the correct instructions for the function, to read the
+    /// arguments and return values correctly.
+    pub abi: FnAbiId,
+
+    /// The body of the function. All instructions that make up the function
+    /// are stored within the body. However, labels are stored separately to
+    /// allow for easier management of jumps and branches.
+    pub body: IndexVec,
+
+    /// The labels within the function body, these are used to
+    /// manage jumps and branches. The labels store the literal index
+    /// within the function body where the label is located. This is essentially
+    /// a mapping from instruction labels to their offsets:
+    ///
+    /// 0 -=-> LabelOffset(0)
+    /// |
+    /// \   Instruction 0
+    ///     Instruction 1
+    ///     ...
+    /// 1---> LabelOffset(5):
+    /// |
+    /// \   Instruction 5
+    ///     ...
+    pub labels: IndexVec,
+
+    /// The current label counter, this is used to generate new labels.
+    label_counter: LabelOffset,
+}
+
+impl FunctionBuilder {
+    /// Create a new [FunctionBuilder] with the given ABI.
+    pub fn new(abi: FnAbiId) -> Self {
+        Self {
+            abi,
+            body: IndexVec::new(),
+            labels: IndexVec::new(),
+            label_counter: LabelOffset::new(0),
+        }
+    }
+
+    /// Generate a new label within the function.
+    ///
+    /// The label is taken from the internal counter, which is then advanced by
+    /// one. No entry is added to [`Self::labels`] by this call.
+    pub fn new_label(&mut self) -> LabelOffset {
+        let label = self.label_counter;
+        self.label_counter = LabelOffset::new(label.get() + 1);
+        label
+    }
+
+    /// Add an instruction to the function body.
+    pub fn emit(&mut self, instruction: Instruction) {
+        self.body.push(instruction);
+    }
+}
diff --git a/compiler/hash-vm/src/builder/instruction.rs b/compiler/hash-vm/src/builder/instruction.rs
new file mode 100644
index 000000000..adb2c98ca
--- /dev/null
+++ b/compiler/hash-vm/src/builder/instruction.rs
@@ -0,0 +1,1190 @@
+//! Instruction builder macros for convenient VM bytecode generation.
+//!
+//! This module provides the [`inst!`] macro which allows writing instructions
+//! in a readable, assembly-like syntax.
+//!
+//! # Syntax
+//!
+//! The macro supports several types of operands (note the space after prefix):
+//! - `[N]` - Literal register number (e.g., `[100]`)
+//! - `r [expr]` - Parametrized register (e.g., `r [reg_id]` or `r [5 + 10]`)
+//! - `@ [N]` - Block label reference (e.g., `@ [6]`)
+//! - `# [N]` - Immediate value (e.g., `# [42]`)
+//!
+//! # Examples
+//!
+//! ```ignore
+//! 
use hash_vm::builder::inst;
+//!
+//! // Literal register numbers
+//! let instructions = inst! {
+//!     add64 [0], [42];
+//!     mov [100], [98];
+//!     push64 [10];
+//! };
+//!
+//! // Registers computed from expressions
+//! let instructions = inst! {
+//!     add64 r [5], r [10];
+//!     mov r [5 + 10], r [10 + 10];
+//! };
+//!
+//! // Jumps that target a label
+//! let instructions = inst! {
+//!     add32 [1], [2];
+//!     cmp [1], [100];
+//!     jmpzero [1], @ [0];
+//! };
+//!
+//! // Writes of immediate values into registers
+//! let instructions = inst! {
+//!     write64 [50], # [1234];
+//!     write32 [51], # [42];
+//! };
+//! ```
+
+/// Turns a register operand (`[N]` or `r [expr]`) into a `Register`.
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __parse_operand {
+    // `[N]`: the literal is handed to `Register::new` unchanged
+    ([$n:literal]) => {
+        $crate::bytecode::register::Register::new($n)
+    };
+
+    // `r [expr]`: the expression is cast to `u8` first
+    (r [$index:expr]) => {
+        $crate::bytecode::register::Register::new($index as u8)
+    };
+}
+
+/// Turns a register, label or immediate operand into an `Operand` value.
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __parse_operand_value {
+    // `[N]`: literal register
+    ([$n:literal]) => {
+        $crate::bytecode::op::Operand::Register($crate::__parse_operand!([$n]))
+    };
+
+    // `r [expr]`: register computed from an expression
+    (r [$index:expr]) => {
+        $crate::bytecode::op::Operand::Register(
+            $crate::__parse_operand!(r [$index]),
+        )
+    };
+
+    // `@ [N]`: label offset
+    (@ [$offset:expr]) => {
+        $crate::bytecode::op::Operand::Label($crate::bytecode::op::LabelOffset::new($offset))
+    };
+
+    // `# [N]`: immediate value
+    (# [$value:expr]) => {
+        $crate::bytecode::op::Operand::Immediate($value)
+    };
+}
+
+/// Main instruction builder macro.
+///
+/// Supports assembly-like syntax for creating VM instructions.
+///
+/// # Operand Syntax (note the space after prefix)
+/// - `[N]`: Literal register number
+/// - `r [expr]`: Parametrized register from expression
+/// - `@ [N]`: Label offset reference
+/// - `# [N]`: Immediate value
+///
+/// # Examples
+///
+/// ```ignore
+/// use hash_vm::builder::inst;
+///
+/// let instructions = inst! {
+///     // Stack
+///     push64 [10];
+///     pop64 [11];
+///
+///     // Integer arithmetic
+///     add64 [1], [2];
+///     sub32 [3], [4];
+///     mul16 [5], [6];
+///
+///     // Register-to-register move
+///     mov [100], [98];
+///
+///     // Registers computed from expressions
+///     mov r [5], r [10];
+///
+///     // Jumps to labels
+///     jmp @ [0];
+///     jmpzero [0], @ [100];
+///
+///     // Immediate writes
+///     write64 [50], # [1234];
+///     write32 [51], # [42];
+///
+///     // System calls, calls and returns
+///     syscall [0];
+///     call [10];
+///     return;
+/// };
+/// ```
+#[macro_export]
+macro_rules! inst {
+    // Nothing to assemble: produce an empty vector
+    () => { vec![] };
+
+    // Otherwise push every parsed instruction onto a fresh vector
+    ($($body:tt)*) => {{
+        let mut program = Vec::new();
+        $crate::__inst_impl!(program; $($body)*);
+        program
+    }};
+}
+
+/// Internal implementation macro for instruction parsing.
+#[doc(hidden)]
+#[macro_export]
+macro_rules! 
__inst_impl {
+    // Base case: no more instructions.
+    ($vec:ident;) => {};
+
+    // `return` is the only instruction without operands.
+    ($vec:ident; return; $($rest:tt)*) => {
+        $vec.push($crate::bytecode::Instruction::Return);
+        $crate::__inst_impl!($vec; $($rest)*);
+    };
+
+    // Every other instruction is recognised by the *shape* of its operands
+    // only. Each operand is wrapped in braces so that it travels as a single
+    // token tree, and `__inst_op!` then maps the mnemonic onto the matching
+    // `Instruction` variant. This keeps the recursion at one level per
+    // instruction while letting every register operand be written either as
+    // `[N]` or as `r [expr]`.
+
+    // One operand: `[N]`, `r [expr]` or `@ [expr]`.
+    ($vec:ident; $op:ident [$a:literal]; $($rest:tt)*) => {
+        $vec.push($crate::__inst_op!($op {[$a]}));
+        $crate::__inst_impl!($vec; $($rest)*);
+    };
+    ($vec:ident; $op:ident r [$a:expr]; $($rest:tt)*) => {
+        $vec.push($crate::__inst_op!($op {r [$a]}));
+        $crate::__inst_impl!($vec; $($rest)*);
+    };
+    ($vec:ident; $op:ident @ [$a:expr]; $($rest:tt)*) => {
+        $vec.push($crate::__inst_op!($op {@ [$a]}));
+        $crate::__inst_impl!($vec; $($rest)*);
+    };
+
+    // Two operands, the first one being a literal register.
+    ($vec:ident; $op:ident [$a:literal], [$b:literal]; $($rest:tt)*) => {
+        $vec.push($crate::__inst_op!($op {[$a]} {[$b]}));
+        $crate::__inst_impl!($vec; $($rest)*);
+    };
+    ($vec:ident; $op:ident [$a:literal], r [$b:expr]; $($rest:tt)*) => {
+        $vec.push($crate::__inst_op!($op {[$a]} {r [$b]}));
+        $crate::__inst_impl!($vec; $($rest)*);
+    };
+    ($vec:ident; $op:ident [$a:literal], @ [$b:expr]; $($rest:tt)*) => {
+        $vec.push($crate::__inst_op!($op {[$a]} {@ [$b]}));
+        $crate::__inst_impl!($vec; $($rest)*);
+    };
+    ($vec:ident; $op:ident [$a:literal], # [$b:expr]; $($rest:tt)*) => {
+        $vec.push($crate::__inst_op!($op {[$a]} {# [$b]}));
+        $crate::__inst_impl!($vec; $($rest)*);
+    };
+
+    // Two operands, the first one being a parametrized register.
+    ($vec:ident; $op:ident r [$a:expr], [$b:literal]; $($rest:tt)*) => {
+        $vec.push($crate::__inst_op!($op {r [$a]} {[$b]}));
+        $crate::__inst_impl!($vec; $($rest)*);
+    };
+    ($vec:ident; $op:ident r [$a:expr], r [$b:expr]; $($rest:tt)*) => {
+        $vec.push($crate::__inst_op!($op {r [$a]} {r [$b]}));
+        $crate::__inst_impl!($vec; $($rest)*);
+    };
+    ($vec:ident; $op:ident r [$a:expr], @ [$b:expr]; $($rest:tt)*) => {
+        $vec.push($crate::__inst_op!($op {r [$a]} {@ [$b]}));
+        $crate::__inst_impl!($vec; $($rest)*);
+    };
+    ($vec:ident; $op:ident r [$a:expr], # [$b:expr]; $($rest:tt)*) => {
+        $vec.push($crate::__inst_op!($op {r [$a]} {# [$b]}));
+        $crate::__inst_impl!($vec; $($rest)*);
+    };
+}
+
+/// Maps a mnemonic and its brace-wrapped operands onto the `Instruction`
+/// value it denotes.
+///
+/// Operands that are not valid for the instruction (for example a label
+/// passed where a register is required) are rejected by `__parse_operand!` /
+/// `__parse_operand_value!`, and unknown mnemonics match no rule at all.
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __inst_op {
+    // Internal: `Variant { l1 }`
+    (@unary $variant:ident $a:tt) => {
+        $crate::bytecode::Instruction::$variant { l1: $crate::__inst_reg!($a) }
+    };
+    // Internal: `Variant { l1, l2 }`
+    (@binary $variant:ident $a:tt $b:tt) => {
+        $crate::bytecode::Instruction::$variant {
+            l1: $crate::__inst_reg!($a),
+            l2: $crate::__inst_reg!($b),
+        }
+    };
+    // Internal: `Variant { l1, location }`
+    (@branch $variant:ident $a:tt $target:tt) => {
+        $crate::bytecode::Instruction::$variant {
+            l1: $crate::__inst_reg!($a),
+            location: $crate::__inst_loc!($target),
+        }
+    };
+
+    // Stack operations
+    (pop8 $a:tt) => { $crate::__inst_op!(@unary Pop8 $a) };
+    (pop16 $a:tt) => { $crate::__inst_op!(@unary Pop16 $a) };
+    (pop32 $a:tt) => { $crate::__inst_op!(@unary Pop32 $a) };
+    (pop64 $a:tt) => { $crate::__inst_op!(@unary Pop64 $a) };
+    (push8 $a:tt) => { $crate::__inst_op!(@unary Push8 $a) };
+    (push16 $a:tt) => { $crate::__inst_op!(@unary Push16 $a) };
+    (push32 $a:tt) => { $crate::__inst_op!(@unary Push32 $a) };
+    (push64 $a:tt) => { $crate::__inst_op!(@unary Push64 $a) };
+
+    // Arithmetic operations
+    (add8 $a:tt $b:tt) => { $crate::__inst_op!(@binary Add8 $a $b) };
+    (add16 $a:tt $b:tt) => { $crate::__inst_op!(@binary Add16 $a $b) };
+    (add32 $a:tt $b:tt) => { $crate::__inst_op!(@binary Add32 $a $b) };
+    (add64 $a:tt $b:tt) => { $crate::__inst_op!(@binary Add64 $a $b) };
+    (sub8 $a:tt $b:tt) => { $crate::__inst_op!(@binary Sub8 $a $b) };
+    (sub16 $a:tt $b:tt) => { $crate::__inst_op!(@binary Sub16 $a $b) };
+    (sub32 $a:tt $b:tt) => { $crate::__inst_op!(@binary Sub32 $a $b) };
+    (sub64 $a:tt $b:tt) => { $crate::__inst_op!(@binary Sub64 $a $b) };
+    (mul8 $a:tt $b:tt) => { $crate::__inst_op!(@binary Mul8 $a $b) };
+    (mul16 $a:tt $b:tt) => { $crate::__inst_op!(@binary Mul16 $a $b) };
+    (mul32 $a:tt $b:tt) => { $crate::__inst_op!(@binary Mul32 $a $b) };
+    (mul64 $a:tt $b:tt) => { $crate::__inst_op!(@binary Mul64 $a $b) };
+    (div8 $a:tt $b:tt) => { $crate::__inst_op!(@binary Div8 $a $b) };
+    (div16 $a:tt $b:tt) => { $crate::__inst_op!(@binary Div16 $a $b) };
+    (div32 $a:tt $b:tt) => { $crate::__inst_op!(@binary Div32 $a $b) };
+    (div64 $a:tt $b:tt) => { $crate::__inst_op!(@binary Div64 $a $b) };
+    (mod8 $a:tt $b:tt) => { $crate::__inst_op!(@binary Mod8 $a $b) };
+    (mod16 $a:tt $b:tt) => { $crate::__inst_op!(@binary Mod16 $a $b) };
+    (mod32 $a:tt $b:tt) => { $crate::__inst_op!(@binary Mod32 $a $b) };
+    (mod64 $a:tt $b:tt) => { $crate::__inst_op!(@binary Mod64 $a $b) };
+
+    // Signed operations
+    (idiv8 $a:tt $b:tt) => { $crate::__inst_op!(@binary IDiv8 $a $b) };
+    (idiv16 $a:tt $b:tt) => { $crate::__inst_op!(@binary IDiv16 $a $b) };
+    (idiv32 $a:tt $b:tt) => { $crate::__inst_op!(@binary IDiv32 $a $b) };
+    (idiv64 $a:tt $b:tt) => { $crate::__inst_op!(@binary IDiv64 $a $b) };
+    (imul8 $a:tt $b:tt) => { $crate::__inst_op!(@binary IMul8 $a $b) };
+    (imul16 $a:tt $b:tt) => { $crate::__inst_op!(@binary IMul16 $a $b) };
+    (imul32 $a:tt $b:tt) => { $crate::__inst_op!(@binary IMul32 $a $b) };
+    (imul64 $a:tt $b:tt) => { $crate::__inst_op!(@binary IMul64 $a $b) };
+
+    // Float operations
+    (addf32 $a:tt $b:tt) => { $crate::__inst_op!(@binary AddF32 $a $b) };
+    (addf64 $a:tt $b:tt) => { $crate::__inst_op!(@binary AddF64 $a $b) };
+    (subf32 $a:tt $b:tt) => { $crate::__inst_op!(@binary SubF32 $a $b) };
+    (subf64 $a:tt $b:tt) => { $crate::__inst_op!(@binary SubF64 $a $b) };
+    (mulf32 $a:tt $b:tt) => { $crate::__inst_op!(@binary MulF32 $a $b) };
+    (mulf64 $a:tt $b:tt) => { $crate::__inst_op!(@binary MulF64 $a $b) };
+    (divf32 $a:tt $b:tt) => { $crate::__inst_op!(@binary DivF32 $a $b) };
+    (divf64 $a:tt $b:tt) => { $crate::__inst_op!(@binary DivF64 $a $b) };
+    (modf32 $a:tt $b:tt) => { $crate::__inst_op!(@binary ModF32 $a $b) };
+    (modf64 $a:tt $b:tt) => { $crate::__inst_op!(@binary ModF64 $a $b) };
+    (powf32 $a:tt $b:tt) => { $crate::__inst_op!(@binary PowF32 $a $b) };
+    (powf64 $a:tt $b:tt) => { $crate::__inst_op!(@binary PowF64 $a $b) };
+
+    // Bitwise operations
+    (xor8 $a:tt $b:tt) => { $crate::__inst_op!(@binary Xor8 $a $b) };
+    (xor16 $a:tt $b:tt) => { $crate::__inst_op!(@binary Xor16 $a $b) };
+    (xor32 $a:tt $b:tt) => { $crate::__inst_op!(@binary Xor32 $a $b) };
+    (xor64 $a:tt $b:tt) => { $crate::__inst_op!(@binary Xor64 $a $b) };
+    (or8 $a:tt $b:tt) => { $crate::__inst_op!(@binary Or8 $a $b) };
+    (or16 $a:tt $b:tt) => { $crate::__inst_op!(@binary Or16 $a $b) };
+    (or32 $a:tt $b:tt) => { $crate::__inst_op!(@binary Or32 $a $b) };
+    (or64 $a:tt $b:tt) => { $crate::__inst_op!(@binary Or64 $a $b) };
+    (and8 $a:tt $b:tt) => { $crate::__inst_op!(@binary And8 $a $b) };
+    (and16 $a:tt $b:tt) => { $crate::__inst_op!(@binary And16 $a $b) };
+    (and32 $a:tt $b:tt) => { $crate::__inst_op!(@binary And32 $a $b) };
+    (and64 $a:tt $b:tt) => { $crate::__inst_op!(@binary And64 $a $b) };
+    (not8 $a:tt) => { $crate::__inst_op!(@unary Not8 $a) };
+    (not16 $a:tt) => { $crate::__inst_op!(@unary Not16 $a) };
+    (not32 $a:tt) => { $crate::__inst_op!(@unary Not32 $a) };
+    (not64 $a:tt) => { $crate::__inst_op!(@unary Not64 $a) };
+    (shl8 $a:tt $b:tt) => { $crate::__inst_op!(@binary Shl8 $a $b) };
+    (shl16 $a:tt $b:tt) => { $crate::__inst_op!(@binary Shl16 $a $b) };
+    (shl32 $a:tt $b:tt) => { $crate::__inst_op!(@binary Shl32 $a $b) };
+    (shl64 $a:tt $b:tt) => { $crate::__inst_op!(@binary Shl64 $a $b) };
+    (shr8 $a:tt $b:tt) => { $crate::__inst_op!(@binary Shr8 $a $b) };
+    (shr16 $a:tt $b:tt) => { $crate::__inst_op!(@binary Shr16 $a $b) };
+    (shr32 $a:tt $b:tt) => { $crate::__inst_op!(@binary Shr32 $a $b) };
+    (shr64 $a:tt $b:tt) => { $crate::__inst_op!(@binary Shr64 $a $b) };
+
+    // Write operations with immediate values, cast to the written width
+    (write8 $a:tt {# [$value:expr]}) => {
+        $crate::bytecode::Instruction::Write8 { reg: $crate::__inst_reg!($a), value: $value as u8 }
+    };
+    (write16 $a:tt {# [$value:expr]}) => {
+        $crate::bytecode::Instruction::Write16 { reg: $crate::__inst_reg!($a), value: $value as u16 }
+    };
+    (write32 $a:tt {# [$value:expr]}) => {
+        $crate::bytecode::Instruction::Write32 { reg: $crate::__inst_reg!($a), value: $value as u32 }
+    };
+    (write64 $a:tt {# [$value:expr]}) => {
+        $crate::bytecode::Instruction::Write64 { reg: $crate::__inst_reg!($a), value: $value as u64 }
+    };
+
+    // Control flow operations
+    (call $a:tt) => {
+        $crate::bytecode::Instruction::Call { func: $crate::__inst_reg!($a) }
+    };
+    (syscall $a:tt) => {
+        $crate::bytecode::Instruction::Syscall { id: $crate::__inst_reg!($a) }
+    };
+    (mov $dest:tt $src:tt) => {
+        $crate::bytecode::Instruction::Mov {
+            dest: $crate::__inst_reg!($dest),
+            src: $crate::__inst_reg!($src),
+        }
+    };
+
+    // Jump operations, the target may be a label or a register
+    (jmp $target:tt) => {
+        $crate::bytecode::Instruction::Jmp { location: $crate::__inst_loc!($target) }
+    };
+    (jmppos $a:tt $target:tt) => { $crate::__inst_op!(@branch JmpPos $a $target) };
+    (jmpneg $a:tt $target:tt) => { $crate::__inst_op!(@branch JmpNeg $a $target) };
+    (jmpzero $a:tt $target:tt) => { $crate::__inst_op!(@branch JmpZero $a $target) };
+
+    // Comparison
+    (cmp $a:tt $b:tt) => { $crate::__inst_op!(@binary Cmp $a $b) };
+}
+
+/// Unwraps a brace-wrapped operand and parses it as a register.
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __inst_reg {
+    ({$($operand:tt)*}) => { $crate::__parse_operand!($($operand)*) };
+}
+
+/// Unwraps a brace-wrapped operand and parses it as a jump location.
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __inst_loc {
+    ({$($operand:tt)*}) => { $crate::__parse_operand_value!($($operand)*) };
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::bytecode::{Instruction, op::Operand, register::Register};
+
+    #[test]
+    fn test_literal_registers() {
+        let instructions = inst! {
+            mov [10], [20];
+            add64 [1], [2];
+        };
+
+        assert_eq!(instructions.len(), 2);
+        assert!(matches!(instructions[0], Instruction::Mov { .. }));
+        assert!(matches!(instructions[1], Instruction::Add64 { .. }));
+    }
+
+    #[test]
+    fn test_register_with_expressions() {
+        // The r [expr] syntax is for computed register indices
+        let instructions = inst! 
{ + mov r [5], r [10]; + add32 r[15], r[20]; + }; + + assert_eq!(instructions.len(), 2); + if let Instruction::Mov { dest: d, src: s } = instructions[0] { + assert_eq!(d, Register::new(5)); + assert_eq!(s, Register::new(10)); + } else { + panic!("Expected Mov instruction"); + } + + if let Instruction::Add32 { l1, l2 } = instructions[1] { + assert_eq!(l1, Register::new(15)); + assert_eq!(l2, Register::new(20)); + } else { + panic!("Expected Add32 instruction"); + } + } + + #[test] + fn test_stack_operations() { + let instructions = inst! { + push64 [10]; + push32 [20]; + pop64 [30]; + pop32 [40]; + }; + + assert_eq!(instructions.len(), 4); + assert!(matches!(instructions[0], Instruction::Push64 { .. })); + assert!(matches!(instructions[1], Instruction::Push32 { .. })); + assert!(matches!(instructions[2], Instruction::Pop64 { .. })); + assert!(matches!(instructions[3], Instruction::Pop32 { .. })); + } + + #[test] + fn test_arithmetic_operations() { + let instructions = inst! { + add64 [1], [2]; + sub32 [3], [4]; + mul16 [5], [6]; + div8 [7], [8]; + }; + + assert_eq!(instructions.len(), 4); + assert!(matches!(instructions[0], Instruction::Add64 { .. })); + assert!(matches!(instructions[1], Instruction::Sub32 { .. })); + assert!(matches!(instructions[2], Instruction::Mul16 { .. })); + assert!(matches!(instructions[3], Instruction::Div8 { .. })); + } + + #[test] + fn test_signed_operations() { + let instructions = inst! { + idiv32 [1], [2]; + imul64 [3], [4]; + }; + + assert_eq!(instructions.len(), 2); + assert!(matches!(instructions[0], Instruction::IDiv32 { .. })); + assert!(matches!(instructions[1], Instruction::IMul64 { .. })); + } + + #[test] + fn test_float_operations() { + let instructions = inst! { + addf64 [1], [2]; + subf32 [3], [4]; + mulf64 [5], [6]; + divf32 [7], [8]; + powf64 [9], [10]; + }; + + assert_eq!(instructions.len(), 5); + assert!(matches!(instructions[0], Instruction::AddF64 { .. })); + assert!(matches!(instructions[1], Instruction::SubF32 { .. 
})); + assert!(matches!(instructions[2], Instruction::MulF64 { .. })); + assert!(matches!(instructions[3], Instruction::DivF32 { .. })); + assert!(matches!(instructions[4], Instruction::PowF64 { .. })); + } + + #[test] + fn test_bitwise_operations() { + let instructions = inst! { + and64 [1], [2]; + or32 [3], [4]; + xor16 [5], [6]; + not8 [7]; + shl64 [8], [9]; + shr32 [10], [11]; + }; + + assert_eq!(instructions.len(), 6); + assert!(matches!(instructions[0], Instruction::And64 { .. })); + assert!(matches!(instructions[1], Instruction::Or32 { .. })); + assert!(matches!(instructions[2], Instruction::Xor16 { .. })); + assert!(matches!(instructions[3], Instruction::Not8 { .. })); + assert!(matches!(instructions[4], Instruction::Shl64 { .. })); + assert!(matches!(instructions[5], Instruction::Shr32 { .. })); + } + + #[test] + fn test_write_with_immediate() { + let instructions = inst! { + write64 [50], # [1234]; + write32 [51], # [42]; + write16 [52], # [255]; + write8 [53], # [128]; + }; + + assert_eq!(instructions.len(), 4); + + if let Instruction::Write64 { reg, value } = instructions[0] { + assert_eq!(reg, Register::new(50)); + assert_eq!(value, 1234); + } else { + panic!("Expected Write64 instruction"); + } + + if let Instruction::Write32 { reg, value } = instructions[1] { + assert_eq!(reg, Register::new(51)); + assert_eq!(value, 42); + } else { + panic!("Expected Write32 instruction"); + } + } + + #[test] + fn test_jumps_with_labels() { + let instructions = inst! 
{ + jmp @ [0]; + jmpzero [1], @ [100]; + jmppos [2], @ [0]; + jmpneg [3], @ [100]; + }; + + assert_eq!(instructions.len(), 4); + + if let Instruction::Jmp { location } = instructions[0] { + assert!(matches!(location, Operand::Label(_))); + } else { + panic!("Expected Jmp instruction"); + } + + if let Instruction::JmpZero { l1, location } = instructions[1] { + assert_eq!(l1, Register::new(1)); + assert!(matches!(location, Operand::Label(_))); + } else { + panic!("Expected JmpZero instruction"); + } + } + + #[test] + fn test_jumps_with_registers() { + let instructions = inst! { + jmp [10]; + jmpzero [1], [20]; + }; + + assert_eq!(instructions.len(), 2); + + if let Instruction::Jmp { location } = instructions[0] { + assert!(matches!(location, Operand::Register(_))); + } else { + panic!("Expected Jmp instruction"); + } + } + + #[test] + fn test_control_flow() { + let instructions = inst! { + call [100]; + syscall [0]; + return; + cmp [1], [2]; + }; + + assert_eq!(instructions.len(), 4); + assert!(matches!(instructions[0], Instruction::Call { .. })); + assert!(matches!(instructions[1], Instruction::Syscall { .. })); + assert!(matches!(instructions[2], Instruction::Return)); + assert!(matches!(instructions[3], Instruction::Cmp { .. })); + } + + #[test] + fn test_mixed_operand_types() { + let instructions = inst! { + mov [10], [15]; + add64 [15], [20]; + jmp @ [42]; + write64 [30], # [9999]; + }; + + assert_eq!(instructions.len(), 4); + + if let Instruction::Mov { dest, src } = instructions[0] { + assert_eq!(dest, Register::new(10)); + assert_eq!(src, Register::new(15)); + } else { + panic!("Expected Mov instruction"); + } + } + + #[test] + fn test_empty_macro() { + let instructions: Vec = inst! {}; + assert_eq!(instructions.len(), 0); + } + + #[test] + fn test_complex_sequence() { + // A simple loop counting from 0 to 100 + let instructions = inst! 
{
+            write64 [10], # [0];
+            write64 [11], # [100];
+            add64 [10], [1];
+            cmp [10], [11];
+            jmppos [10], @ [0];
+            return;
+        };
+
+        assert_eq!(instructions.len(), 6);
+        assert!(matches!(instructions[0], Instruction::Write64 { .. }));
+        assert!(matches!(instructions[1], Instruction::Write64 { .. }));
+        assert!(matches!(instructions[2], Instruction::Add64 { .. }));
+        assert!(matches!(instructions[3], Instruction::Cmp { .. }));
+        assert!(matches!(instructions[4], Instruction::JmpPos { .. }));
+        assert!(matches!(instructions[5], Instruction::Return));
+    }
+}
diff --git a/compiler/hash-vm/src/builder/mod.rs b/compiler/hash-vm/src/builder/mod.rs
new file mode 100644
index 000000000..ea83738a3
--- /dev/null
+++ b/compiler/hash-vm/src/builder/mod.rs
@@ -0,0 +1,121 @@
+//! Hash Compiler VM bytecode building module.
+//!
+//! This module holds utilities and data structures to generate bytecode and
+//! store it in the format that the VM expects.
+
+mod func;
+mod instruction;
+
+use std::collections::HashMap;
+
+use hash_abi::FnAbiId;
+
+use crate::{
+    builder::func::FunctionBuilder,
+    bytecode::{Instruction, op::Operand},
+};
+
+/// Records where an absorbed function was placed within the program.
+#[derive(Debug)]
+pub struct FunctionCtx {
+    /// The ABI of the function, this is used to generate
+    /// the correct instructions for the function, to read the
+    /// arguments and return values correctly.
+    pub abi: FnAbiId,
+
+    /// The address of the function within the entire bytecode program.
+    pub offset: usize,
+}
+
+/// Accumulates the instructions of a whole program, function by function.
+#[derive(Debug, Default)]
+pub struct BytecodeBuilder {
+    /// The entire bytecode program, this contains all of the
+    /// functions and their instructions.
+    pub instructions: Vec<Instruction>,
+
+    /// The function context store, this is used to store the function contexts.
+    function_ctxs: HashMap<FnAbiId, FunctionCtx>,
+}
+
+impl BytecodeBuilder {
+    /// Create an empty builder; equivalent to [`BytecodeBuilder::default`].
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Copy the body of `func` onto the end of the program and return the
+    /// offset of its first instruction.
+    ///
+    /// Jump instructions whose target is an [`Operand::Label`] are rewritten
+    /// to an [`Operand::Immediate`] holding the label's offset plus the
+    /// function's start offset; every other instruction is copied as-is.
+    ///
+    /// # Panics
+    ///
+    /// Indexing the label table presumably panics when a jump refers to a
+    /// label that the function never defined.
+    pub fn absorb(&mut self, func: &FunctionBuilder) -> usize {
+        let FunctionBuilder { body, labels, .. } = func;
+        let offset = self.instructions.len();
+
+        // `extend` reserves space up front from the iterator's size hint.
+        self.instructions.extend(body.iter().copied().map(|mut instruction| {
+            match &mut instruction {
+                Instruction::Jmp { location, .. }
+                | Instruction::JmpPos { location, .. }
+                | Instruction::JmpNeg { location, .. }
+                | Instruction::JmpZero { location, .. } => {
+                    if let Operand::Label(label) = *location {
+                        // Labels are relative to the function body, so shift
+                        // them by where the function starts in the program.
+                        *location = Operand::Immediate(labels[label].get() + offset);
+                    }
+                }
+                _ => {}
+            }
+
+            instruction
+        }));
+
+        offset
+    }
+
+    /// Absorb `fn_builder` and record the offset it was placed at, keyed by
+    /// the function's ABI id.
+    pub fn add_function(&mut self, fn_builder: FunctionBuilder) {
+        let offset = self.absorb(&fn_builder);
+        let abi = fn_builder.abi;
+        self.function_ctxs.insert(abi, FunctionCtx { abi, offset });
+    }
+
+    /// Push a single instruction onto the end of the program.
+    pub fn add_instruction(&mut self, instruction: Instruction) {
+        self.instructions.push(instruction);
+    }
+
+    /// Append a block of instructions to the bytecode builder.
+    ///
+    /// This method accepts a `Vec<Instruction>` which can be conveniently
+    /// created using the `inst!` macro.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use hash_vm::{builder::BytecodeBuilder, inst};
+    ///
+    /// let mut builder = BytecodeBuilder::new();
+    /// builder.append(inst! {
+    ///     write64 [0], # [42];
+    ///     add64 [0], [1];
+    /// });
+    /// ```
+    pub fn append(&mut self, instructions: Vec<Instruction>) {
+        self.instructions.extend(instructions);
+    }
+
+    /// Consume the builder and return the assembled program.
+    pub fn build(self) -> Vec<Instruction> {
+        self.instructions
+    }
+}
diff --git a/compiler/hash-vm/src/bytecode.rs b/compiler/hash-vm/src/bytecode/instruction.rs
similarity index 92%
rename from compiler/hash-vm/src/bytecode.rs
rename to compiler/hash-vm/src/bytecode/instruction.rs
index cd947f3b2..19f7b97a3 100644
--- a/compiler/hash-vm/src/bytecode.rs
+++ b/compiler/hash-vm/src/bytecode/instruction.rs
@@ -1,5 +1,4 @@
-//! Hash Compiler VM bytecode/instruction set.
-use crate::register::Register;
+use super::{op::Operand, register::Register};
 
 /// The VM instruction set.
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
@@ -354,6 +353,26 @@ pub enum Instruction {
         l1: Register,
         l2: Register,
     },
+    /// Write an 8bit literal value into a register.
+    Write8 {
+        reg: Register,
+        value: u8,
+    },
+    /// Write a 16bit literal value into a register.
+    Write16 {
+        reg: Register,
+        value: u16,
+    },
+    /// Write a 32bit literal value into a register.
+    Write32 {
+        reg: Register,
+        value: u32,
+    },
+    /// Write a 64bit literal value into a register.
+    Write64 {
+        reg: Register,
+        value: u64,
+    },
     /// Call a function at a given address
     Call {
         func: Register,
@@ -371,25 +390,25 @@ pub enum Instruction {
     Return,
     /// Unconditional jump
     Jmp {
-        location: Register,
+        location: Operand,
     },
     /// Jump if the comparison value yields a '> zero', or in other words the
     /// right is greater than left
     JmpPos {
         l1: Register,
-        location: Register,
+        location: Operand,
     },
     /// Jump if the comparison value yields a '< zero', or in other words the
     /// left is greater than right
     JmpNeg {
         l1: Register,
-        location: Register,
+        location: Operand,
     },
     /// Jump if the comparison yields a 'zero', or in other words the left and
     /// right are equal
     JmpZero {
         l1: Register,
-        location: Register,
+        location: Operand,
     },
     /// Compare both values and store the result in `l1`. This will return
     /// either a one, zero or negative one.
diff --git a/compiler/hash-vm/src/bytecode/mod.rs b/compiler/hash-vm/src/bytecode/mod.rs
new file mode 100644
index 000000000..55dac5971
--- /dev/null
+++ b/compiler/hash-vm/src/bytecode/mod.rs
@@ -0,0 +1,9 @@
+//! Hash Compiler VM bytecode instruction set representation and related
+//! logic.
+pub mod instruction;
+pub mod op;
+pub mod register;
+
+pub use instruction::*;
+pub use op::*;
+pub use register::*;
diff --git a/compiler/hash-vm/src/bytecode/op.rs b/compiler/hash-vm/src/bytecode/op.rs
new file mode 100644
index 000000000..f8f1c235e
--- /dev/null
+++ b/compiler/hash-vm/src/bytecode/op.rs
@@ -0,0 +1,103 @@
+use hash_utils::index_vec::Idx;
+
+use super::register::{Register, RegisterSet};
+
+/// A 24-bit label offset, representing an instruction offset within a function.
+/// This allows offsets of up to 16,777,215 within a function while saving
+/// memory.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub struct LabelOffset([u8; 3]);
+
+impl LabelOffset {
+    /// The maximum value that can be represented in 24 bits.
+    pub const MAX: usize = 0x00FF_FFFF;
+
+    /// Create a new `LabelOffset` from a `usize`.
+    ///
+    /// # Panics
+    /// Panics if `offset` exceeds 24-bit range (16,777,215).
+    pub fn new(offset: usize) -> Self {
+        Self::try_new(offset).unwrap_or_else(|| {
+            panic!("offset {} exceeds 24-bit range (max: {})", offset, Self::MAX)
+        })
+    }
+
+    /// Try to create a new `LabelOffset` from a `usize`.
+    /// Returns `None` if the offset exceeds 24-bit range.
+    pub fn try_new(offset: usize) -> Option<Self> {
+        // The bytes are stored most-significant first so that the derived
+        // `PartialOrd`/`Ord` agree with the numeric order of the offsets.
+        (offset <= Self::MAX)
+            .then(|| Self([(offset >> 16) as u8, (offset >> 8) as u8, offset as u8]))
+    }
+
+    /// Get the offset value as a `usize`.
+    pub fn get(self) -> usize {
+        let [high, mid, low] = self.0;
+        ((high as usize) << 16) | ((mid as usize) << 8) | (low as usize)
+    }
+}
+
+/// Implement the `Idx` trait to allow `LabelOffset` to be used with `IndexVec`.
+impl Idx for LabelOffset {
+    fn from_usize(idx: usize) -> Self {
+        Self::new(idx)
+    }
+
+    fn index(self) -> usize {
+        self.get()
+    }
+}
+
+/// The target of a jump: a [Register], a [LabelOffset] or an immediate value.
+///
+/// While a function is being built its jumps may refer to labels, which are
+/// offsets relative to the function body. When the function is absorbed into
+/// the whole program, each [Operand::Label] is replaced with an
+/// [Operand::Immediate] holding the absolute instruction offset. A label that
+/// is never resolved cannot be jumped to, see [Operand::as_literal_usize].
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub enum Operand {
+    /// A register operand.
+    Register(Register),
+
+    /// A label offset within the function body.
+    Label(LabelOffset),
+
+    /// Literal immediate value.
+    Immediate(usize),
+}
+
+impl Operand {
+    /// Check if the operand is a register.
+    pub fn is_register(&self) -> bool {
+        matches!(self, Operand::Register(_))
+    }
+
+    /// Check if the operand is a label.
+    pub fn is_label(&self) -> bool {
+        matches!(self, Operand::Label(_))
+    }
+
+    /// Check if the operand is an immediate value.
+    pub fn is_immediate(&self) -> bool {
+        matches!(self, Operand::Immediate(_))
+    }
+
+    /// Resolve the operand to the instruction offset that it denotes.
+    ///
+    /// A register operand yields the 64-bit value currently held by that
+    /// register in `registers`, an immediate yields its value directly.
+    ///
+    /// # Panics
+    /// Panics if the operand is an unresolved [Operand::Label], or if the
+    /// register value does not fit into a `usize`.
+    pub fn as_literal_usize(&self, registers: &RegisterSet) -> usize {
+        match *self {
+            Operand::Register(reg) => usize::try_from(registers.get_register64(reg))
+                .expect("register value does not fit into a usize"),
+            Operand::Immediate(value) => value,
+            Operand::Label(_) => panic!("Cannot convert label operand to literal usize"),
+        }
+    }
+}
diff --git a/compiler/hash-vm/src/register.rs b/compiler/hash-vm/src/bytecode/register.rs
similarity index 95%
rename from compiler/hash-vm/src/register.rs
rename to compiler/hash-vm/src/bytecode/register.rs
index 4b8a7f937..3314d6b3b 100644
--- a/compiler/hash-vm/src/register.rs
+++ b/compiler/hash-vm/src/bytecode/register.rs
@@ -183,3 +183,18 @@ impl RegisterSet {
         reg[7]
     }
 }
+
+/// Macro to create a new register.
+///
+/// # Example
+/// ```
+/// use hash_vm::r;
+/// let r0 = r!(0);
+/// let r1 = r!(1);
+/// ```
+#[macro_export]
+macro_rules! r {
+    ($index:expr) => {
+        $crate::bytecode::register::Register::new($index)
+    };
+}
diff --git a/compiler/hash-vm/src/bytecode_builder.rs b/compiler/hash-vm/src/bytecode_builder.rs
deleted file mode 100644
index 8ca04aa33..000000000
--- a/compiler/hash-vm/src/bytecode_builder.rs
+++ /dev/null
@@ -1,23 +0,0 @@
-//! Hash Compiler VM bytecode building module.
-//!
-//! This module holds utilities and data structures to generate bytecode and
-//! store it in the format that the VM expects.
-use crate::bytecode::Instruction; - -#[derive(Debug, Default)] -pub struct BytecodeBuilder { - instructions: Vec, -} - -impl BytecodeBuilder { - pub fn add_instruction(&mut self, instruction: Instruction) -> &mut Self { - self.instructions.push(instruction); - self - } -} - -impl From for Vec { - fn from(builder: BytecodeBuilder) -> Self { - builder.instructions - } -} diff --git a/compiler/hash-vm/src/lib.rs b/compiler/hash-vm/src/lib.rs index b0d905c71..40229cea2 100644 --- a/compiler/hash-vm/src/lib.rs +++ b/compiler/hash-vm/src/lib.rs @@ -1,10 +1,10 @@ //! Hash Compiler VM crate. +#![feature(if_let_guard)] + mod heap; mod stack; +pub mod builder; pub mod bytecode; -pub mod register; - -pub mod bytecode_builder; pub mod error; pub mod vm; diff --git a/compiler/hash-vm/src/vm.rs b/compiler/hash-vm/src/vm.rs index deda97013..189efb5a8 100644 --- a/compiler/hash-vm/src/vm.rs +++ b/compiler/hash-vm/src/vm.rs @@ -3,9 +3,11 @@ use std::cell::Cell; use crate::{ - bytecode::Instruction, + bytecode::{ + Instruction, + register::{Register, RegisterSet}, + }, error::RuntimeError, - register::{Register, RegisterSet}, stack::Stack, }; @@ -60,6 +62,11 @@ impl Interpreter { } } + /// Get a reference to the current program space. + pub fn program(&self) -> &[Instruction] { + &self.instructions + } + fn run_next_instruction(&mut self) -> Result<(), RuntimeError> { let ip = self.get_instruction_pointer(); let instruction = self.instructions.get(ip).unwrap(); @@ -704,10 +711,8 @@ impl Interpreter { self.registers.set_register64(dest, value); } Instruction::Jmp { location } => { - // @@Correctness: is this the correct conversion?? 
- let value = self.registers.get_register64(location).try_into().unwrap(); - // Arbitrarily jump to the specified location in the register + let value = location.as_literal_usize(&self.registers); self.set_instruction_pointer(value); } Instruction::JmpPos { l1, location } => { @@ -716,7 +721,7 @@ impl Interpreter { // Arbitrarily jump to the specified location in the register if the comparison // value is less than zero or in other words, negative... if r1 > 0 { - let value = self.registers.get_register64(location).try_into().unwrap(); + let value = location.as_literal_usize(&self.registers); self.set_instruction_pointer(value); } } @@ -726,7 +731,7 @@ impl Interpreter { // Arbitrarily jump to the specified location in the register if the comparison // value is less than zero or in other words, negative... if r1 < 0 { - let value = self.registers.get_register64(location).try_into().unwrap(); + let value = location.as_literal_usize(&self.registers); self.set_instruction_pointer(value); } } @@ -736,7 +741,7 @@ impl Interpreter { // Arbitrarily jump to the specified location in the register if the comparison // value is less than zero or in other words, negative... 
if r1 == 0 { - let value = self.registers.get_register64(location).try_into().unwrap(); + let value = location.as_literal_usize(&self.registers); self.set_instruction_pointer(value); } } @@ -788,8 +793,6 @@ impl Interpreter { let value = self.registers.get_register_8b(l1); self.stack.push64(value)?; } - - // Function related instructions Instruction::Call { func } => { // Save the ip onto the stack self.stack.push64( @@ -830,6 +833,18 @@ impl Interpreter { u64::from_be_bytes(*self.stack.pop64()?), ); } + Instruction::Write8 { reg, value } => { + self.registers.set_register8(reg, value); + } + Instruction::Write16 { reg, value } => { + self.registers.set_register16(reg, value); + } + Instruction::Write32 { reg, value } => { + self.registers.set_register32(reg, value); + } + Instruction::Write64 { reg, value } => { + self.registers.set_register64(reg, value); + } Instruction::Syscall { .. } => todo!(), }; @@ -859,7 +874,7 @@ impl Interpreter { } pub fn run(&mut self) -> Result<(), RuntimeError> { - let ip = self.get_instruction_pointer(); + let mut ip = self.get_instruction_pointer(); while ip < self.instructions.len() { // Ok, now we need to run the current instruction, so we pass it into the @@ -875,7 +890,8 @@ impl Interpreter { return Ok(()); } - self.set_instruction_pointer(ip + 1); + ip += 1; + self.set_instruction_pointer(ip); } Ok(()) diff --git a/compiler/hash-vm/tests/vm.rs b/compiler/hash-vm/tests/vm.rs index 47ce06e02..3b140afc1 100644 --- a/compiler/hash-vm/tests/vm.rs +++ b/compiler/hash-vm/tests/vm.rs @@ -1,24 +1,24 @@ //! Hash Compiler VM tests. 
-use hash_vm::{ - bytecode::Instruction, bytecode_builder::BytecodeBuilder, register::Register, vm::Interpreter, -}; +use hash_vm::{builder::BytecodeBuilder, bytecode::register::Register, inst, r, vm::Interpreter}; #[test] fn push_two_and_add() { let mut builder = BytecodeBuilder::default(); - let l1 = Register::new(0); - let l2 = Register::new(1); - - builder.add_instruction(Instruction::Add16 { l1, l2 }); + let r0 = r!(0); + builder.append(inst! { + write16 [0], #[2]; + write16 [1], #[2]; + add16 [0], [1]; + }); let mut vm = Interpreter::new(); - vm.set_program(builder.into()); - - // set registers l1 and l2 to appropriate values... - vm.registers_mut().set_register16(l1, 2); - vm.registers_mut().set_register16(l2, 2); + // @@Todo: this is definitely not correct, as we'd + // still need to ensure that we've got all of the right + // labels and offsets set up within the bytecode, i.e. + // function addresses, block label addresses. + vm.set_program(builder.instructions); vm.run().unwrap(); - assert_eq!(vm.registers().get_register16(l1), 4); + assert_eq!(vm.registers().get_register16(r0), 4); }