From 4168bb8bd51188ed4b8a300e0a2c467ec8f827f3 Mon Sep 17 00:00:00 2001
From: Malcolm Greaves
Date: Tue, 28 Apr 2026 17:22:46 -0700
Subject: [PATCH] Add merkle tree interfaces for reading & writing

New interface traits: `MerkleReader`, `MerkleWriter`, and unifying
`MerkleStore`.

**`MerkleStore`**
A `MerkleStore` is just a `MerkleReader` + `MerkleWriter` whose `Error`
types are the same. Includes a blanket impl for any type that has reader
and writer impls with equal `Error` types.

**`MerkleReader`**
Can retrieve Merkle tree nodes from storage given a hash. Supports
existence checks, retrieving the node, and retrieving a node's children,
if applicable.

**`MerkleWriter`**
Produces a `MerkleWriteSession` that allows for writing arbitrary Merkle
tree nodes to the underlying physical store. Within the write session,
writing a node creates a new `NodeWriteSession` instance. This instance
can write a node as well as a node's children. Both session types have
`finish`, which ensures their writes go to the underlying physical store.

These tiered write sessions match how the Oxen code writes Merkle tree
nodes. Additionally, this design allows for implementations to be eager,
buffer writes in memory, or provide transaction semantics.
---
 crates/lib/src/model/merkle_tree.rs        | 11 +++
 .../src/model/merkle_tree/merkle_reader.rs | 84 +++++++++++++++++
 .../src/model/merkle_tree/merkle_writer.rs | 93 +++++++++++++++++++
 3 files changed, 188 insertions(+)
 create mode 100644 crates/lib/src/model/merkle_tree/merkle_reader.rs
 create mode 100644 crates/lib/src/model/merkle_tree/merkle_writer.rs

diff --git a/crates/lib/src/model/merkle_tree.rs b/crates/lib/src/model/merkle_tree.rs
index 004e5db7b..cddb2efaf 100644
--- a/crates/lib/src/model/merkle_tree.rs
+++ b/crates/lib/src/model/merkle_tree.rs
@@ -1,9 +1,20 @@
 pub mod merkle_hash;
+pub mod merkle_reader;
+pub mod merkle_writer;
 pub mod node;
 pub mod node_type;
 
 pub use crate::model::merkle_tree::merkle_hash::MerkleHash;
+pub use crate::model::merkle_tree::merkle_reader::MerkleReader;
+pub use crate::model::merkle_tree::merkle_writer::MerkleWriter;
 pub use crate::model::merkle_tree::node::merkle_tree_node_cache;
 pub use crate::model::merkle_tree::node_type::{
     MerkleTreeNodeIdType, MerkleTreeNodeType, TMerkleTreeNode,
 };
+
+/// A complete Merkle tree store supports reading and writing with a shared error type.
+pub trait MerkleStore: MerkleReader + MerkleWriter<Error = <Self as MerkleReader>::Error> {}
+
+/// Any type that implements the Merkle reading and writing traits is automatically an instance
+/// of a `MerkleStore`, provided that the error types in both the reader & writer align.
+impl<T> MerkleStore for T where T: MerkleReader + MerkleWriter<Error = <T as MerkleReader>::Error> {}
diff --git a/crates/lib/src/model/merkle_tree/merkle_reader.rs b/crates/lib/src/model/merkle_tree/merkle_reader.rs
new file mode 100644
index 000000000..e36413798
--- /dev/null
+++ b/crates/lib/src/model/merkle_tree/merkle_reader.rs
@@ -0,0 +1,84 @@
+use crate::error::IntoOxenError;
+use crate::model::{
+    MerkleHash, MerkleTreeNodeType,
+    merkle_tree::node::{EMerkleTreeNode, MerkleTreeNode},
+};
+
+/// Interface for read-only access to Merkle tree nodes.
+pub trait MerkleReader: Send + Sync {
+    /// The error type for the Merkle tree's underlying storage layer.
+    ///
+    /// Backends may use whichever error type is natural for their storage
+    /// (e.g. `MerkleDbError` for the file backend). The `IntoOxenError`
+    /// bound lets callers that return `Result<_, OxenError>` use `?` directly.
+    type Error: std::error::Error + IntoOxenError;
+
+    /// True if there is some node with the given hash. False otherwise.
+    /// An error is returned if there is some other failure in the Merkle tree's underlying storage layer.
+    fn exists(&self, hash: &MerkleHash) -> Result<bool, Self::Error>;
+
+    /// Retrieve the node record for the given hash, if it exists. None means no such node exists.
+    /// An error is returned if there is some other failure in the Merkle tree's underlying storage layer.
+    fn get_node(&self, hash: &MerkleHash) -> Result<Option<MerkleNodeRecord>, Self::Error>;
+
+    /// Retrieve the children of the node for the given hash, if it exists and if it is a directory node.
+    /// If the node represents a file, then an empty list is always returned.
+    /// An error is returned if there is some other failure in the Merkle tree's underlying storage layer.
+    fn get_children(
+        &self,
+        hash: &MerkleHash,
+    ) -> Result<Vec<(MerkleHash, MerkleTreeNode)>, Self::Error>;
+}
+
+/// Metadata returned when reading a single node.
+pub struct MerkleNodeRecord {
+    hash: MerkleHash,
+    dtype: MerkleTreeNodeType,
+    parent_id: Option<MerkleHash>,
+    node: EMerkleTreeNode,
+    num_children: u64,
+}
+
+impl MerkleNodeRecord {
+    pub fn new(
+        hash: MerkleHash,
+        dtype: MerkleTreeNodeType,
+        parent_id: Option<MerkleHash>,
+        node: EMerkleTreeNode,
+        num_children: u64,
+    ) -> Self {
+        Self {
+            hash,
+            dtype,
+            parent_id,
+            node,
+            num_children,
+        }
+    }
+
+    pub fn hash(&self) -> &MerkleHash {
+        &self.hash
+    }
+
+    pub fn dtype(&self) -> &MerkleTreeNodeType {
+        &self.dtype
+    }
+
+    pub fn parent_id(&self) -> Option<&MerkleHash> {
+        self.parent_id.as_ref()
+    }
+
+    pub fn node(&self) -> &EMerkleTreeNode {
+        &self.node
+    }
+
+    pub fn num_children(&self) -> u64 {
+        self.num_children
+    }
+
+    /// Consume this record and return its `EMerkleTreeNode`, avoiding a clone
+    /// for callers that only need the owned node value.
+    pub fn into_node(self) -> EMerkleTreeNode {
+        self.node
+    }
+}
diff --git a/crates/lib/src/model/merkle_tree/merkle_writer.rs b/crates/lib/src/model/merkle_tree/merkle_writer.rs
new file mode 100644
index 000000000..e733084b8
--- /dev/null
+++ b/crates/lib/src/model/merkle_tree/merkle_writer.rs
@@ -0,0 +1,93 @@
+use crate::error::IntoOxenError;
+use crate::model::{MerkleHash, TMerkleTreeNode};
+
+/// Interface for writing to a Merkle tree store.
+pub trait MerkleWriter: Send + Sync {
+    /// The error type for the Merkle tree's underlying storage layer.
+    ///
+    /// Backends may use whichever error type is natural for their storage
+    /// (e.g. [`MerkleDbError`] for the [`FileBackend`]). The `IntoOxenError`
+    /// bound on the associated type propagates as an implied bound at every
+    /// use site, so generic callers can convert errors via
+    /// `?` with no additional `where` clauses.
+    type Error: std::error::Error + IntoOxenError;
+
+    /// The write session that manages writing multiple nodes to the store.
+    type Session<'a>: MerkleWriteSession<Error = Self::Error>
+    where
+        Self: 'a;
+
+    /// Create a new write session to write many changed Merkle tree nodes to the store.
+    ///
+    /// To ensure that changes are persisted, callers must call [`MerkleWriteSession::finish`] on the returned session.
+    /// Correct use of the returned session is to create a [`NodeWriteSession`] for each node to be
+    /// written, and then call [`NodeWriteSession::finish`] on that session when complete with the node.
+    fn begin(&self) -> Result<Self::Session<'_>, Self::Error>;
+}
+
+/// A write session for writing multiple nodes to the Merkle tree store.
+///
+/// A [`MerkleWriteSession`] is used to create multiple [`NodeWriteSession`]s, each of which
+/// represents a single node being written to the store. Typical usage is to create a single
+/// [`MerkleWriteSession`] when committing repository changes. From this one write session,
+/// callers will create multiple [`NodeWriteSession`]s to write the nodes they need to store.
+/// Each [`NodeWriteSession`] must have its [`finish`] called to finalize the written node
+/// information. Once all nodes have been written, the [`finish`] method of the [`MerkleWriteSession`]
+/// must be called to persist the changes to the store.
+///
+/// Persistence and eagerness of writes are implementation details. Implementations may choose
+/// to buffer writes or write immediately to the store when [`create_node`] and [`add_child`]
+/// are called. The invariant is that [`finish`] must be called to **ensure** that writes are
+/// persisted. An implementation may choose to e.g. have a transaction mechanism to roll-back
+/// changes on `Err`. However, implementations are not required to support this.
+pub trait MerkleWriteSession {
+    /// The error type for the Merkle tree's underlying storage layer.
+    /// Must be convertible into an [`OxenError`].
+    type Error: std::error::Error + IntoOxenError;
+
+    /// The write session that manages writing a single node's information to the store.
+    type NodeSession<'b>: NodeWriteSession<Error = Self::Error>
+    where
+        Self: 'b;
+
+    /// Begin the process of writing the node to the Merkle tree store.
+    ///
+    /// The returned node write session is used to add children, if the node is a directory
+    /// or vnode. Callers are responsible for calling `finish` on the returned session
+    /// to ensure that their writes will be made available to the Merkle tree store.
+    ///
+    /// Note that any written nodes are not required to be persisted to the store until
+    /// _this_ write session's [`finish`] is called.
+    fn create_node<'b, N: TMerkleTreeNode>(
+        &'b self,
+        node: &N,
+        parent_id: Option<MerkleHash>,
+    ) -> Result<Self::NodeSession<'b>, Self::Error>;
+
+    /// Ensure that all content from all finished node write sessions has been written to the
+    /// Merkle tree store. Consumes the session: any active [`NodeWriteSession`]s borrowing from
+    /// this one must already have been finished (and thus dropped) before this is called — the
+    /// borrow checker enforces that invariant.
+    fn finish(self) -> Result<(), Self::Error>;
+}
+
+/// A write session for a single node being constructed.
+///
+/// Implementations may buffer the `node` and `children` information in memory or choose to write
+/// the data to the store eagerly. However, if [`finish`] is called and returns `Ok`, then the
+/// guarantee is that all node and child information must be persisted to the store.
+pub trait NodeWriteSession {
+    /// The error type for the Merkle tree's underlying storage layer.
+    /// Must be convertible into an [`OxenError`].
+    type Error: std::error::Error + IntoOxenError;
+
+    /// The hash of the node being written in this session.
+    fn node_id(&self) -> &MerkleHash;
+
+    /// Add a child to the current node.
+    fn add_child<N: TMerkleTreeNode>(&mut self, child: &N) -> Result<(), Self::Error>;
+
+    /// Ensure the node and its children have been written to the Merkle tree store. Consumes
+    /// the node session; releases the borrow on the parent [`MerkleWriteSession`].
+    fn finish(self) -> Result<(), Self::Error>;
+}