diff --git a/db4-storage/src/lib.rs b/db4-storage/src/lib.rs
index 174444c2fb..623737957a 100644
--- a/db4-storage/src/lib.rs
+++ b/db4-storage/src/lib.rs
@@ -54,6 +54,7 @@ pub type GS
= GraphPropSegmentView
;
pub type Layer
= GraphStore, ES, GS
, P>;
pub type Wal = ::Wal;
+pub type ControlFile = ::ControlFile;
pub type Config = ::Config;
pub type GIDResolver = MappingResolver;
diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs
index 04f2994159..dcc1c07b6f 100644
--- a/db4-storage/src/pages/mod.rs
+++ b/db4-storage/src/pages/mod.rs
@@ -3,7 +3,12 @@ use crate::{
api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps},
error::StorageError,
pages::{edge_store::ReadLockedEdgeStorage, node_store::ReadLockedNodeStorage},
- persist::{config::ConfigOps, strategy::PersistenceStrategy},
+ persist::{
+ config::ConfigOps,
+ control_file::{ControlFileOps, DBState},
+ strategy::PersistenceStrategy,
+ },
+ properties::props_meta_writer::PropsMetaWriter,
segments::{edge::segment::MemEdgeSegment, node::segment::MemNodeSegment},
state::StateIndex,
wal::{GraphWalOps, WalOps},
@@ -349,32 +354,38 @@ impl<
> Drop for GraphStore
{
fn drop(&mut self) {
+ let wal = self.ext.wal();
+ let control_file = self.ext.control_file();
+
match self.flush() {
Ok(_) => {
- let wal = self.ext.wal();
-
- // INVARIANTS:
- // 1. No new writes can occur since we are in a drop.
- // 2. flush() has persisted all the segments to disk.
- //
- // Thus, we can safely discard all records with LSN <= latest_lsn_on_disk
- // by rotating the WAL.
- let latest_lsn_on_disk = wal.next_lsn() - 1;
-
- if let Err(e) = wal.rotate(latest_lsn_on_disk) {
- eprintln!("Failed to rotate WAL in drop: {}", e);
+ // Log a checkpoint record in the WAL, indicating that the DB was shut down
+ // with all the segments flushed to disk.
+ // On startup, recovery is skipped since there are no pending writes to replay.
+ let checkpoint_lsn = match wal.log_shutdown_checkpoint() {
+ Ok(lsn) => lsn,
+ Err(err) => {
+ eprintln!("Failed to log shutdown checkpoint in drop: {err}");
+ return;
+ }
+ };
+
+ // Flush up to the end of the WAL stream.
+ let flush_lsn = wal.position();
+
+ if let Err(err) = wal.flush(flush_lsn) {
+ eprintln!("Failed to flush checkpoint record in drop: {err}");
+ return;
}
- // FIXME: If the process crashes here after rotation, we lose the
- // checkpoint record. Write next LSN to a separate file before rotation.
+ // Record the checkpoint LSN and the shutdown state, then write the control file to disk.
+ control_file.set_checkpoint(checkpoint_lsn);
+ control_file.set_db_state(DBState::Shutdown);
- // Log a checkpoint record so we can restore the next LSN after reload.
- let checkpoint_lsn = wal
- .log_checkpoint(latest_lsn_on_disk)
- .expect("Failed to log checkpoint in drop");
-
- wal.flush(checkpoint_lsn)
- .expect("Failed to flush checkpoint record in drop");
+ if let Err(err) = control_file.save() {
+ eprintln!("Failed to save control file in drop: {err}");
+ return;
+ }
}
Err(err) => {
eprintln!("Failed to flush storage in drop: {err}")
diff --git a/db4-storage/src/persist/config.rs b/db4-storage/src/persist/config.rs
index 94eef349df..80435eaa16 100644
--- a/db4-storage/src/persist/config.rs
+++ b/db4-storage/src/persist/config.rs
@@ -9,7 +9,8 @@ use tracing::error;
pub const DEFAULT_MAX_PAGE_LEN_NODES: u32 = 131_072; // 2^17
pub const DEFAULT_MAX_PAGE_LEN_EDGES: u32 = 1_048_576; // 2^20
-pub const CONFIG_FILE: &str = "config.json";
+
+const CONFIG_FILE_NAME: &str = "config.json";
pub trait ConfigOps: Serialize + DeserializeOwned + Args + Sized {
fn max_node_page_len(&self) -> u32;
@@ -25,14 +26,14 @@ pub trait ConfigOps: Serialize + DeserializeOwned + Args + Sized {
fn with_node_types(&self, node_types: impl IntoIterator- >) -> Self;
fn load_from_dir(dir: &Path) -> Result {
- let config_file = dir.join(CONFIG_FILE);
+ let config_file = dir.join(CONFIG_FILE_NAME);
let config_file = std::fs::File::open(config_file)?;
let config = serde_json::from_reader(config_file)?;
Ok(config)
}
fn save_to_dir(&self, dir: &Path) -> Result<(), StorageError> {
- let config_file = dir.join(CONFIG_FILE);
+ let config_file = dir.join(CONFIG_FILE_NAME);
let config_file = std::fs::File::create(&config_file)?;
serde_json::to_writer_pretty(config_file, self)?;
Ok(())
diff --git a/db4-storage/src/persist/control_file.rs b/db4-storage/src/persist/control_file.rs
new file mode 100644
index 0000000000..9c8c942884
--- /dev/null
+++ b/db4-storage/src/persist/control_file.rs
@@ -0,0 +1,53 @@
+use crate::{error::StorageError, wal::LSN};
+use serde::{Deserialize, Serialize};
+use std::path::Path;
+
+#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
+pub enum DBState { // Database state read and written through `ControlFileOps::db_state` / `set_db_state`.
+ Running,
+ Shutdown, // Set by `GraphStore`'s `Drop` after storage and the shutdown checkpoint record are flushed.
+ CrashRecovery,
+ NotSupported, // Returned by `NoControlFile::db_state`.
+}
+
+// Starting value for `last_checkpoint` in the control file.
+pub const LAST_CHECKPOINT_INIT: LSN = 0;
+
+pub trait ControlFileOps: Sized {
+ fn load(dir: &Path) -> Result; // NOTE(review): presumably reads the control file stored under `dir` — confirm against implementors.
+ /// Persists the control file. `GraphStore`'s `Drop` calls this after setting the checkpoint and shutdown state.
+ fn save(&self) -> Result<(), StorageError>;
+ /// Returns the current `DBState`.
+ fn db_state(&self) -> DBState;
+ /// Returns the last checkpoint LSN; `LAST_CHECKPOINT_INIT` is its starting value.
+ fn last_checkpoint(&self) -> LSN;
+ /// Sets the database state. Takes `&self`, so stateful implementors need interior mutability.
+ fn set_db_state(&self, state: DBState);
+ /// Sets the last checkpoint LSN. Takes `&self`, so stateful implementors need interior mutability.
+ fn set_checkpoint(&self, lsn: LSN);
+}
+
+#[derive(Debug, Clone)]
+pub struct NoControlFile;
+
+impl ControlFileOps for NoControlFile {
+    fn load(_dir: &Path) -> Result {
+        Ok(NoControlFile) // Nothing is read; `_dir` is ignored.
+    }
+
+    fn save(&self) -> Result<(), StorageError> {
+        Ok(()) // Nothing to persist.
+    }
+
+    fn db_state(&self) -> DBState {
+        DBState::NotSupported
+    }
+
+    fn last_checkpoint(&self) -> LSN {
+        LAST_CHECKPOINT_INIT // No checkpoint is ever recorded here.
+    }
+    // The setters are no-ops; `_`-prefixed parameters avoid `unused_variables` warnings.
+    fn set_db_state(&self, _state: DBState) {}
+
+    fn set_checkpoint(&self, _lsn: LSN) {}
+}
diff --git a/db4-storage/src/persist/mod.rs b/db4-storage/src/persist/mod.rs
index 43275c62a7..7609d5b63e 100644
--- a/db4-storage/src/persist/mod.rs
+++ b/db4-storage/src/persist/mod.rs
@@ -1,2 +1,3 @@
pub mod config;
+pub mod control_file;
pub mod strategy;
diff --git a/db4-storage/src/persist/strategy.rs b/db4-storage/src/persist/strategy.rs
index 5f1f7aad07..f733365aab 100644
--- a/db4-storage/src/persist/strategy.rs
+++ b/db4-storage/src/persist/strategy.rs
@@ -1,7 +1,10 @@
use crate::{
api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps},
error::StorageError,
- persist::config::{BaseConfig, ConfigOps},
+ persist::{
+ config::{BaseConfig, ConfigOps},
+ control_file::{ControlFileOps, NoControlFile},
+ },
segments::{
edge::segment::{EdgeSegmentView, MemEdgeSegment},
graph_prop::{GraphPropSegmentView, segment::MemGraphPropSegment},
@@ -25,6 +28,7 @@ pub trait PersistenceStrategy: Debug + Clone + Send + Sync + 'static {
type GS: GraphPropSegmentOps;
type Wal: WalOps + GraphWalOps;
type Config: ConfigOps;
+ type ControlFile: ControlFileOps;
fn new(config: Self::Config, graph_dir: Option<&Path>) -> Result;
@@ -38,6 +42,8 @@ pub trait PersistenceStrategy: Debug + Clone + Send + Sync + 'static {
fn wal(&self) -> &Self::Wal;
+ fn control_file(&self) -> &Self::ControlFile;
+
/// Called after every write and checks memory limits to decide if a flush is needed
fn persist_node_segment>(
&self,
@@ -81,6 +87,7 @@ pub struct NoOpStrategy {
config: BaseConfig,
memory_tracker: Arc,
wal: NoWal,
+ control_file: NoControlFile,
}
impl PersistenceStrategy for NoOpStrategy {
@@ -89,12 +96,14 @@ impl PersistenceStrategy for NoOpStrategy {
type GS = GraphPropSegmentView;
type Wal = NoWal;
type Config = BaseConfig;
+ type ControlFile = NoControlFile;
fn new(config: BaseConfig, _graph_dir: Option<&Path>) -> Result {
Ok(Self {
config,
- memory_tracker: Arc::new(AtomicUsize::new(0)),
wal: NoWal,
+ control_file: NoControlFile,
+ memory_tracker: Arc::new(AtomicUsize::new(0)),
})
}
@@ -118,6 +127,10 @@ impl PersistenceStrategy for NoOpStrategy {
&self.wal
}
+ fn control_file(&self) -> &Self::ControlFile {
+ &self.control_file
+ }
+
fn persist_node_segment>(
&self,
_node_page: &Self::NS,
diff --git a/db4-storage/src/wal/entry.rs b/db4-storage/src/wal/entry.rs
index 18d33a116b..002723af7d 100644
--- a/db4-storage/src/wal/entry.rs
+++ b/db4-storage/src/wal/entry.rs
@@ -103,15 +103,31 @@ impl GraphWalOps for NoWal {
Ok(0)
}
- fn log_checkpoint(&self, _lsn: LSN) -> Result {
+ fn log_checkpoint(&self, _redo: LSN) -> Result {
Ok(0)
}
- fn replay_iter(&self) -> impl Iterator
- > {
- std::iter::empty()
+ fn log_shutdown_checkpoint(&self) -> Result {
+ Ok(0)
+ }
+
+ fn read_checkpoint(&self, _lsn: LSN) -> Result {
+ Err(StorageError::GenericFailure(
+ "read_checkpoint is not supported for NoWAL".to_string(),
+ ))
}
- fn replay_to_graph(&self, _graph: &mut G) -> Result<(), StorageError> {
+ fn read_shutdown_checkpoint(&self, _lsn: LSN) -> Result {
+ Err(StorageError::GenericFailure(
+ "read_shutdown_checkpoint is not supported for NoWAL".to_string(),
+ ))
+ }
+
+ fn replay_to_graph(
+ &self,
+ _graph: &mut G,
+ _start: LSN,
+ ) -> Result {
panic!("NoWAL does not support replay")
}
}
diff --git a/db4-storage/src/wal/mod.rs b/db4-storage/src/wal/mod.rs
index 9545bdfc33..e933504121 100644
--- a/db4-storage/src/wal/mod.rs
+++ b/db4-storage/src/wal/mod.rs
@@ -4,7 +4,6 @@ use raphtory_core::{
entities::{EID, GID, VID},
storage::timeindex::EventTime,
};
-use std::path::Path;
pub mod entry;
pub mod no_wal;
@@ -14,17 +13,6 @@ pub type TransactionID = u64;
/// Core Wal methods.
pub trait WalOps {
- type Config;
-
- fn new(dir: Option<&Path>, config: Self::Config) -> Result
- where
- Self: Sized;
-
- /// Loads an existing WAL file from the given directory in append mode.
- fn load(dir: Option<&Path>, config: Self::Config) -> Result
- where
- Self: Sized;
-
/// Appends data to the WAL and returns the assigned LSN.
fn append(&self, data: &[u8]) -> Result;
@@ -32,18 +20,18 @@ pub trait WalOps {
/// Returns immediately if the given LSN is already flushed to disk.
fn flush(&self, lsn: LSN) -> Result<(), StorageError>;
- /// Rotates the underlying WAL file.
- /// All records with LSN > `cutoff_lsn` are copied to the new WAL file.
- fn rotate(&self, cutoff_lsn: LSN) -> Result<(), StorageError>;
+ /// Reads the WAL record at the given LSN.
+ /// Returns `Ok(None)` if there is no record at that LSN.
+ fn read(&self, lsn: LSN) -> Result