Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions docs-internal/engine/SQLITE_METRICS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
- `sqlite_commit_dirty_page_count{path}`: Histogram of dirty page counts per commit path.
- `sqlite_commit_dirty_bytes{path}`: Histogram of raw dirty-page bytes per commit path.
- `sqlite_udb_ops_per_commit{path}`: Histogram of UniversalDB operations per commit path.
- `sqlite_commit_envoy_dispatch_duration_seconds`: Pegboard-envoy histogram for websocket frame arrival to `depot` dispatch.
- `sqlite_commit_envoy_response_duration_seconds`: Pegboard-envoy histogram for `depot` return to websocket response send.
- `envoy_sqlite_commit_dispatch_duration_seconds`: Pegboard-envoy histogram for websocket frame arrival to `depot` dispatch.
- `envoy_sqlite_commit_response_duration_seconds`: Pegboard-envoy histogram for `depot` return to websocket response send.
- `sqlite_commit_phases`: Labeled timing metric from the actor inspector, exposed at `/inspector/metrics`. Values are `request_build`, `serialize`, `transport`, and `state_update`.

## Scrape Points
Expand All @@ -23,8 +23,8 @@

## Diagnosis

- High `decode_request` or `sqlite_commit_envoy_dispatch_duration_seconds` usually means envoy-side validation or actor lookup is slow before storage work starts.
- High `decode_request` or `envoy_sqlite_commit_dispatch_duration_seconds` usually means envoy-side validation or actor lookup is slow before storage work starts.
- High `meta_read` or `pidx_read` points at UniversalDB read pressure or cache misses.
- High `ltx_encode` means commit encoding and compression are doing real work. Check dirty page counts and raw dirty bytes together.
- High `udb_write`, `meta_write`, or `sqlite_commit_envoy_response_duration_seconds` points at write-path latency after encode.
- High `udb_write`, `meta_write`, or `envoy_sqlite_commit_response_duration_seconds` points at write-path latency after encode.
- A healthy actor should show non-zero `sqlite_commit_phases` totals after commits in `/inspector/metrics`. If SQL runs but those timings stay zero, the native VFS metrics path is broken.
4 changes: 1 addition & 3 deletions engine/packages/metrics/src/registry.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
use prometheus::*;

lazy_static::lazy_static! {
pub static ref REGISTRY: Registry = Registry::new_custom(
Some("rivet".to_string()),
Some(labels! { })).unwrap();
pub static ref REGISTRY: Registry = Registry::new_custom(None, Some(labels! { })).unwrap();
}
26 changes: 13 additions & 13 deletions engine/packages/pegboard-envoy/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,88 +2,88 @@ use rivet_metrics::{BUCKETS, REGISTRY, prometheus::*};

lazy_static::lazy_static! {
pub static ref CONNECTION_TOTAL: IntCounterVec = register_int_counter_vec_with_registry!(
"pegboard_envoy_connection_total",
"envoy_connection_total",
"Count of envoy connections opened.",
&["namespace_id", "pool_name", "protocol_version"],
*REGISTRY
).unwrap();

pub static ref EVICTION_TOTAL: IntCounterVec = register_int_counter_vec_with_registry!(
"pegboard_envoy_eviction_total",
"envoy_eviction_total",
"Count of envoy connections evicted.",
&["namespace_id", "pool_name", "protocol_version"],
*REGISTRY
).unwrap();

pub static ref CONNECTION_ACTIVE: IntGaugeVec = register_int_gauge_vec_with_registry!(
"pegboard_envoy_connection_active",
"envoy_connection_active",
"Count of envoy connections currently active.",
&["namespace_id", "pool_name", "protocol_version"],
*REGISTRY
).unwrap();

pub static ref RECEIVE_INIT_PACKET_DURATION: HistogramVec = register_histogram_vec_with_registry!(
"pegboard_envoy_receive_init_packet_duration",
"envoy_receive_init_packet_duration",
"Duration to receive the init packet for a envoy connection.",
&["namespace_id", "pool_name"],
BUCKETS.to_vec(),
*REGISTRY
).unwrap();

pub static ref EVENT_MULTIPLEXER_COUNT: IntGauge = register_int_gauge_with_registry!(
"pegboard_envoy_event_multiplexer_count",
"envoy_event_multiplexer_count",
"Number of active actor event multiplexers.",
*REGISTRY
).unwrap();

pub static ref INGESTED_EVENTS_TOTAL: IntCounter = register_int_counter_with_registry!(
"pegboard_envoy_ingested_events_total",
"envoy_ingested_events_total",
"Count of actor events.",
*REGISTRY
).unwrap();

pub static ref SQLITE_COMMIT_ENVOY_DISPATCH_DURATION: Histogram = register_histogram_with_registry!(
"sqlite_commit_envoy_dispatch_duration_seconds",
"envoy_sqlite_commit_dispatch_duration_seconds",
"Duration from sqlite commit frame arrival until depot dispatch.",
BUCKETS.to_vec(),
*REGISTRY
).unwrap();

pub static ref SQLITE_COMMIT_ENVOY_RESPONSE_DURATION: Histogram = register_histogram_with_registry!(
"sqlite_commit_envoy_response_duration_seconds",
"envoy_sqlite_commit_response_duration_seconds",
"Duration from depot commit return until the websocket response frame is sent.",
BUCKETS.to_vec(),
*REGISTRY
).unwrap();

pub static ref SQLITE_MIGRATION_ATTEMPTS_TOTAL: IntCounter = register_int_counter_with_registry!(
"pegboard_envoy_sqlite_migration_attempts_total",
"envoy_sqlite_migration_attempts_total",
"Total number of sqlite v1 to v2 migration attempts.",
*REGISTRY
).unwrap();

pub static ref SQLITE_MIGRATION_SUCCESSES_TOTAL: IntCounter = register_int_counter_with_registry!(
"pegboard_envoy_sqlite_migration_successes_total",
"envoy_sqlite_migration_successes_total",
"Total number of sqlite v1 to v2 migrations that completed successfully.",
*REGISTRY
).unwrap();

pub static ref SQLITE_MIGRATION_FAILURES_TOTAL: IntCounterVec = register_int_counter_vec_with_registry!(
"pegboard_envoy_sqlite_migration_failures_total",
"envoy_sqlite_migration_failures_total",
"Total number of sqlite v1 to v2 migration failures by phase.",
&["phase"],
*REGISTRY
).unwrap();

pub static ref SQLITE_MIGRATION_DURATION: Histogram = register_histogram_with_registry!(
"pegboard_envoy_sqlite_migration_duration_seconds",
"envoy_sqlite_migration_duration_seconds",
"Duration of sqlite v1 to v2 migrations.",
BUCKETS.to_vec(),
*REGISTRY
).unwrap();

pub static ref SQLITE_MIGRATION_PAGES: Histogram = register_histogram_with_registry!(
"pegboard_envoy_sqlite_migration_pages",
"envoy_sqlite_migration_pages",
"Number of pages imported during sqlite v1 to v2 migration.",
BUCKETS.to_vec(),
*REGISTRY
Expand Down
4 changes: 2 additions & 2 deletions engine/packages/pegboard/src/actor_kv/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@ use rivet_metrics::{BUCKETS, REGISTRY, prometheus::*};

lazy_static::lazy_static! {
pub static ref ACTOR_KV_OPERATION_DURATION: HistogramVec = register_histogram_vec_with_registry!(
"actor_kv_operation_duration_seconds",
"pegboard_actor_kv_operation_duration_seconds",
"Duration of actor KV operations including UDB transaction.",
&["op"],
BUCKETS.to_vec(),
*REGISTRY
).unwrap();

pub static ref ACTOR_KV_KEYS_PER_OP: HistogramVec = register_histogram_vec_with_registry!(
"actor_kv_keys_per_operation",
"pegboard_actor_kv_keys_per_operation",
"Number of keys per actor KV operation.",
&["op"],
vec![1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0],
Expand Down
11 changes: 3 additions & 8 deletions rivetkit-rust/packages/rivetkit-core/src/actor/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,18 +224,13 @@ impl ActorContext {
name: String,
key: ActorKey,
region: String,
generation: Option<u32>,
envoy_key: String,
_generation: Option<u32>,
_envoy_key: String,
config: ActorConfig,
kv: Kv,
sql: SqliteDb,
) -> Self {
let metrics = ActorMetrics::new(
actor_id.clone(),
generation,
format_actor_key(&key),
envoy_key,
);
let metrics = ActorMetrics::new(name.clone());
#[cfg(feature = "sqlite-local")]
let mut sql = sql;
#[cfg(feature = "sqlite-local")]
Expand Down
Loading
Loading