diff --git a/CHANGELOG.md b/CHANGELOG.md index 291dce0fc509..abfe42a2964a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,8 @@ - [#6913](https://github.com/ChainSafe/forest/issues/6913): Added support for NV28 for devnets. Use `FOREST_FIREHORSE_HEIGHT` environment variable to set the upgrade height. +- [#6926](https://github.com/ChainSafe/forest/pull/6926): Added strict JSON validation to deny unknown fields in RPC request parameters and response results when `FOREST_STRICT_JSON` is enabled. + ### Changed - [#6939](https://github.com/ChainSafe/forest/pull/6939): Refactored snapshot export and garbage collection logic to use disk-backed hash set for de-de-duplicating reachable blocks. This results in less RAM usage (~6-7GiB) and more disk usage (~7-8GiB on mainnet). diff --git a/Cargo.lock b/Cargo.lock index 66a13acc8e53..bb4db5c4ef62 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3410,6 +3410,7 @@ dependencies = [ "scopeguard", "semver", "serde", + "serde_ignored", "serde_ipld_dagcbor", "serde_json", "serde_with", @@ -8655,6 +8656,16 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "serde_ignored" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "115dffd5f3853e06e746965a20dcbae6ee747ae30b543d91b0e089668bb07798" +dependencies = [ + "serde", + "serde_core", +] + [[package]] name = "serde_ipld_dagcbor" version = "0.6.4" diff --git a/Cargo.toml b/Cargo.toml index a17b5d68ead1..e41a78c665d7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -198,6 +198,7 @@ schemars = { version = "1", features = ["chrono04", "uuid1"] } scopeguard = "1" semver = "1" serde = { workspace = true } +serde_ignored = "0.1" serde_ipld_dagcbor = "0.6" serde_json = { version = "1", features = ["raw_value"] } serde_with = { version = "3", features = ["chrono_0_4"] } diff --git a/docs/docs/users/reference/env_variables.md b/docs/docs/users/reference/env_variables.md index 0d983f78e6b3..c95b3d86c3b4 100644 --- a/docs/docs/users/reference/env_variables.md +++ b/docs/docs/users/reference/env_variables.md @@ -58,7 +58,7 @@ process. | `FOREST_JWT_DISABLE_EXP_VALIDATION` | 1 or true | empty | 1 | Whether or not to disable JWT expiration validation | | `FOREST_ETH_BLOCK_CACHE_SIZE` | positive integer | 500 | 1 | The size of Eth block cache | | `FOREST_RPC_BACKFILL_FULL_TIPSET_FROM_NETWORK` | 1 or true | false | 1 | Whether or not to backfill full tipsets from the p2p network | -| `FOREST_STRICT_JSON` | 1 or true | false | 1 | Enable strict JSON validation to detect duplicate keys in RPC requests | +| `FOREST_STRICT_JSON` | 1 or true | false | 1 | Enable strict JSON validation to detect duplicate keys and reject unknown fields in RPC requests and responses | | `FOREST_AUTO_DOWNLOAD_SNAPSHOT_PATH` | URL or file path | empty | `/var/tmp/forest_snapshot_calibnet.forest.car.zst` | Override snapshot path for `--auto-download-snapshot` | | `FOREST_DOWNLOAD_CONNECTIONS` | positive integer | 5 | 10 | Number of parallel HTTP connections for downloading snapshots | | `FOREST_ETH_V1_DISABLE_F3_FINALITY_RESOLUTION` | 1 or true | empty | 1 | Whether or not to disable F3 finality resolution in Eth `v1` RPC methods | diff --git a/src/rpc/json_validator.rs b/src/rpc/json_validator.rs index 6d63356c921e..7f23f70bcc90 100644 --- a/src/rpc/json_validator.rs +++ b/src/rpc/json_validator.rs @@ -1,12 +1,19 @@ // Copyright 2019-2026 ChainSafe Systems // SPDX-License-Identifier: Apache-2.0, MIT -//! JSON validation utilities for detecting duplicate keys before serde_json processing. +//! JSON validation utilities for RPC requests and responses processing. //! -//! serde_json automatically deduplicates keys at parse time using a "last-write-wins" strategy -//! This means JSON like `{"/":"cid1", "/":"cid2"}` will keep only the last value, which can lead to unexpected behavior in RPC calls. +//! - **Duplicate key detection**: `serde_json` automatically deduplicates keys at parse time +//! using a "last-write-wins" strategy. This means JSON like `{"/":"cid1", "/":"cid2"}` will +//! keep only the last value, which can lead to unexpected behavior in RPC calls. +//! - **Unknown field detection**: `serde_json` silently ignores unknown fields by default. +//! In strict mode, [`from_value_rejecting_unknown_fields`] applies to RPC request and +//! responses. +//! +//! All of this is gated behind the `FOREST_STRICT_JSON` environment variable. use ahash::HashSet; +use serde::de::DeserializeOwned; pub const STRICT_JSON_ENV: &str = "FOREST_STRICT_JSON"; @@ -50,9 +57,32 @@ pub fn validate_json_for_duplicates(json_str: &str) -> Result<(), String> { check_value(&value) } +/// De-serializes a [`serde_json::Value`] into `T`, rejecting unknown fields when strict mode is +/// enabled. When strict mode is off, this is equivalent to [`serde_json::from_value`]. +pub fn from_value_rejecting_unknown_fields( + value: serde_json::Value, +) -> Result { + if !is_strict_mode() { + return serde_json::from_value(value); + } + let mut unknown = Vec::new(); + let result: T = serde_ignored::deserialize(value, |path| { + unknown.push(path.to_string()); + })?; + if !unknown.is_empty() { + return Err(serde::de::Error::custom(format!( + "unknown field(s): {}. Set {STRICT_JSON_ENV}=0 to disable this check", + unknown.join(", ") + ))); + } + Ok(result) +} + #[cfg(test)] mod tests { use super::*; + use serde::Deserialize; + use serde_json::json; use serial_test::serial; fn with_strict_mode(enabled: bool, f: F) @@ -131,4 +161,54 @@ mod tests { assert!(result.unwrap_err().contains("duplicate key '/'")); }); } + + #[derive(Debug, Deserialize, PartialEq)] + struct RpcTestReq { + name: String, + value: i32, + } + + #[test] + #[serial] + fn test_unknown_fields_known_only() { + with_strict_mode(true, || { + let val = json!({"name": "alice", "value": 42}); + let result = from_value_rejecting_unknown_fields::(val); + assert_eq!( + result.unwrap(), + RpcTestReq { + name: "alice".into(), + value: 42 + } + ); + }); + } + + #[test] + #[serial] + fn test_unknown_fields_detected() { + with_strict_mode(true, || { + let val = json!({"name": "alice", "value": 42, "extra": true}); + let err = from_value_rejecting_unknown_fields::(val) + .expect_err("expected Err when unknown JSON field is present under strict mode"); + let msg = err.to_string(); + assert!( + msg.contains("unknown field(s)") && msg.contains("extra"), + "got: {msg}" + ); + }); + } + + #[test] + #[serial] + fn test_unknown_fields_strict_mode_off() { + with_strict_mode(false, || { + let val = json!({"name": "alice", "value": 42, "extra": true}); + let result = from_value_rejecting_unknown_fields::(val); + assert!( + result.is_ok(), + "unknown fields should be allowed when strict mode is off" + ); + }); + } } diff --git a/src/rpc/reflect/mod.rs b/src/rpc/reflect/mod.rs index 2aab1b6bdf5a..b847e083111f 100644 --- a/src/rpc/reflect/mod.rs +++ b/src/rpc/reflect/mod.rs @@ -350,7 +350,7 @@ pub trait RpcMethodExt: RpcMethod { // Client::call has an inappropriate HasLotusJson // bound, work around it for now. let json = client.call(Self::request(params)?.map_ty()).await?; - Ok(serde_json::from_value(json)?) + Ok(crate::rpc::json_validator::from_value_rejecting_unknown_fields(json)?) } } fn call( diff --git a/src/rpc/reflect/parser.rs b/src/rpc/reflect/parser.rs index cd745e93e98d..4c9f2bf22a23 100644 --- a/src/rpc/reflect/parser.rs +++ b/src/rpc/reflect/parser.rs @@ -7,7 +7,7 @@ use serde::Deserialize; use serde_json::{Value, json}; use super::{jsonrpc_types::RequestParameters, util::Optional as _}; -use crate::rpc::error::ServerError; +use crate::rpc::{error::ServerError, json_validator}; /// Parser for JSON-RPC parameters. /// Abstracts calling convention, checks for unexpected params etc, so that @@ -142,7 +142,7 @@ impl<'a> Parser<'a> { false => self.error(missing_parameter)?, }, Some(ParserInner::ByName(it)) => match it.remove(name) { - Some(it) => match serde_json::from_value::(it) { + Some(it) => match json_validator::from_value_rejecting_unknown_fields::(it) { Ok(it) => it, Err(e) => self.error(deserialize_error(e))?, }, @@ -152,7 +152,7 @@ impl<'a> Parser<'a> { }, }, Some(ParserInner::ByPosition(it)) => match it.pop_front() { - Some(it) => match serde_json::from_value::(it) { + Some(it) => match json_validator::from_value_rejecting_unknown_fields::(it) { Ok(it) => it, Err(e) => self.error(deserialize_error(e))?, }, diff --git a/src/tool/subcommands/api_cmd/api_compare_tests.rs b/src/tool/subcommands/api_cmd/api_compare_tests.rs index da915ccecd65..6d5bad680bcc 100644 --- a/src/tool/subcommands/api_cmd/api_compare_tests.rs +++ b/src/tool/subcommands/api_cmd/api_compare_tests.rs @@ -315,11 +315,13 @@ impl RpcTest { fn basic_raw(request: rpc::Request) -> Self { Self { request: request.map_ty(), - check_syntax: Box::new(|it| match serde_json::from_value::(it) { - Ok(_) => true, - Err(e) => { - debug!(?e); - false + check_syntax: Box::new(|it| { + match crate::rpc::json_validator::from_value_rejecting_unknown_fields::(it) { + Ok(_) => true, + Err(e) => { + debug!(?e); + false + } } }), check_semantics: Box::new(|_, _| true), @@ -345,17 +347,23 @@ impl RpcTest { ) -> Self { Self { request: request.map_ty(), - check_syntax: Box::new(|value| match serde_json::from_value::(value) { - Ok(_) => true, - Err(e) => { - debug!("{e}"); - false + check_syntax: Box::new(|value| { + match crate::rpc::json_validator::from_value_rejecting_unknown_fields::(value) { + Ok(_) => true, + Err(e) => { + debug!("{e}"); + false + } } }), check_semantics: Box::new(move |forest_json, lotus_json| { match ( - serde_json::from_value::(forest_json), - serde_json::from_value::(lotus_json), + crate::rpc::json_validator::from_value_rejecting_unknown_fields::( + forest_json, + ), + crate::rpc::json_validator::from_value_rejecting_unknown_fields::( + lotus_json, + ), ) { (Ok(forest), Ok(lotus)) => validate(forest, lotus), (forest, lotus) => {