From 48bad028a00fd3cb20c5a0e682bdb740ca1c0301 Mon Sep 17 00:00:00 2001 From: Arnau Bennassar Date: Thu, 23 Apr 2026 19:56:03 +0200 Subject: [PATCH 1/4] wip --- .gitignore | 3 +- docs/backward_forward_let_runbook.md | 58 +++++++++++++------ .../RECOVERY_PROCEDURE.md | 39 ++++++++++--- tools/backward_forward_let/diagnosis.go | 8 +++ 4 files changed, 83 insertions(+), 25 deletions(-) diff --git a/.gitignore b/.gitignore index 8589039df..f889e9832 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,5 @@ data *.sqlite-shm *.sqlite-wal **aggkit-001-data** -.vscode \ No newline at end of file +.vscode +debug \ No newline at end of file diff --git a/docs/backward_forward_let_runbook.md b/docs/backward_forward_let_runbook.md index 62299c834..e9c0ae5fb 100644 --- a/docs/backward_forward_let_runbook.md +++ b/docs/backward_forward_let_runbook.md @@ -353,20 +353,23 @@ These map directly to the `LeafData` fields required by `forwardLET`. > **Prerequisite**: The aggsender must be the same instance that submitted the divergent certificate (its DB holds that certificate's data). If the aggsender was replaced or its database was lost, fall back to Option 2. -#### Option 2: contact the AggLayer node admin (fallback) +#### Option 2: AggLayer admin fallback -If Option 1 is unavailable (aggsender DB lost, different aggsender instance, or the API is unreachable), contact the operator of the AggLayer node and request the full certificate body for the divergent certificate ID. +If Option 1 is unavailable because the aggsender DB was wiped, the relevant certificate +was submitted by a different aggsender instance, or the aggsender API cannot provide +bridge exits, stop the normal runbook flow and switch to the dedicated fallback +procedure in [`tools/backward_forward_let/RECOVERY_PROCEDURE.md`](../tools/backward_forward_let/RECOVERY_PROCEDURE.md). 
-Provide them with the certificate ID obtained in Step 1: +That document is the canonical operator guide for: -```bash -# Certificate ID from GetNetworkInfo (settled_certificate_id) -echo "Certificate ID: $CERT_ID" -echo "Network ID: $NETWORK_ID" -echo "Height: " -``` +- discovering the missing certificate heights and cert IDs from the tool output +- calling `admin_getCertificate` on the AggLayer admin API +- building the JSON override file +- re-running the tool with `--cert-exits-file` -The AggLayer node operator can retrieve the full certificate body — including all `bridge_exits` — from their internal storage and share the leaf data needed to construct the `forwardLET` call. +It also explains an important operator expectation: after an aggsender DB wipe, the +missing range may span the full settled history, and that should be handled with +automation or admin batch export rather than manual per-height work. ### Summary: determining the recovery case @@ -383,15 +386,36 @@ After collecting the data above: ### Using the tool -A dedicated tool to automate the recovery process is **under development**. Once available, this tool will: +Use `backward-forward-let` as the primary operator entry point. 
+ +The tool already: + +- queries the AggLayer node for the settled L1 state +- compares it against the current LET state on L2 +- queries aggsender for settled certificate bridge exits +- determines the recovery case +- computes the required `backwardLET` and `forwardLET` inputs +- executes the on-chain recovery flow + +Normal path: + +```bash +backward-forward-let --cfg aggkit-config.toml +``` + +Fallback path when aggsender cannot provide bridge exits: + +```bash +backward-forward-let --cfg aggkit-config.toml \ + --cert-exits-file certificate_exits_override.json +``` + +Use the dedicated fallback document for the DB-wipe / missing-cert path: -- Query the AggLayer node for the expected LER on L1 -- Compare it against the current LET state on L2 -- Determine the required sequence of `backwardLET` and `forwardLET` calls -- Compute the necessary Merkle proofs, frontiers, and leaf data -- Execute the smart contract calls in the correct order +- [`tools/backward_forward_let/RECOVERY_PROCEDURE.md`](../tools/backward_forward_let/RECOVERY_PROCEDURE.md) -Until the tool is available, recovery must be performed manually as described below. +The manual contract-call material below is retained as contract-level reference and for +debugging, not as the primary operator workflow. ### Contract function signatures reference diff --git a/tools/backward_forward_let/RECOVERY_PROCEDURE.md b/tools/backward_forward_let/RECOVERY_PROCEDURE.md index 9cd2549a8..dbd0b02b7 100644 --- a/tools/backward_forward_let/RECOVERY_PROCEDURE.md +++ b/tools/backward_forward_let/RECOVERY_PROCEDURE.md @@ -1,9 +1,13 @@ -# Backward/Forward LET — Manual Recovery Procedure +# Backward/Forward LET — Fallback Recovery Procedure -This document describes the steps for recovering from a backward/forward LET divergence -when the aggsender database is empty or has been wiped. 
In this situation the tool cannot -fetch bridge exits from the aggsender RPC and instead needs the data extracted directly -from the agglayer node. +This document is the canonical fallback procedure for `backward-forward-let` when the +aggsender database is empty, has been wiped, or otherwise cannot provide certificate +bridge exits. + +In this situation the tool can still diagnose the settled AggLayer state, but it cannot +complete the divergence walk from aggsender data alone. The operator must extract the +missing certificate bridge exits from the AggLayer admin API, build an override file, +and rerun the tool with `--cert-exits-file`. --- @@ -43,11 +47,24 @@ Missing certificates (2 heights): `(network_id, height)` in the `certificate_per_network_cf` column family of the agglayer state DB and supply the cert ID manually before you can proceed. +Important operator note: + +- After an aggsender DB wipe, the missing range may span the full settled history + (`0..latest settled height`), not just the newest malicious certificate. That is normal + for this fallback path. +- A large missing range is not a signal that the operator should do hundreds or thousands + of manual one-by-one admin lookups. +- For large ranges, use automation: either script the `admin_getCertificate` calls for all + known cert IDs, or ask the agglayer admin for a batch export of cert IDs and bridge exits. + --- ## Step 2 — Fetch each certificate from the agglayer admin API -For each cert ID printed by the tool, call `admin_getCertificate`: +For small ranges, you can call `admin_getCertificate` manually per cert ID. For large +ranges, script this step or ask the agglayer admin for a batch export instead. + +Per-certificate example: ```bash AGGLAYER_ADMIN="http://localhost:4446" @@ -207,7 +224,15 @@ When the tool reports `CertID: UNKNOWN` for a height, the agglayer admin must: auto-resolved heights and re-runs the tool. 
Only the latest settled height is auto-resolvable via the public agglayer gRPC. All -earlier heights require this manual lookup when the aggsender DB is absent. +earlier heights require this lookup when the aggsender DB is absent. + +For large missing ranges, do not treat this as a one-by-one manual task. The expected +workflow is to: + +1. obtain the missing cert IDs in bulk from the agglayer admin or from pre-collected + aggsender submission logs +2. fetch `bridge_exits` in bulk with a script or admin-side export +3. build a single override file and rerun the tool once --- diff --git a/tools/backward_forward_let/diagnosis.go b/tools/backward_forward_let/diagnosis.go index 60e28318d..085725c57 100644 --- a/tools/backward_forward_let/diagnosis.go +++ b/tools/backward_forward_let/diagnosis.go @@ -460,6 +460,14 @@ func printMissingCertReport(w io.Writer, result *DiagnosisResult) { } fmt.Fprintln(w) + if n > 1 { + fmt.Fprintln(w, "NOTE: After an aggsender DB wipe, this missing range may span the full settled history") + fmt.Fprintln(w, " (for example heights 0..latest). 
This is expected for the fallback path.") + fmt.Fprintln(w, " Do not fetch large ranges one-by-one manually; use a script or ask the agglayer admin") + fmt.Fprintln(w, " for a batch export of cert IDs / bridge exits when many heights are missing.") + fmt.Fprintln(w) + } + if hasUnknown { fmt.Fprintln(w, "NOTE: For heights with UNKNOWN cert IDs, ask the agglayer admin to look up") fmt.Fprintln(w, " (network_id, height) in the agglayer's certificate_per_network_cf column family,") From e2966abd76ed4cb3182765896dab80c99ae8d3fb Mon Sep 17 00:00:00 2001 From: Arnau Bennassar Date: Wed, 6 May 2026 22:44:33 +0100 Subject: [PATCH 2/4] feat: improve backward forward let tooling --- aggsender/rpcclient/client.go | 28 +- aggsender/rpcclient/client_test.go | 48 +- docs/backward_forward_let_runbook.md | 1008 ----------------- .../RECOVERY_PROCEDURE.md | 274 +---- tools/backward_forward_let/cert_status.go | 120 ++ .../backward_forward_let/cert_status_test.go | 56 + tools/backward_forward_let/cmd/main.go | 58 +- tools/backward_forward_let/config.go | 8 +- tools/backward_forward_let/craft_cert.go | 295 +++++ tools/backward_forward_let/craft_cert_test.go | 28 + tools/backward_forward_let/diagnosis.go | 88 +- tools/backward_forward_let/diagnosis_test.go | 45 +- .../backward_forward_let/export_cert_exits.go | 243 ++++ .../export_cert_exits_test.go | 214 ++++ tools/backward_forward_let/network_info.go | 32 + .../backward_forward_let/network_info_test.go | 47 + tools/backward_forward_let/recovery.go | 66 ++ tools/backward_forward_let/run.go | 13 +- tools/backward_forward_let/send_cert.go | 25 +- tools/backward_forward_let/send_cert_test.go | 42 + tools/backward_forward_let/types.go | 6 + 21 files changed, 1386 insertions(+), 1358 deletions(-) delete mode 100644 docs/backward_forward_let_runbook.md create mode 100644 tools/backward_forward_let/cert_status.go create mode 100644 tools/backward_forward_let/cert_status_test.go create mode 100644 tools/backward_forward_let/craft_cert.go 
create mode 100644 tools/backward_forward_let/craft_cert_test.go create mode 100644 tools/backward_forward_let/export_cert_exits.go create mode 100644 tools/backward_forward_let/export_cert_exits_test.go create mode 100644 tools/backward_forward_let/network_info.go create mode 100644 tools/backward_forward_let/network_info_test.go diff --git a/aggsender/rpcclient/client.go b/aggsender/rpcclient/client.go index 3d711b5df..6c5f15662 100644 --- a/aggsender/rpcclient/client.go +++ b/aggsender/rpcclient/client.go @@ -1,29 +1,35 @@ package rpcclient import ( + "context" "encoding/json" "fmt" + "time" "github.com/0xPolygon/cdk-rpc/rpc" agglayertypes "github.com/agglayer/aggkit/agglayer/types" "github.com/agglayer/aggkit/aggsender/types" ) -var jSONRPCCall = rpc.JSONRPCCall +var jSONRPCCallWithContext = rpc.JSONRPCCallWithContext + +const defaultRequestTimeout = 10 * time.Second // Client wraps all the available endpoints of the data abailability committee node server type Client struct { - url string + url string + requestTimeout time.Duration } func NewClient(url string) *Client { return &Client{ - url: url, + url: url, + requestTimeout: defaultRequestTimeout, } } func (c *Client) GetStatus() (*types.AggsenderInfo, error) { - response, err := jSONRPCCall(c.url, "aggsender_status") + response, err := c.call("aggsender_status") if err != nil { return nil, err } @@ -41,7 +47,7 @@ func (c *Client) GetStatus() (*types.AggsenderInfo, error) { } func (c *Client) GetCertificateHeaderPerHeight(height *uint64) (*types.Certificate, error) { - response, err := jSONRPCCall(c.url, "aggsender_getCertificateHeaderPerHeight", height) + response, err := c.call("aggsender_getCertificateHeaderPerHeight", height) if err != nil { return nil, err } @@ -61,7 +67,7 @@ func (c *Client) GetCertificateHeaderPerHeight(height *uint64) (*types.Certifica // GetCertificateBridgeExits returns the bridge exits for the certificate at the given height. 
// If height is nil, returns the bridge exits of the last sent certificate. func (c *Client) GetCertificateBridgeExits(height *uint64) ([]*agglayertypes.BridgeExit, error) { - response, err := jSONRPCCall(c.url, "aggsender_getCertificateBridgeExits", height) + response, err := c.call("aggsender_getCertificateBridgeExits", height) if err != nil { return nil, err } @@ -74,3 +80,13 @@ func (c *Client) GetCertificateBridgeExits(height *uint64) ([]*agglayertypes.Bri } return exits, nil } + +func (c *Client) call(method string, params ...interface{}) (rpc.Response, error) { + timeout := c.requestTimeout + if timeout <= 0 { + timeout = defaultRequestTimeout + } + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + return jSONRPCCallWithContext(ctx, c.url, method, params...) +} diff --git a/aggsender/rpcclient/client_test.go b/aggsender/rpcclient/client_test.go index 929f9b201..7cb79fd42 100644 --- a/aggsender/rpcclient/client_test.go +++ b/aggsender/rpcclient/client_test.go @@ -1,10 +1,12 @@ package rpcclient import ( + "context" "encoding/json" "fmt" "math/big" "testing" + "time" "github.com/0xPolygon/cdk-rpc/rpc" agglayertypes "github.com/agglayer/aggkit/agglayer/types" @@ -22,7 +24,7 @@ func TestGetCertificateHeaderPerHeight(t *testing.T) { response := rpc.Response{ Result: responseCertJSON, } - jSONRPCCall = func(_, _ string, _ ...interface{}) (rpc.Response, error) { + jSONRPCCallWithContext = func(_ context.Context, _, _ string, _ ...interface{}) (rpc.Response, error) { return response, nil } cert, err := sut.GetCertificateHeaderPerHeight(&height) @@ -47,7 +49,7 @@ func TestGetCertificateBridgeExits(t *testing.T) { response := rpc.Response{ Result: responseExitsJSON, } - jSONRPCCall = func(_, _ string, _ ...interface{}) (rpc.Response, error) { + jSONRPCCallWithContext = func(_ context.Context, _, _ string, _ ...interface{}) (rpc.Response, error) { return response, nil } exits, err := sut.GetCertificateBridgeExits(&height) @@ -65,7 
+67,7 @@ func TestGetStatus(t *testing.T) { response := rpc.Response{ Result: responseDataJSON, } - jSONRPCCall = func(_, _ string, _ ...interface{}) (rpc.Response, error) { + jSONRPCCallWithContext = func(_ context.Context, _, _ string, _ ...interface{}) (rpc.Response, error) { return response, nil } result, err := sut.GetStatus() @@ -74,11 +76,33 @@ func TestGetStatus(t *testing.T) { require.Equal(t, responseData, *result) } +func TestClientCallUsesTimeout(t *testing.T) { + sut := NewClient("url") + sut.requestTimeout = 50 * time.Millisecond + + responseData := types.AggsenderInfo{} + responseDataJSON, err := json.Marshal(responseData) + require.NoError(t, err) + response := rpc.Response{ + Result: responseDataJSON, + } + jSONRPCCallWithContext = func(ctx context.Context, _, _ string, _ ...interface{}) (rpc.Response, error) { + deadline, ok := ctx.Deadline() + require.True(t, ok) + require.Positive(t, time.Until(deadline)) + require.LessOrEqual(t, time.Until(deadline), 50*time.Millisecond) + return response, nil + } + + _, err = sut.GetStatus() + require.NoError(t, err) +} + func TestGetStatus_Errors(t *testing.T) { sut := NewClient("url") t.Run("rpc call error", func(t *testing.T) { - jSONRPCCall = func(_, _ string, _ ...interface{}) (rpc.Response, error) { + jSONRPCCallWithContext = func(_ context.Context, _, _ string, _ ...interface{}) (rpc.Response, error) { return rpc.Response{}, fmt.Errorf("network error") } _, err := sut.GetStatus() @@ -86,7 +110,7 @@ func TestGetStatus_Errors(t *testing.T) { }) t.Run("response error field set", func(t *testing.T) { - jSONRPCCall = func(_, _ string, _ ...interface{}) (rpc.Response, error) { + jSONRPCCallWithContext = func(_ context.Context, _, _ string, _ ...interface{}) (rpc.Response, error) { return rpc.Response{Error: &rpc.ErrorObject{Message: "rpc error"}}, nil } _, err := sut.GetStatus() @@ -95,7 +119,7 @@ func TestGetStatus_Errors(t *testing.T) { }) t.Run("unmarshal error", func(t *testing.T) { - jSONRPCCall = 
func(_, _ string, _ ...interface{}) (rpc.Response, error) { + jSONRPCCallWithContext = func(_ context.Context, _, _ string, _ ...interface{}) (rpc.Response, error) { return rpc.Response{Result: json.RawMessage("not-json")}, nil } _, err := sut.GetStatus() @@ -109,7 +133,7 @@ func TestGetCertificateHeaderPerHeight_Errors(t *testing.T) { height := uint64(1) t.Run("rpc call error", func(t *testing.T) { - jSONRPCCall = func(_, _ string, _ ...interface{}) (rpc.Response, error) { + jSONRPCCallWithContext = func(_ context.Context, _, _ string, _ ...interface{}) (rpc.Response, error) { return rpc.Response{}, fmt.Errorf("network error") } _, err := sut.GetCertificateHeaderPerHeight(&height) @@ -117,7 +141,7 @@ func TestGetCertificateHeaderPerHeight_Errors(t *testing.T) { }) t.Run("response error field set", func(t *testing.T) { - jSONRPCCall = func(_, _ string, _ ...interface{}) (rpc.Response, error) { + jSONRPCCallWithContext = func(_ context.Context, _, _ string, _ ...interface{}) (rpc.Response, error) { return rpc.Response{Error: &rpc.ErrorObject{Message: "rpc error"}}, nil } _, err := sut.GetCertificateHeaderPerHeight(&height) @@ -126,7 +150,7 @@ func TestGetCertificateHeaderPerHeight_Errors(t *testing.T) { }) t.Run("unmarshal error", func(t *testing.T) { - jSONRPCCall = func(_, _ string, _ ...interface{}) (rpc.Response, error) { + jSONRPCCallWithContext = func(_ context.Context, _, _ string, _ ...interface{}) (rpc.Response, error) { return rpc.Response{Result: json.RawMessage("not-json")}, nil } _, err := sut.GetCertificateHeaderPerHeight(&height) @@ -140,7 +164,7 @@ func TestGetCertificateBridgeExits_Errors(t *testing.T) { height := uint64(5) t.Run("rpc call error", func(t *testing.T) { - jSONRPCCall = func(_, _ string, _ ...interface{}) (rpc.Response, error) { + jSONRPCCallWithContext = func(_ context.Context, _, _ string, _ ...interface{}) (rpc.Response, error) { return rpc.Response{}, fmt.Errorf("network error") } _, err := sut.GetCertificateBridgeExits(&height) @@ 
-148,7 +172,7 @@ func TestGetCertificateBridgeExits_Errors(t *testing.T) { }) t.Run("response error field set", func(t *testing.T) { - jSONRPCCall = func(_, _ string, _ ...interface{}) (rpc.Response, error) { + jSONRPCCallWithContext = func(_ context.Context, _, _ string, _ ...interface{}) (rpc.Response, error) { return rpc.Response{Error: &rpc.ErrorObject{Message: "rpc error"}}, nil } _, err := sut.GetCertificateBridgeExits(&height) @@ -157,7 +181,7 @@ func TestGetCertificateBridgeExits_Errors(t *testing.T) { }) t.Run("unmarshal error", func(t *testing.T) { - jSONRPCCall = func(_, _ string, _ ...interface{}) (rpc.Response, error) { + jSONRPCCallWithContext = func(_ context.Context, _, _ string, _ ...interface{}) (rpc.Response, error) { return rpc.Response{Result: json.RawMessage("not-json")}, nil } _, err := sut.GetCertificateBridgeExits(&height) diff --git a/docs/backward_forward_let_runbook.md b/docs/backward_forward_let_runbook.md deleted file mode 100644 index e9c0ae5fb..000000000 --- a/docs/backward_forward_let_runbook.md +++ /dev/null @@ -1,1008 +0,0 @@ -# Backward and Forward LET runbook - -## Introduction - -The **Local Exit Tree (LET)** is a Merkle tree maintained on L2 that tracks all bridge deposits originating from a given chain. Every time a bridge operation occurs on L2, a new leaf is appended to the LET. Periodically, the `aggsender` component bundles these leaves into a certificate and sends it to the AggLayer, which settles the resulting **Local Exit Root (LER)** on L1. - -Under normal operation, the LET on L2 and the LER settled on L1 stay in sync. However, certain failure scenarios can cause them to **diverge**: L1 has a settled LER that does not match the actual state of the LET on L2. When this happens, the L2 network must reconcile its LET to match what was settled on L1, otherwise future certificates will be rejected by the AggLayer because the LER will not match. 
- -To handle these cases, two admin smart contract functions are provided on the [`AgglayerBridgeL2`](https://agglayer.github.io/protocol-team-docs/smart-contracts/v12/AgglayerBridgeL2/) contract: - -- **[`backwardLET`](https://agglayer.github.io/protocol-team-docs/smart-contracts/v12/AgglayerBridgeL2/#13-backwardlet)**: Rolls the LET backward to a previous state with fewer deposits. This is used to remove leaves that were added on L2 but do not match what was settled on L1. ([source](https://github.com/agglayer/agglayer-contracts/blob/v12.2.0/contracts/sovereignChains/AgglayerBridgeL2.sol#L732)) -- **[`forwardLET`](https://agglayer.github.io/protocol-team-docs/smart-contracts/v12/AgglayerBridgeL2/#14-forwardlet)**: Advances the LET by adding one or more leaves in a single transaction. This is used to insert leaves that were settled on L1 but are missing from the L2 tree. ([source](https://github.com/agglayer/agglayer-contracts/blob/v12.2.0/contracts/sovereignChains/AgglayerBridgeL2.sol#L797)) - -Both functions can **only** be called while the `AgglayerBridgeL2` contract is in **emergency mode**, and only by an account holding the `GlobalExitRootRemover` role. - -## Prerequisites - -Before starting, ensure you have these environment variables set. They are referenced throughout the runbook: - -```bash -# ── Network RPC endpoints ── -export L2_RPC_URL="" - -# ── Contract addresses (L2) ── -export BRIDGE_L2_ADDR="" -export GER_L2_ADDR="" - -# ── AggLayer endpoints ── -export AGGLAYER_GRPC="" - -# ── Bridge service endpoint ── -export BRIDGE_SERVICE_URL="" # e.g. http://localhost:8080/bridge/v1 - -# ── Network ID of the affected L2 chain ── -export NETWORK_ID="" - -# ── Private key of the account holding the GlobalExitRootRemover role ── -# This same account is used for backwardLET and forwardLET calls. 
-# For activateEmergencyState/deactivateEmergencyState, the emergencyBridgePauser -# and emergencyBridgeUnpauser keys are needed respectively (may be different accounts). -export GER_REMOVER_PK="" -export EMERGENCY_PAUSER_PK="" -export EMERGENCY_UNPAUSER_PK="" -``` - -### Verify role addresses - -Before proceeding, confirm which accounts hold each role: - -```bash -# Who can call backwardLET / forwardLET (GlobalExitRootRemover)? -cast call $GER_L2_ADDR "globalExitRootRemover()(address)" --rpc-url $L2_RPC_URL - -# Who can activate emergency state? -cast call $BRIDGE_L2_ADDR "emergencyBridgePauser()(address)" --rpc-url $L2_RPC_URL - -# Who can deactivate emergency state? -cast call $BRIDGE_L2_ADDR "emergencyBridgeUnpauser()(address)" --rpc-url $L2_RPC_URL -``` - -## Detection - -A backward/forward LET operation is needed when the LER settled on L1 diverges from the LET state on L2. This can be detected through the following indicators: - -### 1. Certificate rejected by the AggLayer - -The `aggsender` submits a certificate to the AggLayer, which rejects it because the `PrevLocalExitRoot` in the certificate does not match the last settled LER on L1. This is the most common first signal of divergence. - -The certificate transitions to `InError` status on the AggLayer side. The `aggsender` detects this via its periodic status checker and logs: - -| File | Line | Level | Message | -|------|------|-------|---------| -| `aggsender/statuschecker/cert_status_checker.go` | 187 | `INFO` | `certificate changed status from [] to [InError] elapsed time: full_cert (agglayer): ` | -| `aggsender/statuschecker/cert_status_checker.go` | 169 | `INFO` | `found InError certificate(s) with no pending certs, enabling retry` | -| `aggsender/aggsender.go` | 332 | `INFO` | `An InError cert exists. 
Sending a new one ()` | -| `aggsender/aggsender.go` | 365 | `ERROR` | `Certificate send trigger: error sending certificate: ` | -| `aggsender/aggsender.go` | 536 | `ERROR` | `error creating non accepted certificate: . Err: ` | -| `aggsender/aggsender.go` | 541 | `ERROR` | `error saving non accepted certificate: . Err: ` | - -**Recommended alarms**: alert on the `InError` status transition (`INFO` log at `cert_status_checker.go:187` matching `"changed status from.*to \[InError\]"`) and on the `ERROR` at `aggsender.go:365` (`"Certificate send trigger: error sending certificate"`). - -### 2. LER mismatch detected during certificate validation - -When the `aggsender` attempts to build and validate a new certificate, the local validator compares the certificate's `PrevLocalExitRoot` against the expected value. A mismatch surfaces as an error in the following paths: - -| File | Line | Level | Message | -|------|------|-------|---------| -| `aggsender/validator/validate_certificate.go` | 155 | `ERROR` (via `fmt.Errorf`) | `certificate PrevLocalExitRoot is not equal to previous certificate NewLocalExitRoot ` | -| `aggsender/validator/validate_certificate.go` | 196 | `ERROR` (via `fmt.Errorf`) | `first certificate must have correct starting PrevLocalExitRoot: , but got: ` | -| `aggsender/aggsender.go` | 432 | `WARN` | `error validating certificate locally: ` | -| `aggsender/aggsender.go` | 329 | `ERROR` | `error checking last certificate from agglayer: ` | - -**Recommended alarms**: alert on `WARN` at `aggsender.go:432` (`"error validating certificate locally"`) and on any log containing `"PrevLocalExitRoot"` and `"is not equal"` or `"but got"`. - -### 3. 
AggSender unable to build or send certificates - -When the `aggsender` repeatedly fails to build or submit a valid certificate (e.g., after a restart following a key compromise), it logs continuously on each retry cycle: - -| File | Line | Level | Message | -|------|------|-------|---------| -| `aggsender/aggsender.go` | 419 | `ERROR` (via `fmt.Errorf`) | `error getting certificate build params: ` | -| `aggsender/aggsender.go` | 428 | `ERROR` (via `fmt.Errorf`) | `error building certificate: ` | -| `aggsender/aggsender.go` | 460 | `ERROR` (via `fmt.Errorf`) | `error sending certificate: ` | -| `aggsender/aggsender.go` | 365 | `ERROR` | `Certificate send trigger: error sending certificate: ` | -| `aggsender/aggsender.go` | 359 | `ERROR` | `Certificate send trigger: error checking certificate status: ` | - -**Recommended alarms**: alert on repeated occurrences of `ERROR` at `aggsender.go:365` (`"Certificate send trigger: error sending certificate"`). A single occurrence may be transient; sustained repetition indicates a structural issue requiring investigation. - ---- - -**Root causes** that can trigger this divergence include: - -- **Compromised or buggy `aggsender`**: The `aggsender` private key is compromised or the component has a bug, causing it to craft and submit a certificate with leaves that do not correspond to actual L2 bridge events. -- **L2 network reorg (outpost networks)**: The L2 network reorgs after a certificate has already been settled on L1, meaning the block that contained certain bridge events no longer exists or has different contents. - -## Diagnosis - -Once detection signals indicate a divergence, the next step is to **determine the exact state on both sides** and identify which recovery case applies. This section provides concrete commands to gather all the data needed. 
- -### Step 1: Query the AggLayer for settled state (L1 truth) - -The AggLayer's `GetNetworkInfo` gRPC call returns the last settled certificate details including the settled LER and leaf count: - -```bash -grpcurl -plaintext -d "{\"network_id\": $NETWORK_ID}" \ - $AGGLAYER_GRPC \ - agglayer.node.v1.NodeStateService/GetNetworkInfo -``` - -From the response, extract: -- `settled_ler` — the LER that L1 considers as truth -- `settled_let_leaf_count` — the deposit count at which L1 settled (this is the **L1 deposit count**) -- `settled_height` — the certificate height of the last settled certificate -- `settled_certificate_id` — the ID of that certificate - -To get the full details of the last settled certificate: - -```bash -grpcurl -plaintext -d "{\"network_id\": $NETWORK_ID, \"type\": \"LATEST_CERTIFICATE_REQUEST_TYPE_SETTLED\"}" \ - $AGGLAYER_GRPC \ - agglayer.node.v1.NodeStateService/GetLatestCertificateHeader -``` - -This returns a `CertificateHeader` with: -- `prev_local_exit_root` — what the AggLayer expected as the starting LER -- `new_local_exit_root` — the LER after applying this certificate's leaves -- `height` — certificate height -- `status` — should be `SETTLED` (5) - -If there is also a pending (possibly InError) certificate: - -```bash -grpcurl -plaintext -d "{\"network_id\": $NETWORK_ID, \"type\": \"LATEST_CERTIFICATE_REQUEST_TYPE_PENDING\"}" \ - $AGGLAYER_GRPC \ - agglayer.node.v1.NodeStateService/GetLatestCertificateHeader -``` - -If `status` is `IN_ERROR` (4), the `error` field will contain the rejection reason. - -### Step 2: Query the L2 bridge contract for current state - -```bash -# Current deposit count on L2 -cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL - -# Current LER (Merkle root of the LET) on L2 -cast call $BRIDGE_L2_ADDR "getRoot()(bytes32)" --rpc-url $L2_RPC_URL - -# Is the bridge in emergency state? 
-cast call $BRIDGE_L2_ADDR "isEmergencyState()(bool)" --rpc-url $L2_RPC_URL - -# Network ID (sanity check) -cast call $BRIDGE_L2_ADDR "networkID()(uint32)" --rpc-url $L2_RPC_URL -``` - -### Step 3: Query the bridge service for sync status - -The bridge service exposes a sync status endpoint that compares on-chain deposit counts with its local database: - -```bash -curl -s "$BRIDGE_SERVICE_URL/sync-status" | jq . -``` - -The response includes: -- `l2_info.contract_deposit_count` — on-chain deposit count -- `l2_info.synchronized_deposit_count` — how far the bridge service has synced -- `l2_info.is_synced` — whether the syncer is caught up - -### Step 4: Compare L1 vs L2 and determine the case - -Save the key values: - -```bash -# From AggLayer (Step 1) -L1_SETTLED_LER="" -L1_DEPOSIT_COUNT="" - -# From L2 contract (Step 2) -L2_LER=$(cast call $BRIDGE_L2_ADDR "getRoot()(bytes32)" --rpc-url $L2_RPC_URL) -L2_DEPOSIT_COUNT=$(cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL) - -echo "L1 settled LER: $L1_SETTLED_LER" -echo "L1 settled deposit count: $L1_DEPOSIT_COUNT" -echo "L2 current LER: $L2_LER" -echo "L2 current deposit count: $L2_DEPOSIT_COUNT" -``` - -**Important**: `L2_LER != L1_SETTLED_LER` does **not** by itself indicate divergence. Under normal operation L2 is ahead of L1 (the `aggsender` posts certificates periodically), so `L2_DEPOSIT_COUNT > L1_DEPOSIT_COUNT` and a different current root is perfectly expected. - -The key validation is to check whether `L1_SETTLED_LER` **exists in L2's history** — i.e., whether L2's tree ever had that root at `L1_DEPOSIT_COUNT` deposits. - -#### Quick checks (no archive node needed) - -```bash -# If L2 has fewer deposits than L1 settled, divergence is certain. -# L1 should never settle leaves that don't exist on L2. 
-if [ "$L2_DEPOSIT_COUNT" -lt "$L1_DEPOSIT_COUNT" ]; then - echo "DIVERGENCE: L1 settled $L1_DEPOSIT_COUNT deposits but L2 only has $L2_DEPOSIT_COUNT" -fi - -# If deposit counts match, a simple root comparison suffices. -if [ "$L2_DEPOSIT_COUNT" -eq "$L1_DEPOSIT_COUNT" ]; then - if [ "$L2_LER" == "$L1_SETTLED_LER" ]; then - echo "No divergence — roots match at same deposit count" - else - echo "DIVERGENCE: same deposit count ($L2_DEPOSIT_COUNT) but different roots" - fi -fi -``` - -#### When L2 is ahead (`L2_DEPOSIT_COUNT > L1_DEPOSIT_COUNT`) - -L2 being ahead is normal. To confirm divergence, verify that `L1_SETTLED_LER` matches the L2 tree's historical root at `L1_DEPOSIT_COUNT`. This requires an **archive node** for the L2 RPC. - -Use the bridge service to find the block boundary, then query the historical root: - -```bash -# deposit_count in the bridge service is 0-indexed. -# L1_DEPOSIT_COUNT is the total leaf count, so the last settled deposit is at index L1_DEPOSIT_COUNT - 1. -# The first deposit AFTER the settled set is at index L1_DEPOSIT_COUNT. -FIRST_POST_SETTLE=$(curl -s "$BRIDGE_SERVICE_URL/bridge-by-deposit-count?network_id=$NETWORK_ID&deposit_count=$L1_DEPOSIT_COUNT" | jq -r '.block_num') - -if [ "$FIRST_POST_SETTLE" != "null" ] && [ -n "$FIRST_POST_SETTLE" ]; then - # Read the L2 root at the block BEFORE the first post-settlement deposit. - # At this point, L2 should have had exactly L1_DEPOSIT_COUNT leaves. 
- HISTORY_BLOCK=$((FIRST_POST_SETTLE - 1)) - L2_HISTORICAL_LER=$(cast call $BRIDGE_L2_ADDR "getRoot()(bytes32)" \ - --rpc-url $L2_RPC_URL --block $HISTORY_BLOCK) - - echo "L2 historical LER at block $HISTORY_BLOCK: $L2_HISTORICAL_LER" - echo "L1 settled LER: $L1_SETTLED_LER" - - if [ "$L2_HISTORICAL_LER" == "$L1_SETTLED_LER" ]; then - echo "No divergence — L1 settled LER exists in L2 history" - else - echo "DIVERGENCE CONFIRMED — L1 settled LER does NOT match L2 tree at deposit count $L1_DEPOSIT_COUNT" - fi -else - echo "Bridge at deposit_count=$L1_DEPOSIT_COUNT not found on L2 — verify bridge service sync status" -fi -``` - -> **Note**: The archive-node query above assumes the first deposit after the settled set is in a different block than the last settled deposit. If multiple deposits land in the same block, the block boundary may not be exact. In that case, use the block of the last settled deposit (`deposit_count = L1_DEPOSIT_COUNT - 1`) and verify the deposit count at that block: -> ```bash -> LAST_SETTLED_BLOCK=$(curl -s "$BRIDGE_SERVICE_URL/bridge-by-deposit-count?network_id=$NETWORK_ID&deposit_count=$((L1_DEPOSIT_COUNT - 1))" | jq -r '.block_num') -> DEPOSIT_AT_BLOCK=$(cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL --block $LAST_SETTLED_BLOCK) -> # If DEPOSIT_AT_BLOCK == L1_DEPOSIT_COUNT, the root at this block is the one to compare. -> # If DEPOSIT_AT_BLOCK > L1_DEPOSIT_COUNT, more deposits landed in the same block — you'll need -> # to trace the transaction to get the intermediate root. 
-> ``` - -#### Summary - -| Condition | Result | -|-----------|--------| -| `L2_DEPOSIT_COUNT < L1_DEPOSIT_COUNT` | **Divergence** — L1 settled leaves that don't exist on L2 | -| `L2_DEPOSIT_COUNT == L1_DEPOSIT_COUNT` and `L2_LER == L1_SETTLED_LER` | **No divergence** | -| `L2_DEPOSIT_COUNT == L1_DEPOSIT_COUNT` and `L2_LER != L1_SETTLED_LER` | **Divergence** — same count, different roots | -| `L2_DEPOSIT_COUNT > L1_DEPOSIT_COUNT` and L1_SETTLED_LER **found** in L2 history | **No divergence** — L2 is simply ahead | -| `L2_DEPOSIT_COUNT > L1_DEPOSIT_COUNT` and L1_SETTLED_LER **NOT found** in L2 history | **Divergence** — L1 settled a root that L2 never had | - -### Step 5: List the L2 bridges (leaves) from the divergence point - -To understand which bridges exist on L2 after the last matching point, query the bridge service for each deposit count from the divergence point onwards: - -```bash -# Get the bridge at a specific deposit count on L2 -# Repeat for each deposit count from (last_matching_count + 1) to L2_DEPOSIT_COUNT -DEPOSIT_IDX=3 # example: first divergent position -curl -s "$BRIDGE_SERVICE_URL/bridge-by-deposit-count?network_id=$NETWORK_ID&deposit_count=$DEPOSIT_IDX" | jq . 
-``` - -The response contains the full leaf data for that bridge: -- `leaf_type` (0=asset, 1=message) -- `origin_network` -- `origin_address` -- `destination_network` -- `destination_address` -- `amount` -- `metadata` - -Loop through all positions to build the list of L2 leaves: - -```bash -# Collect all L2 bridges from divergence point to current deposit count -DIVERGENCE_POINT=2 # last matching deposit count -for i in $(seq $((DIVERGENCE_POINT + 1)) $L2_DEPOSIT_COUNT); do - echo "=== Deposit $i ===" - curl -s "$BRIDGE_SERVICE_URL/bridge-by-deposit-count?network_id=$NETWORK_ID&deposit_count=$i" | jq '{ - deposit_count, - leaf_type, - origin_network, - origin_address, - destination_network, - destination_address, - amount, - metadata - }' -done -``` - -### Step 6: List the L1-settled leaves (divergent leaves) - -The divergent leaves (BX, BY, ...) are the ones that were included in certificates settled on L1 but do not exist on L2. These leaves are part of the `bridge_exits` field of the settled certificates. - -The AggLayer gRPC API only exposes certificate **headers** (`GetCertificateHeader`), not full certificate bodies — it does not return the individual bridge exits. Retrieving the actual leaf data requires one of the following options. - -#### Option 1: aggsender certificate API (preferred) - -The `aggsender` stores the full body of every certificate it submits, including the `bridge_exits` array. A dedicated endpoint is being added to the `aggsender` to expose this data. It will be available before this runbook is released. - -The endpoint will accept a certificate ID (or height) and return the full list of bridge exits for that certificate, including the leaf data needed for `forwardLET`: - -```bash -# Retrieve bridge exits for a specific certificate height -# The aggsender API base URL depends on your deployment configuration -AGGSENDER_API_URL="" -CERT_HEIGHT="" - -curl -s "$AGGSENDER_API_URL/certificate/$CERT_HEIGHT/bridge-exits" | jq . 
-``` - -The response will contain an array of bridge exit objects, each with: -- `leaf_type` (0=asset, 1=message) -- `origin_network` -- `origin_token_address` -- `dest_network` -- `dest_address` -- `amount` -- `metadata` - -These map directly to the `LeafData` fields required by `forwardLET`. - -> **Prerequisite**: The aggsender must be the same instance that submitted the divergent certificate (its DB holds that certificate's data). If the aggsender was replaced or its database was lost, fall back to Option 2. - -#### Option 2: AggLayer admin fallback - -If Option 1 is unavailable because the aggsender DB was wiped, the relevant certificate -was submitted by a different aggsender instance, or the aggsender API cannot provide -bridge exits, stop the normal runbook flow and switch to the dedicated fallback -procedure in [`tools/backward_forward_let/RECOVERY_PROCEDURE.md`](../tools/backward_forward_let/RECOVERY_PROCEDURE.md). - -That document is the canonical operator guide for: - -- discovering the missing certificate heights and cert IDs from the tool output -- calling `admin_getCertificate` on the AggLayer admin API -- building the JSON override file -- re-running the tool with `--cert-exits-file` - -It also explains an important operator expectation: after an aggsender DB wipe, the -missing range may span the full settled history, and that should be handled with -automation or admin batch export rather than manual per-height work. - -### Summary: determining the recovery case - -After collecting the data above: - -| L2 has extra leaves beyond divergence? | L1 settled extra leaves beyond divergence? 
| Case | -|----------------------------------------|-------------------------------------------|------| -| No | No (single divergent leaf) | **Case 1** — forwardLET only | -| Yes | No (single divergent leaf) | **Case 2** — backwardLET then forwardLET | -| No | Yes (multiple divergent leaves) | **Case 3** — forwardLET only (multiple leaves) | -| Yes | Yes (multiple divergent leaves) | **Case 4** — backwardLET then forwardLET | - -## Recovery - -### Using the tool - -Use `backward-forward-let` as the primary operator entry point. - -The tool already: - -- queries the AggLayer node for the settled L1 state -- compares it against the current LET state on L2 -- queries aggsender for settled certificate bridge exits -- determines the recovery case -- computes the required `backwardLET` and `forwardLET` inputs -- executes the on-chain recovery flow - -Normal path: - -```bash -backward-forward-let --cfg aggkit-config.toml -``` - -Fallback path when aggsender cannot provide bridge exits: - -```bash -backward-forward-let --cfg aggkit-config.toml \ - --cert-exits-file certificate_exits_override.json -``` - -Use the dedicated fallback document for the DB-wipe / missing-cert path: - -- [`tools/backward_forward_let/RECOVERY_PROCEDURE.md`](../tools/backward_forward_let/RECOVERY_PROCEDURE.md) - -The manual contract-call material below is retained as contract-level reference and for -debugging, not as the primary operator workflow. 
- -### Contract function signatures reference - -Before proceeding, here are the exact Solidity function signatures (from [`AgglayerBridgeL2.sol` v12.2.0](https://github.com/agglayer/agglayer-contracts/blob/v12.2.0/contracts/sovereignChains/AgglayerBridgeL2.sol)): - -```solidity -// Roll the LET backward to a previous state -// Modifiers: onlyGlobalExitRootRemover, ifEmergencyState -function backwardLET( - uint256 newDepositCount, - bytes32[32] calldata newFrontier, - bytes32 nextLeaf, - bytes32[32] calldata proof -) external virtual onlyGlobalExitRootRemover ifEmergencyState; - -// Advance the LET by adding new leaves in bulk -// Modifiers: onlyGlobalExitRootRemover, ifEmergencyState -function forwardLET( - LeafData[] calldata newLeaves, - bytes32 expectedLER -) external virtual onlyGlobalExitRootRemover ifEmergencyState; - -struct LeafData { - uint8 leafType; // 0 = asset, 1 = message - uint32 originNetwork; - address originAddress; - uint32 destinationNetwork; - address destinationAddress; - uint256 amount; - bytes metadata; -} - -// Emergency state management -// Modifier: onlyEmergencyBridgePauser -function activateEmergencyState() external onlyEmergencyBridgePauser; - -// Modifier: onlyEmergencyBridgeUnpauser -function deactivateEmergencyState() external onlyEmergencyBridgeUnpauser; -``` - -### Manually - -The manual recovery process follows these steps. Each step includes the exact CLI commands to execute. - -#### Step 1: Stop the `aggsender` - -Before performing any recovery operations, stop the `aggsender` to prevent it from interfering (e.g., attempting to send certificates while the bridge is in emergency mode). - -```bash -# Stop the aggsender process/container. -# The exact command depends on your deployment (systemd, docker, kubernetes, etc.) -# Example for docker: -docker stop aggsender - -# Example for systemd: -sudo systemctl stop aggsender -``` - -#### Step 2: Activate emergency mode - -Call `activateEmergencyState` on the bridge contract. 
This is a prerequisite for both `backwardLET` and `forwardLET`. - -```bash -# Verify emergency state is NOT already active -cast call $BRIDGE_L2_ADDR "isEmergencyState()(bool)" --rpc-url $L2_RPC_URL - -# Activate emergency state (requires emergencyBridgePauser key) -cast send $BRIDGE_L2_ADDR "activateEmergencyState()" \ - --private-key $EMERGENCY_PAUSER_PK \ - --rpc-url $L2_RPC_URL - -# Confirm activation -cast call $BRIDGE_L2_ADDR "isEmergencyState()(bool)" --rpc-url $L2_RPC_URL -# Expected: true -``` - -#### Step 3: Roll back the LET if needed (`backwardLET`) - -This step is only needed if L2 has extra leaves beyond the divergence point (**Cases 2 and 4**). If only `forwardLET` is needed (**Cases 1 and 3**), skip to Step 4. - -The `backwardLET` function requires: -- `newDepositCount` — the target deposit count to roll back to (the divergence point) -- `newFrontier` — 32-element Merkle tree frontier array at the target deposit count -- `nextLeaf` — the leaf hash at position `newDepositCount` in the current tree (proof of inclusion) -- `proof` — Merkle proof that `nextLeaf` exists at position `newDepositCount` - -> **Computing `newFrontier`, `nextLeaf`, and `proof`**: These values require off-chain computation from the Merkle tree state. The `backward-forward-let` tool computes these automatically. For manual computation, you need access to the full tree state (all leaves up to the current deposit count) to generate the frontier at the target count, the leaf hash at the boundary position, and a Merkle inclusion proof. - -```bash -# Example: roll back from deposit count 4 to deposit count 2 -# NEW_DEPOSIT_COUNT, NEW_FRONTIER, NEXT_LEAF, and PROOF must be computed off-chain -NEW_DEPOSIT_COUNT=2 -NEW_FRONTIER="[0x...,0x...,...]" # 32-element bytes32 array -NEXT_LEAF="0x..." 
# leaf hash at position newDepositCount -PROOF="[0x...,0x...,...]" # 32-element bytes32 Merkle proof - -cast send $BRIDGE_L2_ADDR \ - "backwardLET(uint256,bytes32[32],bytes32,bytes32[32])" \ - $NEW_DEPOSIT_COUNT \ - "$NEW_FRONTIER" \ - $NEXT_LEAF \ - "$PROOF" \ - --private-key $GER_REMOVER_PK \ - --rpc-url $L2_RPC_URL - -# Verify the rollback -cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL -# Expected: 2 -cast call $BRIDGE_L2_ADDR "getRoot()(bytes32)" --rpc-url $L2_RPC_URL -# Should match the LER at deposit count 2 -``` - -#### Step 4: Advance the LET (`forwardLET`) - -Call `forwardLET` to add the required leaves. This includes: -- The divergent leaf(s) settled on L1 (BX, BY, ...) -- If a `backwardLET` was performed in Step 3, the legitimate L2 bridges that were rolled back (B3, B4, ...) - -The leaves must be passed as an array of `LeafData` structs **in the correct order**: divergent leaves first, then the re-added legitimate L2 bridges. - -The `expectedLER` is the expected Merkle root after all leaves are inserted. It acts as a health check — if the computed root doesn't match, the transaction reverts. - -```bash -# Build the leaf data array. -# Each leaf is a tuple: (leafType, originNetwork, originAddress, destinationNetwork, destinationAddress, amount, metadata) -# -# Example for Case 2: insert BX (divergent), then B3 and B4 (legitimate) -# The leaf data comes from the diagnosis phase (Step 5 and Step 6 above) - -EXPECTED_LER="0x..." 
# the expected LER after all leaves are inserted - -cast send $BRIDGE_L2_ADDR \ - "forwardLET((uint8,uint32,address,uint32,address,uint256,bytes)[],bytes32)" \ - "[(0,1,0xOrigAddr1,2,0xDestAddr1,1000000000000000000,0x),(0,1,0xOrigAddr2,3,0xDestAddr2,2000000000000000000,0x),(0,1,0xOrigAddr3,3,0xDestAddr3,500000000000000000,0x)]" \ - $EXPECTED_LER \ - --private-key $GER_REMOVER_PK \ - --rpc-url $L2_RPC_URL - -# Verify the new state -cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL -cast call $BRIDGE_L2_ADDR "getRoot()(bytes32)" --rpc-url $L2_RPC_URL -# The root should match EXPECTED_LER -``` - -**Computing `expectedLER`**: This is the Merkle root you expect after inserting all the leaves. It must be computed off-chain from the full leaf set. For **Cases 1 and 3** (forward-only), the expected LER after inserting all missing leaves should match the L1 settled LER if you're inserting exactly the leaves that were settled. For **Cases 2 and 4** (backward + forward), the expected LER must account for both the divergent leaves and the re-added legitimate leaves. - -#### Step 5: Deactivate emergency mode - -```bash -# Deactivate emergency state (requires emergencyBridgeUnpauser key) -cast send $BRIDGE_L2_ADDR "deactivateEmergencyState()" \ - --private-key $EMERGENCY_UNPAUSER_PK \ - --rpc-url $L2_RPC_URL - -# Confirm deactivation -cast call $BRIDGE_L2_ADDR "isEmergencyState()(bool)" --rpc-url $L2_RPC_URL -# Expected: false -``` - -#### Step 6: Rebalance the chain (if needed) - -The bridge will be **undercollateralized** by the sum of amounts of all divergent leaves (BX, BY, ...). The AggLayer tracks a Local Balance Tree (LBT) for each chain, and if the LBT shows a negative balance, the next certificate will be rejected. - -Check whether rebalancing is urgent by computing the total amount of divergent leaves: - -```bash -# Sum of amounts of all divergent leaves (BX, BY, ...) 
-# If this amount is significant, rebalancing must happen BEFORE starting the aggsender. - -# Rebalancing steps: -# 1. Bridge the required amount from another network (LX) into this chain -# 2. Claim the bridge on L2 -# 3. Burn the claimed amount on L2 -# -# These are standard bridge operations and depend on the specific token and network involved. -``` - -#### Step 7: Start the `aggsender` - -Once the LET is corrected and rebalancing is complete (if needed), restart the `aggsender`: - -```bash -# Start the aggsender process/container -# Example for docker: -docker start aggsender - -# Example for systemd: -sudo systemctl start aggsender -``` - -After starting, the `aggsender` must craft a certificate covering the block range that includes the `BackwardLET` and `ForwardLET` events. Monitor its logs to verify: - -```bash -# Watch for successful certificate submission -# Look for log lines indicating successful certificate send -# and absence of the error patterns listed in the Detection section -``` - -The `aggsender` handles `BackwardLET` events (removing leaves from its internal DB) and `ForwardLET` events (adding leaves to its internal DB) automatically. - -#### Post-recovery verification - -After the `aggsender` resumes and submits a new certificate, verify everything is in sync: - -```bash -# 1. Check that the latest certificate is settled (not InError) -grpcurl -plaintext -d "{\"network_id\": $NETWORK_ID, \"type\": \"LATEST_CERTIFICATE_REQUEST_TYPE_SETTLED\"}" \ - $AGGLAYER_GRPC \ - agglayer.node.v1.NodeStateService/GetLatestCertificateHeader - -# 2. Verify L2 LER matches what AggLayer expects -grpcurl -plaintext -d "{\"network_id\": $NETWORK_ID}" \ - $AGGLAYER_GRPC \ - agglayer.node.v1.NodeStateService/GetNetworkInfo - -cast call $BRIDGE_L2_ADDR "getRoot()(bytes32)" --rpc-url $L2_RPC_URL -# These should be consistent - -# 3. Check bridge service sync status -curl -s "$BRIDGE_SERVICE_URL/sync-status" | jq . - -# 4. 
Verify no pending InError certificates -grpcurl -plaintext -d "{\"network_id\": $NETWORK_ID, \"type\": \"LATEST_CERTIFICATE_REQUEST_TYPE_PENDING\"}" \ - $AGGLAYER_GRPC \ - agglayer.node.v1.NodeStateService/GetLatestCertificateHeader -``` - -### Cases - -The key factor determining the recovery steps is not just the root cause of the divergence, but the **combination of events that occurred after the LET diverged**. Specifically: - -- Did further bridges occur on L2 after the divergence point? -- Did further settlements occur on L1 after the first invalid one? - -The following scenarios use this notation: - -``` -L2: B1 -> LET_1, B2 -> LET_2, B3 -> LET_3, B4 -> LET_4 -L1: B1 -> LET_1, B2 -> LET_2, BX -> LET_X - ^ divergence point -``` - -Where `B1..B4` are bridge events, `BX` is a divergent leaf (settled on L1 but not matching L2), and `LET_N` is the LET root after leaf N. - ---- - -#### Case 1: Divergence with no further L2 bridges and no further L1 settlements - -**Scenario**: A single divergent leaf was settled on L1, no additional bridges have occurred on L2 since, and no further settlements have been made on L1. - -``` -L2: B1 -> LET_1, B2 -> LET_2 -L1: B1 -> LET_1, B2 -> LET_2, BX -> LET_X -``` - -**Diagnosis check**: - -```bash -# Confirm: L2 deposit count == L1 divergence point (e.g., 2) -# L1 settled deposit count == divergence point + number of divergent leaves (e.g., 3) -cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL -# Expected: 2 - -grpcurl -plaintext -d "{\"network_id\": $NETWORK_ID}" \ - $AGGLAYER_GRPC \ - agglayer.node.v1.NodeStateService/GetNetworkInfo -# settled_let_leaf_count expected: 3 -``` - -**Recovery steps**: - -```bash -# 1. Stop the aggsender -# 2. Activate emergency state -cast send $BRIDGE_L2_ADDR "activateEmergencyState()" \ - --private-key $EMERGENCY_PAUSER_PK --rpc-url $L2_RPC_URL - -# 3. 
forwardLET — add BX to match L1 -# BX leaf data must be obtained from the settled certificate (see Diagnosis Step 6) -cast send $BRIDGE_L2_ADDR \ - "forwardLET((uint8,uint32,address,uint32,address,uint256,bytes)[],bytes32)" \ - "[(BX_LEAF_TYPE,BX_ORIGIN_NET,BX_ORIGIN_ADDR,BX_DEST_NET,BX_DEST_ADDR,BX_AMOUNT,BX_METADATA)]" \ - $LET_X \ - --private-key $GER_REMOVER_PK --rpc-url $L2_RPC_URL - -# 4. Verify -cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL # Expected: 3 -cast call $BRIDGE_L2_ADDR "getRoot()(bytes32)" --rpc-url $L2_RPC_URL # Expected: LET_X - -# 5. Deactivate emergency state -cast send $BRIDGE_L2_ADDR "deactivateEmergencyState()" \ - --private-key $EMERGENCY_UNPAUSER_PK --rpc-url $L2_RPC_URL - -# 6. (Optional) Re-collateralize, then start the aggsender -``` - -This is the simplest case: no backward operation is needed since L2 has no extra leaves beyond the divergence point. - -**Collateralization**: The bridge is **undercollateralized** by `amount(BX)` — L1 has credited those assets as having left L2, but they were never actually burned on L2. - -**Optional re-collateralization steps**: - -1. Bridge `amount(BX)` from another network into this chain -2. Claim the bridged funds on L2 -3. Burn the claimed amount on L2 - -This realigns the LBT on L2 with the LBT tracked by the AggLayer node. If the amount is significant, this must be done before starting the `aggsender` (step 6 above), as the AggLayer will reject the next certificate if the LBT shows a negative balance. - ---- - -#### Case 2: Divergence with further L2 bridges but no further L1 settlements - -**Scenario**: After the divergent leaf was settled on L1, additional bridges happened on L2 (but no further settlements occurred on L1). - -``` -L2: B1 -> LET_1, B2 -> LET_2, B3 -> LET_3, B4 -> LET_4 -L1: B1 -> LET_1, B2 -> LET_2, BX -> LET_X -``` - -L2 has leaves B3 and B4 that were added after the divergence point. 
These must be removed, the divergent leaf inserted, and then the legitimate leaves re-added. - -**Diagnosis check**: - -```bash -# L2 has more deposits than the divergence point -cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL -# Expected: 4 (divergence point 2 + 2 extra L2 bridges) - -grpcurl -plaintext -d "{\"network_id\": $NETWORK_ID}" \ - $AGGLAYER_GRPC \ - agglayer.node.v1.NodeStateService/GetNetworkInfo -# settled_let_leaf_count expected: 3 (divergence point 2 + 1 divergent leaf) - -# Collect leaf data for B3 and B4 (the L2 bridges to re-add) -curl -s "$BRIDGE_SERVICE_URL/bridge-by-deposit-count?network_id=$NETWORK_ID&deposit_count=3" | jq . -curl -s "$BRIDGE_SERVICE_URL/bridge-by-deposit-count?network_id=$NETWORK_ID&deposit_count=4" | jq . -``` - -**Recovery steps**: - -```bash -# 1. Stop the aggsender -# 2. Activate emergency state -cast send $BRIDGE_L2_ADDR "activateEmergencyState()" \ - --private-key $EMERGENCY_PAUSER_PK --rpc-url $L2_RPC_URL - -# 3. backwardLET — roll back to deposit count 2 (removing B3 and B4) -# NEW_FRONTIER, NEXT_LEAF, PROOF must be computed off-chain -cast send $BRIDGE_L2_ADDR \ - "backwardLET(uint256,bytes32[32],bytes32,bytes32[32])" \ - 2 \ - "$NEW_FRONTIER" \ - $NEXT_LEAF \ - "$PROOF" \ - --private-key $GER_REMOVER_PK --rpc-url $L2_RPC_URL - -# Verify rollback -cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL # Expected: 2 - -# 4. forwardLET — add BX, then B3, B4 in a single call -cast send $BRIDGE_L2_ADDR \ - "forwardLET((uint8,uint32,address,uint32,address,uint256,bytes)[],bytes32)" \ - "[(BX_LEAF...),(B3_LEAF...),(B4_LEAF...)]" \ - $EXPECTED_LER \ - --private-key $GER_REMOVER_PK --rpc-url $L2_RPC_URL - -# Verify -cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL # Expected: 5 -cast call $BRIDGE_L2_ADDR "getRoot()(bytes32)" --rpc-url $L2_RPC_URL # Expected: EXPECTED_LER - -# 5. 
Deactivate emergency state -cast send $BRIDGE_L2_ADDR "deactivateEmergencyState()" \ - --private-key $EMERGENCY_UNPAUSER_PK --rpc-url $L2_RPC_URL - -# 6. (Optional) Re-collateralize, then start the aggsender -``` - -After recovery, the L2 LET will contain: B1, B2, BX, B3, B4 — with the first three matching L1's settled state. - -**Collateralization**: Same exposure as Case 1 — the bridge is **undercollateralized** by `amount(BX)`. The legitimate re-added leaves (B3, B4) correspond to real L2 events and do not contribute to undercollateralization. - -**Optional re-collateralization steps**: - -1. Bridge `amount(BX)` from another network into this chain -2. Claim the bridged funds on L2 -3. Burn the claimed amount on L2 - -This must be done before starting the `aggsender` if the resulting negative LBT balance would cause the next certificate to be rejected. - ---- - -#### Case 3: Divergence with no further L2 bridges but continued L1 settlements - -**Scenario**: Multiple settlements have occurred on L1 after the first divergent one, but no additional bridges happened on L2. - -``` -L2: B1 -> LET_1, B2 -> LET_2 -L1: B1 -> LET_1, B2 -> LET_2, BX -> LET_X, BY -> LET_Y -``` - -**Diagnosis check**: - -```bash -# L2 deposit count == divergence point -cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL -# Expected: 2 - -grpcurl -plaintext -d "{\"network_id\": $NETWORK_ID}" \ - $AGGLAYER_GRPC \ - agglayer.node.v1.NodeStateService/GetNetworkInfo -# settled_let_leaf_count expected: 4 (divergence point 2 + 2 divergent leaves) -``` - -**Recovery steps**: - -```bash -# 1. Stop the aggsender -# 2. Activate emergency state -cast send $BRIDGE_L2_ADDR "activateEmergencyState()" \ - --private-key $EMERGENCY_PAUSER_PK --rpc-url $L2_RPC_URL - -# 3. 
forwardLET — add BX and BY to match L1 -cast send $BRIDGE_L2_ADDR \ - "forwardLET((uint8,uint32,address,uint32,address,uint256,bytes)[],bytes32)" \ - "[(BX_LEAF...),(BY_LEAF...)]" \ - $LET_Y \ - --private-key $GER_REMOVER_PK --rpc-url $L2_RPC_URL - -# Verify -cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL # Expected: 4 -cast call $BRIDGE_L2_ADDR "getRoot()(bytes32)" --rpc-url $L2_RPC_URL # Expected: LET_Y - -# 4. Deactivate emergency state -cast send $BRIDGE_L2_ADDR "deactivateEmergencyState()" \ - --private-key $EMERGENCY_UNPAUSER_PK --rpc-url $L2_RPC_URL - -# 5. Re-collateralize (URGENT), then start the aggsender -``` - -No backward operation is needed since L2 has no extra leaves. The `forwardLET` call can batch-insert all missing leaves in a single transaction. - -**Collateralization**: The bridge is **undercollateralized** by `amount(BX) + amount(BY)`. This is the most collateralization-sensitive case among those with no backward step, as multiple bad settlements have accumulated. - -**Re-collateralization steps**: - -1. Bridge `amount(BX) + amount(BY)` from another network into this chain -2. Claim the bridged funds on L2 -3. Burn the claimed amount on L2 - -This is **urgent** — the AggLayer will reject the next certificate if the LBT shows a negative balance, so this must be done before starting the `aggsender`. - ---- - -#### Case 4: Divergence with both further L2 bridges and continued L1 settlements - -**Scenario**: This is the most complex case. After the divergence, both additional bridges occurred on L2 and additional settlements were made on L1. - -``` -L2: B1 -> LET_1, B2 -> LET_2, B3 -> LET_3, B4 -> LET_4 -L1: B1 -> LET_1, B2 -> LET_2, BX -> LET_X, BY -> LET_Y -``` - -L2 has extra leaves (B3, B4) and L1 has settled additional leaves (BX, BY) beyond the divergence point. 
- -**Diagnosis check**: - -```bash -# L2 has more deposits than the divergence point -cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL -# Expected: 4 - -grpcurl -plaintext -d "{\"network_id\": $NETWORK_ID}" \ - $AGGLAYER_GRPC \ - agglayer.node.v1.NodeStateService/GetNetworkInfo -# settled_let_leaf_count expected: 4 (2 matching + 2 divergent) - -# The LERs will differ -cast call $BRIDGE_L2_ADDR "getRoot()(bytes32)" --rpc-url $L2_RPC_URL -# L2 root != L1 settled_ler, even though deposit counts may match - -# Collect leaf data for B3 and B4 -curl -s "$BRIDGE_SERVICE_URL/bridge-by-deposit-count?network_id=$NETWORK_ID&deposit_count=3" | jq . -curl -s "$BRIDGE_SERVICE_URL/bridge-by-deposit-count?network_id=$NETWORK_ID&deposit_count=4" | jq . -``` - -**Recovery steps**: - -```bash -# 1. Stop the aggsender -# 2. Activate emergency state -cast send $BRIDGE_L2_ADDR "activateEmergencyState()" \ - --private-key $EMERGENCY_PAUSER_PK --rpc-url $L2_RPC_URL - -# 3. backwardLET — roll back to deposit count 2 (removing B3 and B4) -cast send $BRIDGE_L2_ADDR \ - "backwardLET(uint256,bytes32[32],bytes32,bytes32[32])" \ - 2 \ - "$NEW_FRONTIER" \ - $NEXT_LEAF \ - "$PROOF" \ - --private-key $GER_REMOVER_PK --rpc-url $L2_RPC_URL - -# Verify rollback -cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL # Expected: 2 - -# 4. forwardLET — add BX, BY (divergent), then B3, B4 (legitimate) in a single call -cast send $BRIDGE_L2_ADDR \ - "forwardLET((uint8,uint32,address,uint32,address,uint256,bytes)[],bytes32)" \ - "[(BX_LEAF...),(BY_LEAF...),(B3_LEAF...),(B4_LEAF...)]" \ - $EXPECTED_LER \ - --private-key $GER_REMOVER_PK --rpc-url $L2_RPC_URL - -# Verify -cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL # Expected: 6 -cast call $BRIDGE_L2_ADDR "getRoot()(bytes32)" --rpc-url $L2_RPC_URL # Expected: EXPECTED_LER - -# 5. 
Deactivate emergency state -cast send $BRIDGE_L2_ADDR "deactivateEmergencyState()" \ - --private-key $EMERGENCY_UNPAUSER_PK --rpc-url $L2_RPC_URL - -# 6. Re-collateralize (URGENT), then start the aggsender -``` - -After recovery, the L2 LET will contain: B1, B2, BX, BY, B3, B4 — with the first four matching L1's settled state. - -**Collateralization**: The bridge is **undercollateralized** by `amount(BX) + amount(BY)`. This is the worst-case scenario: multiple bad settlements on L1 combined with legitimate L2 bridge activity. The legitimate re-added leaves (B3, B4) correspond to real L2 events and do not add to the undercollateralization. - -**Re-collateralization steps**: - -1. Bridge `amount(BX) + amount(BY)` from another network into this chain -2. Claim the bridged funds on L2 -3. Burn the claimed amount on L2 - -This must be done before starting the `aggsender`. Given that multiple invalid settlements have occurred, this is the case where the negative LBT balance is most likely to block the very next certificate. - ---- - -#### Important considerations across all cases -- **Re-collateralization**: The bridge will always be undercollateralized after recovery by the sum of amounts of all divergent leaves. Re-collateralization (bridge from another chain -> claim on L2 -> burn) must be completed before starting the `aggsender` whenever the resulting negative LBT balance would cause the next certificate to be rejected. See each case above for the specific amounts involved. -- **Stop aggsender first**: Always stop the `aggsender` before starting any recovery operations and only start it again after everything is complete (including deactivating emergency mode and re-collateralizing if needed). -- **Certificate crafting**: After recovery, the `aggsender` must craft a certificate that covers the block range containing all the `BackwardLET` and `ForwardLET` events. The certificate's initial block must be correct and all events in the range must be included. 
-- **Event parsing**: The `aggsender` must correctly handle `BackwardLET` events (removing leaves from its DB) and `ForwardLET` events (adding leaves to its DB) to maintain internal consistency. -- **Single `forwardLET` call**: Since `forwardLET` accepts an array of leaves, the divergent leaves and the re-added legitimate bridges should be combined into a single call when possible (e.g., `forwardLET([BX, B3, B4], ...)`), reducing the number of transactions. -- **Order of operations matters**: The `backwardLET` must always come before `forwardLET` when both are needed, since `backwardLET` requires the current tree state to compute valid Merkle proofs. After a `forwardLET`, the tree state has changed and any previously computed proofs for `backwardLET` would be invalid. - -## Appendix: API and gRPC reference - -### AggLayer gRPC — `NodeStateService` - -**Proto package**: `agglayer.node.v1` - -| RPC Method | Description | Key response fields | -|------------|-------------|---------------------| -| `GetNetworkInfo` | Current network state and settlement info | `settled_ler`, `settled_let_leaf_count`, `settled_height`, `settled_certificate_id`, `network_status` | -| `GetLatestCertificateHeader` | Latest certificate (settled or pending) | `prev_local_exit_root`, `new_local_exit_root`, `height`, `status`, `error` | -| `GetCertificateHeader` | Specific certificate by ID | Same as above | - -**`CertificateStatus` enum values**: `PENDING` (1), `PROVEN` (2), `CANDIDATE` (3), `IN_ERROR` (4), `SETTLED` (5) - -**`LatestCertificateRequestType` enum values**: `LATEST_CERTIFICATE_REQUEST_TYPE_SETTLED`, `LATEST_CERTIFICATE_REQUEST_TYPE_PENDING` - -### Bridge Service REST API - -**Base path**: `/bridge/v1` - -| Endpoint | Method | Key params | Description | -|----------|--------|------------|-------------| -| `/bridge-by-deposit-count` | GET | `network_id`, `deposit_count` | Get a single bridge by deposit count and network | -| `/bridges` | GET | `network_id`, `page_number`, 
`page_size` | Paginated list of bridges for a network | -| `/sync-status` | GET | — | Compare on-chain vs synced deposit counts | -| `/claim-proof` | GET | `network_id`, `leaf_index`, `deposit_count` | Merkle proofs for local and rollup exit roots | -| `/l1-info-tree-index` | GET | `network_id`, `deposit_count` | First L1 info tree index after a deposit count | - -### Smart contract view functions (`AgglayerBridgeL2`) - -| Function | Returns | Description | -|----------|---------|-------------| -| `depositCount()` | `uint256` | Current number of deposits in the LET | -| `getRoot()` | `bytes32` | Current Merkle root (LER) of the LET | -| `isEmergencyState()` | `bool` | Whether emergency mode is active | -| `networkID()` | `uint32` | Network ID of this L2 chain | -| `emergencyBridgePauser()` | `address` | Account that can activate emergency state | -| `emergencyBridgeUnpauser()` | `address` | Account that can deactivate emergency state | - -### Smart contract view functions (`AgglayerGERL2`) - -| Function | Returns | Description | -|----------|---------|-------------| -| `globalExitRootRemover()` | `address` | Account that can call `backwardLET`/`forwardLET` | -| `globalExitRootUpdater()` | `address` | Account that can insert global exit roots | diff --git a/tools/backward_forward_let/RECOVERY_PROCEDURE.md b/tools/backward_forward_let/RECOVERY_PROCEDURE.md index dbd0b02b7..4d5ff084a 100644 --- a/tools/backward_forward_let/RECOVERY_PROCEDURE.md +++ b/tools/backward_forward_let/RECOVERY_PROCEDURE.md @@ -1,271 +1,11 @@ -# Backward/Forward LET — Fallback Recovery Procedure +# Backward/Forward LET Recovery Procedure -This document is the canonical fallback procedure for `backward-forward-let` when the -aggsender database is empty, has been wiped, or otherwise cannot provide certificate -bridge exits. 
+The canonical operator workflow now lives in the public runbook: -In this situation the tool can still diagnose the settled AggLayer state, but it cannot -complete the divergence walk from aggsender data alone. The operator must extract the -missing certificate bridge exits from the AggLayer admin API, build an override file, -and rerun the tool with `--cert-exits-file`. +- [Backward/Forward LET Recovery](https://github.com/agglayer/runbooks/blob/main/operations/backward-forward-let-recovery.md) ---- +Use that runbook for diagnosis, missing certificate exits, `export-cert-exits`, +`--cert-exits-file`, recovery execution, verification, and escalation data. -## Prerequisites - -- The agglayer node must have `debug-mode = true` in its configuration. - In the op-pp E2E environment this is already set (`debug-mode = true` in - `test/e2e/envs/op-pp/config/agglayer/config.toml`). -- The agglayer admin JSON-RPC API must be reachable (default port 4446). - The URL is exposed as `agglayer.services.admin_api.external` in `summary.json`. -- `curl` and `jq` must be installed on the operator's machine (`jq` is optional but - makes the JSON manipulation much more convenient). - ---- - -## Step 1 — Run the tool to discover missing cert IDs - -```bash -backward-forward-let --cfg aggkit-config.toml -``` - -When the aggsender DB is empty the tool prints an actionable report: - -``` -WARNING: Aggsender RPC returned no bridge exit data for the following certificate heights. -Recovery cannot proceed until this data is provided. - -Missing certificates (2 heights): - Height 3 CertID: 0xabc123...def456 [ID auto-resolved] - Height 2 CertID: UNKNOWN [contact agglayer admin for cert ID] -``` - -- **`[ID auto-resolved]`** — the tool resolved the cert ID from the agglayer gRPC. You can - call `admin_getCertificate` directly in Step 2. -- **`UNKNOWN`** — the cert ID could not be resolved automatically (only the latest settled - height is resolvable via the public gRPC). 
The agglayer admin must look up - `(network_id, height)` in the `certificate_per_network_cf` column family of the agglayer - state DB and supply the cert ID manually before you can proceed. - -Important operator note: - -- After an aggsender DB wipe, the missing range may span the full settled history - (`0..latest settled height`), not just the newest malicious certificate. That is normal - for this fallback path. -- A large missing range is not a signal that the operator should do hundreds or thousands - of manual one-by-one admin lookups. -- For large ranges, use automation: either script the `admin_getCertificate` calls for all - known cert IDs, or ask the agglayer admin for a batch export of cert IDs and bridge exits. - ---- - -## Step 2 — Fetch each certificate from the agglayer admin API - -For small ranges, you can call `admin_getCertificate` manually per cert ID. For large -ranges, script this step or ask the agglayer admin for a batch export instead. - -Per-certificate example: - -```bash -AGGLAYER_ADMIN="http://localhost:4446" -CERT_ID="0xabc123...def456" - -curl -s -X POST "$AGGLAYER_ADMIN" \ - -H "Content-Type: application/json" \ - -d "{\"jsonrpc\":\"2.0\",\"method\":\"admin_getCertificate\",\"params\":[\"$CERT_ID\"],\"id\":1}" \ - | jq '.' -``` - -The response is a JSON-RPC result where `result` is a two-element array -`[Certificate, CertificateHeader|null]`: - -```json -{ - "jsonrpc": "2.0", - "id": 1, - "result": [ - { - "network_id": 1, - "height": 3, - "bridge_exits": [ ... ], - ... - }, - { ... } - ] -} -``` - -You need `result[0].bridge_exits` from each response. 
- ---- - -## Step 3 — Build the JSON override file - -### Field name note - -The override file uses the **Go `json` tag names** from `agglayertypes.BridgeExit`: - -| Go field | JSON key | -|----------------------|------------------------| -| `LeafType` | `leaf_type` | -| `TokenInfo` | `token_info` | -| `DestinationNetwork` | `dest_network` | -| `DestinationAddress` | `dest_address` | -| `Amount` | `amount` (decimal string) | -| `Metadata` | `metadata` (base64 or null) | - -The agglayer Rust serde may use different field names (e.g., `destination_network` -instead of `dest_network`). **Do not paste the raw `jq` output directly** unless you -have verified the field names match. The safest approach is to let Go do the translation -by using a small helper script (see below). - -### Option A — Shell script (single cert, no Go tooling) - -Verify that the field names in the admin API response match the table above before using -this option. If they do, you can pipe the `bridge_exits` array straight into the file: - -```bash -AGGLAYER_ADMIN="http://localhost:4446" -CERT_ID="0xabc123...def456" -HEIGHT=3 -NETWORK_ID=1 - -BRIDGE_EXITS=$(curl -s -X POST "$AGGLAYER_ADMIN" \ - -H "Content-Type: application/json" \ - -d "{\"jsonrpc\":\"2.0\",\"method\":\"admin_getCertificate\",\"params\":[\"$CERT_ID\"],\"id\":1}" \ - | jq '.result[0].bridge_exits') - -cat > certificate_exits_override.json < certificate_exits_override.json <= height { + printCertStatus(info, cfg.BackwardForwardLET.L2NetworkID, height) + fmt.Printf("Wait complete: height %d is settled.\n", height) + return nil + } + } + if time.Now().After(deadline) { + printCertStatus(info, cfg.BackwardForwardLET.L2NetworkID, c.Uint64("height")) + return fmt.Errorf("timed out after %s waiting for requested certificate status", timeout) + } + select { + case <-c.Context.Done(): + return c.Context.Err() + case <-time.After(15 * time.Second): + } + } + } + + info, _, err := getNetworkInfoAllowNotFound(context.Background(), client, 
cfg.BackwardForwardLET.L2NetworkID) + if err != nil { + return err + } + printCertStatus(info, cfg.BackwardForwardLET.L2NetworkID, c.Uint64("height")) + return nil +} + +func printCertStatus(info agglayertypes.NetworkInfo, networkID uint32, requestedHeight uint64) { + printCertStatusTo(os.Stdout, info, networkID, requestedHeight) +} + +func printCertStatusTo(w io.Writer, info agglayertypes.NetworkInfo, networkID uint32, requestedHeight uint64) { + fmt.Fprintf(w, "Network ID: %d\n", networkID) + if info.SettledHeight == nil { + fmt.Fprintln(w, "Latest settled height: none") + } else { + fmt.Fprintf(w, "Latest settled height: %d\n", *info.SettledHeight) + } + if info.SettledCertificateID != nil { + fmt.Fprintf(w, "Latest settled certificate ID: %s\n", info.SettledCertificateID.Hex()) + } + if info.SettledLER != nil { + fmt.Fprintf(w, "Latest settled LER: %s\n", info.SettledLER.Hex()) + } + if info.SettledLETLeafCount != nil { + fmt.Fprintf(w, "Latest settled deposit count: %d\n", *info.SettledLETLeafCount) + } + + if info.LatestPendingHeight == nil { + fmt.Fprintln(w, "Latest pending certificate: none") + } else { + fmt.Fprintf(w, "Latest pending height: %d\n", *info.LatestPendingHeight) + status := "unknown" + if info.LatestPendingStatus != nil { + status = info.LatestPendingStatus.String() + } + fmt.Fprintf(w, "Latest pending status: %s\n", status) + if info.LatestPendingError != "" { + fmt.Fprintf(w, "Latest pending error: %s\n", info.LatestPendingError) + } + } + + if requestedHeight > 0 || (info.SettledHeight != nil && *info.SettledHeight == 0) { + fmt.Fprintf(w, "Requested height: %d\n", requestedHeight) + switch { + case info.SettledHeight != nil && *info.SettledHeight >= requestedHeight: + fmt.Fprintf(w, "Requested height status: Settled\n") + case info.LatestPendingHeight != nil && *info.LatestPendingHeight == requestedHeight && info.LatestPendingStatus != nil: + fmt.Fprintf(w, "Requested height status: %s\n", info.LatestPendingStatus.String()) + default: + 
fmt.Fprintln(w, "Requested height status: not settled") + } + } +} diff --git a/tools/backward_forward_let/cert_status_test.go b/tools/backward_forward_let/cert_status_test.go new file mode 100644 index 000000000..4140abd60 --- /dev/null +++ b/tools/backward_forward_let/cert_status_test.go @@ -0,0 +1,56 @@ +package backward_forward_let + +import ( + "bytes" + "testing" + + agglayertypes "github.com/agglayer/aggkit/agglayer/types" + "github.com/ethereum/go-ethereum/common" + "github.com/stretchr/testify/require" +) + +func TestHasOpenPendingAtOrAbove(t *testing.T) { + t.Parallel() + + pendingHeight := uint64(10) + openStatus := agglayertypes.Pending + closedStatus := agglayertypes.Settled + + require.True(t, hasOpenPendingAtOrAbove(agglayertypes.NetworkInfo{ + LatestPendingHeight: &pendingHeight, + LatestPendingStatus: &openStatus, + }, 10)) + require.False(t, hasOpenPendingAtOrAbove(agglayertypes.NetworkInfo{ + LatestPendingHeight: &pendingHeight, + LatestPendingStatus: &closedStatus, + }, 10)) + require.False(t, hasOpenPendingAtOrAbove(agglayertypes.NetworkInfo{ + LatestPendingHeight: &pendingHeight, + LatestPendingStatus: &openStatus, + }, 11)) +} + +func TestPrintCertStatus(t *testing.T) { + t.Parallel() + + settledHeight := uint64(12) + settledID := common.HexToHash("0xabc") + settledLER := common.HexToHash("0xdef") + settledDC := uint64(34) + pendingHeight := uint64(13) + pendingStatus := agglayertypes.Pending + + var buf bytes.Buffer + printCertStatusTo(&buf, agglayertypes.NetworkInfo{ + SettledHeight: &settledHeight, + SettledCertificateID: &settledID, + SettledLER: &settledLER, + SettledLETLeafCount: &settledDC, + LatestPendingHeight: &pendingHeight, + LatestPendingStatus: &pendingStatus, + }, 1, 12) + + output := buf.String() + require.Contains(t, output, "Latest settled height: 12") + require.Contains(t, output, "Requested height status: Settled") +} diff --git a/tools/backward_forward_let/cmd/main.go b/tools/backward_forward_let/cmd/main.go index 
ea55020cf..d3db946f7 100644 --- a/tools/backward_forward_let/cmd/main.go +++ b/tools/backward_forward_let/cmd/main.go @@ -25,6 +25,10 @@ func main() { Name: "yes", Usage: "Skip interactive confirmation and execute the recovery plan immediately", }, + &cli.BoolFlag{ + Name: "diagnose-only", + Usage: "Print diagnosis and recovery plan, then stop without prompting or sending recovery transactions", + }, &cli.StringFlag{ Name: "cert-exits-file", Aliases: []string{"f"}, @@ -36,7 +40,7 @@ func main() { app.Commands = []*cli.Command{ { Name: "send-cert", - Usage: "Send a certificate to the agglayer and record it in the aggsender DB", + Usage: "Send a certificate to the agglayer and optionally record it in the aggsender DB", Flags: []cli.Flag{ &cli.StringFlag{ Name: "cert-json", @@ -48,9 +52,16 @@ func main() { Usage: "Path to a file containing the certificate JSON (mutually exclusive with --cert-json)", }, &cli.StringFlag{ - Name: "db-path", - Usage: "Path to the aggsender SQLite DB file (e.g. /path/to/aggsender.sqlite)", - Required: true, + Name: "db-path", + Usage: "Path to the aggsender SQLite DB file (e.g. 
/path/to/aggsender.sqlite)", + }, + &cli.BoolFlag{ + Name: "no-db", + Usage: "Staging-only: send the certificate without recording it in the aggsender DB", + }, + &cli.BoolFlag{ + Name: "staging-only", + Usage: "Required when using staging-only send modes such as --no-db", }, &cli.StringFlag{ Name: "signer-key-path", @@ -63,6 +74,45 @@ func main() { }, Action: backward_forward_let.RunSendCert, }, + { + Name: "craft-cert", + Usage: "Staging-only: craft a testing certificate for a backward/forward LET drill", + Flags: []cli.Flag{ + &cli.BoolFlag{Name: "staging-only", Usage: "Required safety confirmation for testing certificate crafting"}, + &cli.UintFlag{Name: "num-fake-exits", Usage: "Number of fake bridge exits to include"}, + &cli.StringFlag{Name: "amount", Value: "0", Usage: "Fake bridge exit amount"}, + &cli.UintFlag{Name: "starting-exit-index", Usage: "Starting index for deterministic fake exit uniqueness"}, + &cli.StringFlag{Name: "nonce", Usage: "Optional nonce used to derive fake exit destination addresses"}, + &cli.Uint64Flag{Name: "l1-info-tree-leaf-count", Usage: "Override L1 info tree leaf count when aggsender header data is unavailable"}, + &cli.Uint64Flag{Name: "signer-index", Usage: "Multisig signer index to write into the crafted certificate"}, + &cli.StringFlag{Name: "out", Usage: "Output path for the crafted certificate JSON", Required: true}, + }, + Action: backward_forward_let.RunCraftCert, + }, + { + Name: "cert-status", + Usage: "Print AggLayer certificate settlement and pending status", + Flags: []cli.Flag{ + &cli.Uint64Flag{Name: "height", Usage: "Certificate height to check"}, + &cli.BoolFlag{Name: "wait-no-pending", Usage: "Wait until AggLayer has no open pending certificate"}, + &cli.BoolFlag{Name: "wait-settled", Usage: "Wait until --height is settled"}, + &cli.DurationFlag{Name: "timeout", Value: backward_forward_let.DefaultCertStatusTimeout, Usage: "Maximum wait duration"}, + }, + Action: backward_forward_let.RunCertStatus, + }, + { + 
Name: "export-cert-exits", + Usage: "Export a certificate-exits override from an authoritative height-to-cert-ID map", + Flags: []cli.Flag{ + &cli.StringFlag{Name: "agglayer-admin-url", Usage: "Read-only AggLayer admin JSON-RPC URL", Required: true}, + &cli.StringFlag{Name: "cert-ids-file", Usage: "JSON file mapping certificate heights to cert IDs", Required: true}, + &cli.StringFlag{Name: "out", Usage: "Output certificate exits override JSON path", Required: true}, + &cli.StringFlag{Name: "manifest-out", Usage: "Output source manifest JSON path (default: .manifest.json)"}, + &cli.Uint64Flag{Name: "max-certs", Value: backward_forward_let.DefaultExportCertExitsMaxCerts, Usage: "Maximum certificates to export in one batch"}, + &cli.DurationFlag{Name: "timeout", Value: backward_forward_let.DefaultExportCertExitsTimeout, Usage: "Maximum export duration"}, + }, + Action: backward_forward_let.RunExportCertExits, + }, } if err := app.Run(os.Args); err != nil { diff --git a/tools/backward_forward_let/config.go b/tools/backward_forward_let/config.go index 685221f04..605d4e9f6 100644 --- a/tools/backward_forward_let/config.go +++ b/tools/backward_forward_let/config.go @@ -6,6 +6,7 @@ import ( "strings" "github.com/agglayer/aggkit/agglayer" + aggsenderconfig "github.com/agglayer/aggkit/aggsender/config" "github.com/agglayer/aggkit/bridgesync" aggkitConfig "github.com/agglayer/aggkit/config" ethermanconfig "github.com/agglayer/aggkit/etherman/config" @@ -26,6 +27,9 @@ type Config struct { // AgglayerClient is the AggLayer gRPC client configuration. AgglayerClient agglayer.ClientConfig `mapstructure:"AgglayerClient"` + // AggSender contains the signer config used to craft staging certificates. + AggSender aggsenderconfig.Config `mapstructure:"AggSender"` + // BackwardForwardLET contains tool-specific settings. 
BackwardForwardLET BackwardForwardLETConfig `mapstructure:"BackwardForwardLET"` } @@ -53,8 +57,8 @@ type BackwardForwardLETConfig struct { // CertificateExitsFile is an optional path to a JSON override file containing // pre-extracted bridge exits keyed by certificate height. When set, used as a // fallback if the aggsender RPC cannot supply bridge exits for a height. - // Obtain the file by calling admin_getCertificate on the agglayer for each - // cert ID reported in the tool's missing-cert output. + // Prefer generating the file with the export-cert-exits subcommand from an + // authoritative height-to-cert-ID map. CertificateExitsFile string `mapstructure:"CertificateExitsFile"` } diff --git a/tools/backward_forward_let/craft_cert.go b/tools/backward_forward_let/craft_cert.go new file mode 100644 index 000000000..e04543539 --- /dev/null +++ b/tools/backward_forward_let/craft_cert.go @@ -0,0 +1,295 @@ +package backward_forward_let + +import ( + "context" + "encoding/json" + "fmt" + "math/big" + "os" + "path/filepath" + "time" + + agglayertypes "github.com/agglayer/aggkit/agglayer/types" + "github.com/agglayer/aggkit/aggsender/validator" + bridgetypes "github.com/agglayer/aggkit/bridgesync/types" + "github.com/agglayer/aggkit/log" + "github.com/agglayer/go_signer/signer" + "github.com/ethereum/go-ethereum/accounts/abi/bind" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" + "github.com/urfave/cli/v2" +) + +// RunCraftCert builds a staging-only testing certificate and writes it as JSON. 
+func RunCraftCert(c *cli.Context) error { + if !c.Bool("staging-only") { + return fmt.Errorf("craft-cert is dangerous and requires --staging-only") + } + if c.Uint("num-fake-exits") == 0 { + return fmt.Errorf("--num-fake-exits must be greater than zero") + } + + cfg, err := LoadConfig(c) + if err != nil { + return err + } + if f := c.String("cert-exits-file"); f != "" { + cfg.BackwardForwardLET.CertificateExitsFile = f + } + + dialCtx, dialCancel := context.WithTimeout(c.Context, dialTimeout) + env, err := SetupEnv(dialCtx, cfg) + dialCancel() + if err != nil { + return err + } + defer env.Close() + + cert, err := craftStagingCertificate(c.Context, env, craftCertOptions{ + NumFakeExits: uint32(c.Uint("num-fake-exits")), + Amount: c.String("amount"), + StartingExitIndex: uint32(c.Uint("starting-exit-index")), + Nonce: c.String("nonce"), + L1InfoTreeLeafCount: uint32(c.Uint64("l1-info-tree-leaf-count")), + SignerIndex: uint32(c.Uint64("signer-index")), + CertificateOutputFile: c.String("out"), + RequireNoOpenCerts: true, + AllowL1InfoCountSource: true, + }) + if err != nil { + return err + } + + data, err := json.MarshalIndent(cert, "", " ") + if err != nil { + return fmt.Errorf("marshal crafted certificate: %w", err) + } + out := filepath.Clean(c.String("out")) + if err := os.WriteFile(out, data, 0o600); err != nil { + return fmt.Errorf("write crafted certificate %s: %w", out, err) + } + + fmt.Println("STAGING ONLY: crafted testing certificate.") + fmt.Printf("Certificate height: %d\n", cert.Height) + fmt.Printf("Previous local exit root: %s\n", cert.PrevLocalExitRoot.Hex()) + fmt.Printf("New local exit root: %s\n", cert.NewLocalExitRoot.Hex()) + fmt.Printf("Fake bridge exits: %d\n", len(cert.BridgeExits)) + fmt.Printf("Certificate file: %s\n", out) + fmt.Printf("Next: backward-forward-let --cfg send-cert --cert-file %s --no-db --staging-only\n", out) + return nil +} + +type craftCertOptions struct { + NumFakeExits uint32 + Amount string + StartingExitIndex uint32 
+ Nonce string + L1InfoTreeLeafCount uint32 + SignerIndex uint32 + CertificateOutputFile string + RequireNoOpenCerts bool + AllowL1InfoCountSource bool +} + +func craftStagingCertificate( + ctx context.Context, + env *Env, + opts craftCertOptions, +) (*agglayertypes.Certificate, error) { + amount, ok := new(big.Int).SetString(opts.Amount, 10) + if !ok || amount.Sign() < 0 { + return nil, fmt.Errorf("--amount must be a non-negative base-10 integer") + } + + info, _, err := getNetworkInfoAllowNotFound(ctx, env.AgglayerClient, env.L2NetworkID) + if err != nil { + return nil, err + } + + var certHeight uint64 + var prevLER common.Hash + var existingLeafCount uint32 + l1InfoTreeLeafCount := opts.L1InfoTreeLeafCount + + if info.SettledHeight != nil { + certHeight = *info.SettledHeight + 1 + if info.SettledLER == nil || info.SettledLETLeafCount == nil { + return nil, fmt.Errorf("agglayer settled state is missing LER or LET leaf count") + } + prevLER = *info.SettledLER + existingLeafCount = uint32(*info.SettledLETLeafCount) + if opts.RequireNoOpenCerts && hasOpenPendingAtOrAbove(info, certHeight) { + return nil, fmt.Errorf("pending certificate race: latest pending height/status is %s; wait for it to settle or enter InError before crafting", pendingSummary(info)) + } + if l1InfoTreeLeafCount == 0 { + count, err := l1InfoTreeLeafCountFromAggsender(env, *info.SettledHeight) + if err != nil { + return nil, fmt.Errorf("get L1 info tree leaf count from aggsender for height %d: %w; rerun with --l1-info-tree-leaf-count", *info.SettledHeight, err) + } + l1InfoTreeLeafCount = count + } + } else { + if opts.RequireNoOpenCerts && hasOpenPendingAtOrAbove(info, 0) { + return nil, fmt.Errorf("pending certificate race: latest pending height/status is %s; wait for it to settle or enter InError before crafting", pendingSummary(info)) + } + callOpts := &bind.CallOpts{Context: ctx} + root, err := env.L2Bridge.GetRoot(callOpts) + if err != nil { + return nil, fmt.Errorf("get initial L2 bridge 
root: %w", err) + } + prevLER = common.Hash(root) + dcBig, err := env.L2Bridge.DepositCount(callOpts) + if err != nil { + return nil, fmt.Errorf("get initial L2 deposit count: %w", err) + } + existingLeafCount = uint32(dcBig.Uint64()) + if l1InfoTreeLeafCount == 0 { + l1InfoTreeLeafCount = 1 + } + } + + existingHashes, err := stagingExistingLeafHashes(ctx, env, info.SettledHeight, existingLeafCount) + if err != nil { + return nil, err + } + + fakeExits := makeFakeBridgeExits(opts.NumFakeExits, opts.StartingExitIndex, opts.Nonce, amount) + newHashes := make([]common.Hash, 0, len(fakeExits)) + for _, be := range fakeExits { + newHashes = append(newHashes, BridgeExitLeafHash(be)) + } + newLER, err := ComputeLERForNewLeaves(existingHashes, newHashes) + if err != nil { + return nil, fmt.Errorf("compute new local exit root: %w", err) + } + + cert := &agglayertypes.Certificate{ + NetworkID: env.L2NetworkID, + Height: certHeight, + PrevLocalExitRoot: prevLER, + NewLocalExitRoot: newLER, + BridgeExits: fakeExits, + ImportedBridgeExits: nil, + L1InfoTreeLeafCount: l1InfoTreeLeafCount, + CustomChainData: nil, + AggchainData: nil, + } + if err := signStagingCertificate(ctx, env, cert, opts.SignerIndex); err != nil { + return nil, err + } + return cert, nil +} + +func stagingExistingLeafHashes( + ctx context.Context, + env *Env, + settledHeight *uint64, + existingLeafCount uint32, +) ([]common.Hash, error) { + if settledHeight == nil { + return fetchL2LeafHashesUpTo(ctx, env, existingLeafCount) + } + hashes := make([]common.Hash, 0, existingLeafCount) + for h := uint64(0); h <= *settledHeight; h++ { + exits, err := getBridgeExitsForHeight(env, h) + if err != nil { + return nil, fmt.Errorf("load historical bridge exits for cert height %d: %w", h, err) + } + for _, be := range exits { + hashes = append(hashes, BridgeExitLeafHash(be)) + } + } + return hashes, nil +} + +func l1InfoTreeLeafCountFromAggsender(env *Env, settledHeight uint64) (uint32, error) { + cert, err := 
env.AggsenderRPC.GetCertificateHeaderPerHeight(&settledHeight) + if err != nil { + return 0, err + } + if cert == nil || cert.Header == nil || cert.Header.L1InfoTreeLeafCount == 0 { + return 0, fmt.Errorf("aggsender returned no L1InfoTreeLeafCount") + } + return cert.Header.L1InfoTreeLeafCount, nil +} + +func signStagingCertificate(ctx context.Context, env *Env, cert *agglayertypes.Certificate, signerIndex uint32) error { + l2ChainID, err := env.chainIDFn(ctx) + if err != nil { + return fmt.Errorf("get L2 chain ID: %w", err) + } + s, err := signer.NewSigner( + ctx, + l2ChainID.Uint64(), + env.Config.AggSender.AggsenderPrivateKey, + "staging-craft-cert", + log.GetDefaultLogger(), + ) + if err != nil { + return fmt.Errorf("load aggsender signer: %w", err) + } + if err := s.Initialize(ctx); err != nil { + return fmt.Errorf("initialize aggsender signer: %w", err) + } + hashToSign, err := validator.HashCertificateToSign(cert) + if err != nil { + return fmt.Errorf("hash crafted certificate: %w", err) + } + sig, err := s.SignHash(ctx, hashToSign) + if err != nil { + return fmt.Errorf("sign crafted certificate with aggsender signer: %w", err) + } + cert.AggchainData = &agglayertypes.AggchainDataMultisig{ + Multisig: &agglayertypes.Multisig{ + Signatures: []agglayertypes.ECDSAMultisigEntry{ + {Index: signerIndex, Signature: sig}, + }, + }, + } + return nil +} + +func makeFakeBridgeExits(count, startingIndex uint32, nonce string, amount *big.Int) []*agglayertypes.BridgeExit { + if nonce == "" { + nonce = fmt.Sprintf("%d", time.Now().UnixNano()) + } + exits := make([]*agglayertypes.BridgeExit, 0, count) + for i := uint32(0); i < count; i++ { + exitIndex := startingIndex + i + addrBytes := crypto.Keccak256([]byte(fmt.Sprintf("%s:%d", nonce, exitIndex))) + exits = append(exits, &agglayertypes.BridgeExit{ + LeafType: bridgetypes.LeafTypeAsset, + TokenInfo: &agglayertypes.TokenInfo{ + OriginNetwork: 0, + OriginTokenAddress: common.Address{}, + }, + DestinationNetwork: 0, + 
DestinationAddress: common.BytesToAddress(addrBytes), + Amount: new(big.Int).Set(amount), + Metadata: nil, + }) + } + return exits +} + +func hasOpenPendingAtOrAbove(info agglayertypes.NetworkInfo, height uint64) bool { + if info.LatestPendingHeight == nil || *info.LatestPendingHeight < height { + return false + } + if info.LatestPendingStatus == nil { + return true + } + return info.LatestPendingStatus.IsOpen() +} + +func pendingSummary(info agglayertypes.NetworkInfo) string { + height := "none" + if info.LatestPendingHeight != nil { + height = fmt.Sprintf("%d", *info.LatestPendingHeight) + } + status := "unknown" + if info.LatestPendingStatus != nil { + status = info.LatestPendingStatus.String() + } + return fmt.Sprintf("height=%s status=%s", height, status) +} diff --git a/tools/backward_forward_let/craft_cert_test.go b/tools/backward_forward_let/craft_cert_test.go new file mode 100644 index 000000000..e91f62db6 --- /dev/null +++ b/tools/backward_forward_let/craft_cert_test.go @@ -0,0 +1,28 @@ +package backward_forward_let + +import ( + "math/big" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestMakeFakeBridgeExits(t *testing.T) { + t.Parallel() + + exits := makeFakeBridgeExits(2, 7, "test-nonce", big.NewInt(42)) + + require.Len(t, exits, 2) + require.Equal(t, big.NewInt(42), exits[0].Amount) + require.NotEqual(t, exits[0].DestinationAddress, exits[1].DestinationAddress) + require.Equal(t, exits[0].DestinationNetwork, exits[1].DestinationNetwork) +} + +func TestMakeFakeBridgeExits_DeterministicWithNonce(t *testing.T) { + t.Parallel() + + a := makeFakeBridgeExits(1, 1, "same", big.NewInt(0)) + b := makeFakeBridgeExits(1, 1, "same", big.NewInt(0)) + + require.Equal(t, a[0].DestinationAddress, b[0].DestinationAddress) +} diff --git a/tools/backward_forward_let/diagnosis.go b/tools/backward_forward_let/diagnosis.go index 085725c57..f145d3aa6 100644 --- a/tools/backward_forward_let/diagnosis.go +++ b/tools/backward_forward_let/diagnosis.go @@ 
-12,10 +12,8 @@ import ( bridgeservice "github.com/agglayer/aggkit/bridgeservice/client" bridgeservicetypes "github.com/agglayer/aggkit/bridgeservice/types" "github.com/agglayer/aggkit/bridgesync" - aggkitgrpc "github.com/agglayer/aggkit/grpc" "github.com/ethereum/go-ethereum/accounts/abi/bind" "github.com/ethereum/go-ethereum/common" - "google.golang.org/grpc/codes" ) // aggsenderRPCClient is the subset of rpcclient.Client used by the tool and its tests. @@ -37,15 +35,14 @@ func Diagnose(ctx context.Context, env *Env) (*DiagnosisResult, error) { result := &DiagnosisResult{Case: NoDivergence} // Step 1 — Query AggLayer settled state. - info, err := env.AgglayerClient.GetNetworkInfo(ctx, env.L2NetworkID) + info, notFound, err := getNetworkInfoAllowNotFound(ctx, env.AgglayerClient, env.L2NetworkID) if err != nil { + return nil, err + } + if notFound { // A NotFound response means the network is not yet known to the agglayer // (no certificates have been settled), so there is no divergence. - var grpcErr aggkitgrpc.GRPCError - if errors.As(err, &grpcErr) && grpcErr.Code == codes.NotFound { - return result, nil - } - return nil, fmt.Errorf("get network info from agglayer: %w", err) + return result, nil } if info.SettledHeight == nil { // Agglayer has no settled certificates for this network. 
@@ -285,9 +282,9 @@ func collectExtraL2Bridges( br, err := env.BridgeService.GetBridgeByDepositCount(ctx, env.L2NetworkID, dc) if err != nil { if isNotFound(err) { - continue + return nil, fmt.Errorf("bridge service data not ready for recovery: missing L2 bridge at DC=%d; wait for bridge-service indexing and rerun diagnosis", dc) } - return nil, fmt.Errorf("get L2 bridge at DC=%d: %w", dc, err) + return nil, fmt.Errorf("bridge service data not ready for recovery: get L2 bridge at DC=%d: %w", dc, err) } extra = append(extra, BridgeResponseToLeafData(br)) } @@ -358,17 +355,17 @@ func PrintDiagnosis(w io.Writer, result *DiagnosisResult) { fmt.Fprintln(w) } - if result.Case == NoDivergence { - fmt.Fprintln(w, "Case: NoDivergence — L1 settled state and L2 on-chain state are in sync.") + if result.AggsenderAPIFailed { + printMissingCertReport(w, result) return } - if result.AggsenderAPIFailed { - printMissingCertReport(w, result) + if result.IsCompleteNoDivergence() { + fmt.Fprintln(w, "Case: NoDivergence — L1 settled state and L2 on-chain state are in sync.") return } - fmt.Fprintf(w, "Case: %s\n", caseDescription(result.Case)) + fmt.Fprintf(w, "Status: Recovery required - %s\n", recoveryDescription(result.Case)) fmt.Fprintf(w, "Divergence Point (matching leaf count): %d\n", result.DivergencePoint) fmt.Fprintln(w) @@ -436,6 +433,7 @@ func PrintDiagnosis(w io.Writer, result *DiagnosisResult) { // admin_getCertificate on the agglayer, shows the override file template with the // actual heights, and prints the re-run command. 
func printMissingCertReport(w io.Writer, result *DiagnosisResult) { + fmt.Fprintln(w, "Status: Missing certificate exits - recovery cannot continue yet.") fmt.Fprintln(w, "WARNING: Aggsender RPC returned no bridge exit data for the following certificate heights.") fmt.Fprintln(w, "Recovery cannot proceed until this data is provided.") fmt.Fprintln(w) @@ -475,46 +473,60 @@ func printMissingCertReport(w io.Writer, result *DiagnosisResult) { fmt.Fprintln(w) } - fmt.Fprintln(w, "To extract bridge exits for each KNOWN cert ID:") + fmt.Fprintln(w, "Preferred batch export path:") + fmt.Fprintln(w, " 1. Build an authoritative cert ID map from agglayer admin data:") + fmt.Fprintln(w, " {") + fmt.Fprintln(w, ` "network_id": ,`) + fmt.Fprintln(w, ` "certificates": {`) + for i, mc := range result.MissingCerts { + suffix := "," + if i == n-1 { + suffix = "" + } + certID := "" + if mc.CertIDResolved { + certID = mc.CertID.Hex() + } + fmt.Fprintf(w, " \"%d\": \"%s\"%s\n", mc.Height, certID, suffix) + } + fmt.Fprintln(w, " }") + fmt.Fprintln(w, " }") + fmt.Fprintln(w, " 2. 
Export the override JSON:") + fmt.Fprintln(w, " backward-forward-let --cfg export-cert-exits \\") + fmt.Fprintln(w, " --agglayer-admin-url \\") + fmt.Fprintln(w, " --cert-ids-file \\") + fmt.Fprintln(w, " --out ") + fmt.Fprintln(w) + + fmt.Fprintln(w, "The exporter calls admin_getCertificate for each cert ID, validates network/height,") + fmt.Fprintln(w, "preserves empty bridge-exit lists, writes a source manifest, and prints the") + fmt.Fprintln(w, "diagnosis/recovery follow-up commands.") + fmt.Fprintln(w) + + fmt.Fprintln(w, "Manual admin API shape for each KNOWN cert ID:") fmt.Fprintln(w, " POST http:///") fmt.Fprintln(w, " Content-Type: application/json") fmt.Fprintln(w) fmt.Fprintln(w, ` {"jsonrpc":"2.0","method":"admin_getCertificate","params":[""],"id":1}`) fmt.Fprintln(w) fmt.Fprintln(w, " The response is [Certificate, CertificateHeader|null].") - fmt.Fprintln(w, ` Extract the "bridge_exits" field from the Certificate object.`) - fmt.Fprintln(w) - - fmt.Fprintln(w, "Build a JSON override file in this format:") - fmt.Fprintln(w, " {") - fmt.Fprintln(w, ` "network_id": ,`) - fmt.Fprintln(w, ` "heights": {`) - for i, mc := range result.MissingCerts { - suffix := "," - if i == n-1 { - suffix = "" - } - fmt.Fprintf(w, " \"%d\": [ ...bridge_exits from admin_getCertificate response... 
]%s\n", - mc.Height, suffix) - } - fmt.Fprintln(w, " }") - fmt.Fprintln(w, " }") + fmt.Fprintln(w, ` Use export-cert-exits to extract and re-marshal the "bridge_exits" field.`) fmt.Fprintln(w) fmt.Fprintln(w, "Re-run the tool with:") fmt.Fprintln(w, " backward-forward-let --cfg --cert-exits-file ") } -func caseDescription(c RecoveryCase) string { +func recoveryDescription(c RecoveryCase) string { switch c { case Case1: - return "Case1 — ForwardLET only: single divergent leaf batch, no extra L2 bridges" + return "ForwardLET recovery required for divergent settled bridge exits" case Case2: - return "Case2 — BackwardLET + ForwardLET: single divergent leaf + extra real L2 bridges" + return "BackwardLET and ForwardLET recovery required, including replay of real L2 bridges" case Case3: - return "Case3 — ForwardLET only: multiple divergent leaf batches, no extra L2 bridges" + return "ForwardLET recovery required for multiple divergent settled bridge exits" case Case4: - return "Case4 — BackwardLET + ForwardLET: multiple divergent leaves + extra real L2 bridges" + return "BackwardLET and ForwardLET recovery required, including multiple divergent exits and real L2 bridge replay" default: return string(c) } diff --git a/tools/backward_forward_let/diagnosis_test.go b/tools/backward_forward_let/diagnosis_test.go index ac9ae912c..cb9da809a 100644 --- a/tools/backward_forward_let/diagnosis_test.go +++ b/tools/backward_forward_let/diagnosis_test.go @@ -223,7 +223,8 @@ func TestPrintDiagnosis(t *testing.T) { PrintDiagnosis(&buf, result) output := buf.String() - require.Contains(t, output, "Case3") + require.Contains(t, output, "Status: Recovery required") + require.NotContains(t, output, "Case3") require.Contains(t, output, ler.Hex()) require.Contains(t, output, tokenA.Hex()) require.Contains(t, output, "500") @@ -241,6 +242,14 @@ func TestPrintDiagnosis_NoDivergence(t *testing.T) { require.Contains(t, buf.String(), "NoDivergence") } +func 
TestDiagnosisResult_IsCompleteNoDivergence(t *testing.T) { + t.Parallel() + + require.True(t, (&DiagnosisResult{Case: NoDivergence}).IsCompleteNoDivergence()) + require.False(t, (&DiagnosisResult{Case: NoDivergence, AggsenderAPIFailed: true}).IsCompleteNoDivergence()) + require.False(t, (&DiagnosisResult{Case: Case1}).IsCompleteNoDivergence()) +} + // TestPrintDiagnosis_AggsenderAPIFailed verifies the actionable missing-cert output // when all cert IDs are resolved (no UNKNOWN entries). func TestPrintDiagnosis_AggsenderAPIFailed(t *testing.T) { @@ -248,7 +257,7 @@ func TestPrintDiagnosis_AggsenderAPIFailed(t *testing.T) { certID := common.HexToHash("0xDEAD") result := &DiagnosisResult{ - Case: Case1, + Case: NoDivergence, AggsenderAPIFailed: true, MissingCerts: []MissingCertInfo{ {Height: 7, CertID: certID, CertIDResolved: true}, @@ -268,6 +277,7 @@ func TestPrintDiagnosis_AggsenderAPIFailed(t *testing.T) { require.Contains(t, output, "admin_getCertificate") require.Contains(t, output, `"7":`) require.Contains(t, output, "--cert-exits-file") + require.NotContains(t, output, "Case: NoDivergence") // No UNKNOWN note when all cert IDs are resolved. require.NotContains(t, output, "UNKNOWN") require.NotContains(t, output, "certificate_per_network_cf") @@ -552,26 +562,29 @@ func TestIsNotFound(t *testing.T) { require.False(t, isNotFound(errors.New("some other error"))) } -// TestCaseDescription verifies caseDescription returns the correct string for each case. -func TestCaseDescription(t *testing.T) { +// TestRecoveryDescription verifies recoveryDescription returns public-facing strings without case labels. 
+func TestRecoveryDescription(t *testing.T) { t.Parallel() tests := []struct { c RecoveryCase want string }{ - {Case1, "Case1"}, - {Case2, "Case2"}, - {Case3, "Case3"}, - {Case4, "Case4"}, + {Case1, "ForwardLET recovery required"}, + {Case2, "BackwardLET and ForwardLET recovery required"}, + {Case3, "ForwardLET recovery required"}, + {Case4, "BackwardLET and ForwardLET recovery required"}, {NoDivergence, string(NoDivergence)}, // default branch } for _, tc := range tests { t.Run(string(tc.c), func(t *testing.T) { t.Parallel() - got := caseDescription(tc.c) + got := recoveryDescription(tc.c) require.Contains(t, got, tc.want) + if tc.c != NoDivergence { + require.NotContains(t, got, string(tc.c)) + } }) } } @@ -755,7 +768,7 @@ func TestCollectExtraL2Bridges_HappyPath(t *testing.T) { require.Len(t, extra, 2) } -// TestCollectExtraL2Bridges_NotFound verifies that NotFound entries are skipped. +// TestCollectExtraL2Bridges_NotFound verifies that NotFound entries are safe stops. func TestCollectExtraL2Bridges_NotFound(t *testing.T) { t.Parallel() @@ -766,15 +779,16 @@ func TestCollectExtraL2Bridges_NotFound(t *testing.T) { BridgeService: &stubBridgeService{ bridges: map[uint32]*bridgeservicetypes.BridgeResponse{ 3: br3, - // DC 4 is absent → returns ErrNotFound → skipped + // DC 4 is absent and must stop recovery planning. }, }, L2NetworkID: 1, } - extra, err := collectExtraL2Bridges(context.Background(), env, 3, 5) - require.NoError(t, err) - require.Len(t, extra, 1) + _, err := collectExtraL2Bridges(context.Background(), env, 3, 5) + require.Error(t, err) + require.Contains(t, err.Error(), "bridge service data not ready") + require.Contains(t, err.Error(), "DC=4") } // TestCollectExtraL2Bridges_ServiceError verifies a non-NotFound error is propagated. 
@@ -874,7 +888,8 @@ func TestPrintDiagnosis_WithExtraL2Bridges(t *testing.T) { require.Contains(t, output, "Extra Real L2 Bridges") require.Contains(t, output, "200") - require.Contains(t, output, "Case2") + require.Contains(t, output, "Status: Recovery required") + require.NotContains(t, output, "Case2") } // TestFindDivergencePoint_NonMatchingExits verifies the path where exits from a cert diff --git a/tools/backward_forward_let/export_cert_exits.go b/tools/backward_forward_let/export_cert_exits.go new file mode 100644 index 000000000..7296fb932 --- /dev/null +++ b/tools/backward_forward_let/export_cert_exits.go @@ -0,0 +1,243 @@ +package backward_forward_let + +import ( + "context" + "encoding/hex" + "encoding/json" + "fmt" + "net/url" + "os" + "path/filepath" + "sort" + "strconv" + "strings" + "time" + + "github.com/0xPolygon/cdk-rpc/rpc" + agglayertypes "github.com/agglayer/aggkit/agglayer/types" + "github.com/ethereum/go-ethereum/common" + "github.com/urfave/cli/v2" +) + +const ( + DefaultExportCertExitsMaxCerts uint64 = 2000 + DefaultExportCertExitsTimeout = 30 * time.Minute +) + +var fetchAdminCertificate = fetchAgglayerAdminCertificate + +type certIDsFileJSON struct { + NetworkID uint32 `json:"network_id"` + Certificates map[string]string `json:"certificates"` +} + +type exportCertExitsManifest struct { + NetworkID uint32 `json:"network_id"` + Source string `json:"source"` + AgglayerAdminURL string `json:"agglayer_admin_url"` + OverrideFile string `json:"override_file"` + Certificates []exportCertExitsEntry `json:"certificates"` +} + +type exportCertExitsEntry struct { + Height uint64 `json:"height"` + CertificateID string `json:"certificate_id"` + BridgeExitCount int `json:"bridge_exit_count"` +} + +// RunExportCertExits exports an override JSON from a read-only AggLayer admin source. 
+func RunExportCertExits(c *cli.Context) error { + cfg, err := LoadConfig(c) + if err != nil { + return err + } + + certIDs, err := loadCertIDsFile(c.String("cert-ids-file"), cfg.BackwardForwardLET.L2NetworkID) + if err != nil { + return err + } + if len(certIDs) == 0 { + return fmt.Errorf("cert IDs file contains no certificates") + } + if uint64(len(certIDs)) > c.Uint64("max-certs") { + return fmt.Errorf("refusing to export %d certificates; increase --max-certs if this batch is intended", len(certIDs)) + } + + timeout := c.Duration("timeout") + if timeout <= 0 { + timeout = DefaultExportCertExitsTimeout + } + ctx, cancel := context.WithTimeout(c.Context, timeout) + defer cancel() + + outPath := filepath.Clean(c.String("out")) + manifestPath := c.String("manifest-out") + if manifestPath == "" { + manifestPath = outPath + ".manifest.json" + } + manifestPath = filepath.Clean(manifestPath) + + heights := sortedCertHeights(certIDs) + override := overrideFileJSON{ + NetworkID: cfg.BackwardForwardLET.L2NetworkID, + Description: "generated by backward-forward-let export-cert-exits from agglayer admin_getCertificate", + Heights: make(map[string][]*agglayertypes.BridgeExit, len(heights)), + } + manifest := exportCertExitsManifest{ + NetworkID: cfg.BackwardForwardLET.L2NetworkID, + Source: "agglayer admin_getCertificate", + AgglayerAdminURL: sanitizeAdminURL(c.String("agglayer-admin-url")), + OverrideFile: outPath, + Certificates: make([]exportCertExitsEntry, 0, len(heights)), + } + + for _, height := range heights { + certID := certIDs[height] + cert, err := fetchAdminCertificate(ctx, c.String("agglayer-admin-url"), certID) + if err != nil { + return fmt.Errorf("fetch admin_getCertificate height=%d certID=%s: %w", height, certID.Hex(), err) + } + if err := validateAdminCertificate(cert, cfg.BackwardForwardLET.L2NetworkID, height, certID); err != nil { + return fmt.Errorf("validate admin certificate height=%d certID=%s: %w", height, certID.Hex(), err) + } + exits := 
cert.BridgeExits + if exits == nil { + exits = []*agglayertypes.BridgeExit{} + } + override.Heights[strconv.FormatUint(height, 10)] = exits + manifest.Certificates = append(manifest.Certificates, exportCertExitsEntry{ + Height: height, + CertificateID: certID.Hex(), + BridgeExitCount: len(exits), + }) + } + + if err := writeJSONFile(outPath, override); err != nil { + return err + } + if err := writeJSONFile(manifestPath, manifest); err != nil { + return err + } + + fmt.Println("Exported certificate exits override.") + fmt.Printf("Network ID: %d\n", cfg.BackwardForwardLET.L2NetworkID) + fmt.Printf("Certificates exported: %d\n", len(heights)) + fmt.Printf("Override file: %s\n", outPath) + fmt.Printf("Source manifest: %s\n", manifestPath) + fmt.Println("Next:") + fmt.Printf(" backward-forward-let --cfg --cert-exits-file %s --diagnose-only\n", outPath) + fmt.Printf(" backward-forward-let --cfg --cert-exits-file %s\n", outPath) + return nil +} + +func loadCertIDsFile(filePath string, expectedNetworkID uint32) (map[uint64]common.Hash, error) { + data, err := os.ReadFile(filePath) + if err != nil { + return nil, fmt.Errorf("read cert IDs file %s: %w", filePath, err) + } + var raw certIDsFileJSON + if err := json.Unmarshal(data, &raw); err != nil { + return nil, fmt.Errorf("parse cert IDs file %s: %w", filePath, err) + } + if raw.NetworkID != 0 && raw.NetworkID != expectedNetworkID { + return nil, fmt.Errorf("cert IDs file %s: network_id %d does not match config L2NetworkID %d", + filePath, raw.NetworkID, expectedNetworkID) + } + if raw.Certificates == nil { + return nil, fmt.Errorf("cert IDs file %s: certificates map is missing", filePath) + } + + parsed := make(map[uint64]common.Hash, len(raw.Certificates)) + for key, value := range raw.Certificates { + height, err := strconv.ParseUint(key, 10, 64) + if err != nil { + return nil, fmt.Errorf("cert IDs file %s: non-numeric height key %q: %w", filePath, key, err) + } + certID, err := parseCertificateID(value) + if err != 
nil { + return nil, fmt.Errorf("cert IDs file %s: height %d has invalid cert ID: %w", filePath, height, err) + } + parsed[height] = certID + } + return parsed, nil +} + +func parseCertificateID(value string) (common.Hash, error) { + value = strings.TrimSpace(value) + if !strings.HasPrefix(value, "0x") || len(value) != 66 { + return common.Hash{}, fmt.Errorf("must be a 32-byte 0x-prefixed hex string") + } + if _, err := hex.DecodeString(value[2:]); err != nil { + return common.Hash{}, err + } + return common.HexToHash(value), nil +} + +func sortedCertHeights(certIDs map[uint64]common.Hash) []uint64 { + heights := make([]uint64, 0, len(certIDs)) + for height := range certIDs { + heights = append(heights, height) + } + sort.Slice(heights, func(i, j int) bool { return heights[i] < heights[j] }) + return heights +} + +func fetchAgglayerAdminCertificate(ctx context.Context, adminURL string, certID common.Hash) (*agglayertypes.Certificate, error) { + response, err := rpc.JSONRPCCallWithContext(ctx, adminURL, "admin_getCertificate", certID) + if err != nil { + return nil, err + } + if response.Error != nil { + return nil, fmt.Errorf("admin_getCertificate returned error: %v", response.Error) + } + var pair [2]json.RawMessage + if err := json.Unmarshal(response.Result, &pair); err != nil { + return nil, fmt.Errorf("unmarshal admin_getCertificate result as [Certificate, CertificateHeader|null]: %w", err) + } + if len(pair[0]) == 0 || string(pair[0]) == "null" { + return nil, fmt.Errorf("admin_getCertificate returned nil certificate") + } + var cert agglayertypes.Certificate + if err := json.Unmarshal(pair[0], &cert); err != nil { + return nil, fmt.Errorf("unmarshal Certificate from admin_getCertificate result: %w", err) + } + return &cert, nil +} + +func validateAdminCertificate(cert *agglayertypes.Certificate, networkID uint32, height uint64, certID common.Hash) error { + if cert == nil { + return fmt.Errorf("certificate is nil") + } + if cert.NetworkID != networkID { + 
return fmt.Errorf("network_id %d does not match expected %d", cert.NetworkID, networkID) + } + if cert.Height != height { + return fmt.Errorf("height %d does not match expected %d", cert.Height, height) + } + if calculated := cert.CertificateID(); calculated != certID { + return fmt.Errorf("calculated certificate ID %s does not match expected %s", calculated.Hex(), certID.Hex()) + } + return nil +} + +func writeJSONFile(filePath string, value interface{}) error { + data, err := json.MarshalIndent(value, "", " ") + if err != nil { + return fmt.Errorf("marshal %s: %w", filePath, err) + } + if err := os.WriteFile(filePath, append(data, '\n'), 0o600); err != nil { + return fmt.Errorf("write %s: %w", filePath, err) + } + return nil +} + +func sanitizeAdminURL(rawURL string) string { + parsed, err := url.Parse(rawURL) + if err != nil { + return rawURL + } + parsed.User = nil + parsed.RawQuery = "" + parsed.Fragment = "" + return parsed.String() +} diff --git a/tools/backward_forward_let/export_cert_exits_test.go b/tools/backward_forward_let/export_cert_exits_test.go new file mode 100644 index 000000000..0d1186780 --- /dev/null +++ b/tools/backward_forward_let/export_cert_exits_test.go @@ -0,0 +1,214 @@ +package backward_forward_let + +import ( + "context" + "encoding/json" + "flag" + "math/big" + "os" + "path/filepath" + "testing" + + agglayertypes "github.com/agglayer/aggkit/agglayer/types" + bridgetypes "github.com/agglayer/aggkit/bridgesync/types" + "github.com/ethereum/go-ethereum/common" + "github.com/stretchr/testify/require" + "github.com/urfave/cli/v2" +) + +func TestLoadCertIDsFile(t *testing.T) { + t.Parallel() + + certID := common.HexToHash("0x1111111111111111111111111111111111111111111111111111111111111111") + path := filepath.Join(t.TempDir(), "cert-ids.json") + require.NoError(t, os.WriteFile(path, []byte(`{ + "network_id": 7, + "certificates": { + "42": "`+certID.Hex()+`" + } + }`), 0o600)) + + got, err := loadCertIDsFile(path, 7) + require.NoError(t, err) 
+ require.Equal(t, certID, got[42]) +} + +func TestLoadCertIDsFileRejectsWrongNetwork(t *testing.T) { + t.Parallel() + + path := filepath.Join(t.TempDir(), "cert-ids.json") + require.NoError(t, os.WriteFile(path, []byte(`{"network_id":8,"certificates":{}}`), 0o600)) + + _, err := loadCertIDsFile(path, 7) + require.Error(t, err) + require.Contains(t, err.Error(), "does not match config L2NetworkID") +} + +func TestParseCertificateIDRejectsShortHex(t *testing.T) { + t.Parallel() + + _, err := parseCertificateID("0xdead") + require.Error(t, err) + require.Contains(t, err.Error(), "32-byte") +} + +func TestValidateAdminCertificate(t *testing.T) { + t.Parallel() + + cert := exportTestCertificate(7, 42, nil) + certID := cert.CertificateID() + + require.NoError(t, validateAdminCertificate(cert, 7, 42, certID)) + require.ErrorContains(t, validateAdminCertificate(cert, 8, 42, certID), "network_id") + require.ErrorContains(t, validateAdminCertificate(cert, 7, 43, certID), "height") + require.ErrorContains(t, validateAdminCertificate(cert, 7, 42, common.HexToHash("0x01")), "certificate ID") +} + +func TestRunExportCertExitsWritesOverrideAndManifest(t *testing.T) { + cert42 := exportTestCertificate(7, 42, []*agglayertypes.BridgeExit{ + exportTestBridgeExit(1), + }) + cert43 := exportTestCertificate(7, 43, nil) + cert42ID := cert42.CertificateID() + cert43ID := cert43.CertificateID() + + oldFetch := fetchAdminCertificate + fetchAdminCertificate = func(_ context.Context, adminURL string, certID common.Hash) (*agglayertypes.Certificate, error) { + require.Equal(t, "http://example.test/admin?debug=true", adminURL) + switch certID { + case cert42ID: + return cert42, nil + case cert43ID: + return cert43, nil + default: + t.Fatalf("unexpected cert ID: %s", certID.Hex()) + return nil, nil + } + } + t.Cleanup(func() { fetchAdminCertificate = oldFetch }) + + tmpDir := t.TempDir() + cfgPath := filepath.Join(tmpDir, "config.toml") + require.NoError(t, os.WriteFile(cfgPath, 
[]byte("[BackwardForwardLET]\nL2NetworkID = 7\n"), 0o600)) + certIDsPath := filepath.Join(tmpDir, "cert-ids.json") + require.NoError(t, os.WriteFile(certIDsPath, []byte(`{ + "network_id": 7, + "certificates": { + "42": "`+cert42ID.Hex()+`", + "43": "`+cert43ID.Hex()+`" + } + }`), 0o600)) + + outPath := filepath.Join(tmpDir, "override.json") + manifestPath := filepath.Join(tmpDir, "manifest.json") + ctx := newExportCertExitsCLIContext(t, cfgPath, map[string]string{ + "agglayer-admin-url": "http://example.test/admin?debug=true", + "cert-ids-file": certIDsPath, + "out": outPath, + "manifest-out": manifestPath, + "max-certs": "10", + "timeout": "1m", + }) + + require.NoError(t, RunExportCertExits(ctx)) + + overrideData, err := os.ReadFile(outPath) + require.NoError(t, err) + var override overrideFileJSON + require.NoError(t, json.Unmarshal(overrideData, &override)) + require.Equal(t, uint32(7), override.NetworkID) + require.Len(t, override.Heights["42"], 1) + require.Empty(t, override.Heights["43"]) + + manifestData, err := os.ReadFile(manifestPath) + require.NoError(t, err) + var manifest exportCertExitsManifest + require.NoError(t, json.Unmarshal(manifestData, &manifest)) + require.Equal(t, "http://example.test/admin", manifest.AgglayerAdminURL) + require.Len(t, manifest.Certificates, 2) + require.Equal(t, uint64(42), manifest.Certificates[0].Height) + require.Equal(t, 1, manifest.Certificates[0].BridgeExitCount) + require.Equal(t, uint64(43), manifest.Certificates[1].Height) + require.Equal(t, 0, manifest.Certificates[1].BridgeExitCount) +} + +func TestRunExportCertExitsRejectsOverMaxCerts(t *testing.T) { + tmpDir := t.TempDir() + cfgPath := filepath.Join(tmpDir, "config.toml") + require.NoError(t, os.WriteFile(cfgPath, []byte("[BackwardForwardLET]\nL2NetworkID = 7\n"), 0o600)) + certID := common.HexToHash("0x1111111111111111111111111111111111111111111111111111111111111111") + certIDsPath := filepath.Join(tmpDir, "cert-ids.json") + require.NoError(t, 
os.WriteFile(certIDsPath, []byte(`{ + "network_id": 7, + "certificates": { + "1": "`+certID.Hex()+`", + "2": "`+certID.Hex()+`" + } + }`), 0o600)) + + ctx := newExportCertExitsCLIContext(t, cfgPath, map[string]string{ + "agglayer-admin-url": "http://example.test/admin", + "cert-ids-file": certIDsPath, + "out": filepath.Join(tmpDir, "override.json"), + "max-certs": "1", + "timeout": "1m", + }) + + err := RunExportCertExits(ctx) + require.Error(t, err) + require.Contains(t, err.Error(), "refusing to export 2 certificates") +} + +func newExportCertExitsCLIContext(t *testing.T, configPath string, flags map[string]string) *cli.Context { + t.Helper() + app := cli.NewApp() + app.Flags = []cli.Flag{ + &cli.StringSliceFlag{Name: "cfg", Aliases: []string{"c"}}, + } + parentSet := flag.NewFlagSet("app", flag.ContinueOnError) + for _, f := range app.Flags { + require.NoError(t, f.Apply(parentSet)) + } + require.NoError(t, parentSet.Parse([]string{"--cfg", configPath})) + parentCtx := cli.NewContext(app, parentSet, nil) + + commandSet := flag.NewFlagSet("export-cert-exits", flag.ContinueOnError) + commandSet.String("agglayer-admin-url", "", "") + commandSet.String("cert-ids-file", "", "") + commandSet.String("out", "", "") + commandSet.String("manifest-out", "", "") + commandSet.Uint64("max-certs", DefaultExportCertExitsMaxCerts, "") + commandSet.Duration("timeout", DefaultExportCertExitsTimeout, "") + for name, value := range flags { + require.NoError(t, commandSet.Set(name, value)) + } + return cli.NewContext(app, commandSet, parentCtx) +} + +func exportTestCertificate(networkID uint32, height uint64, exits []*agglayertypes.BridgeExit) *agglayertypes.Certificate { + if exits == nil { + exits = []*agglayertypes.BridgeExit{} + } + return &agglayertypes.Certificate{ + NetworkID: networkID, + Height: height, + PrevLocalExitRoot: common.HexToHash("0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), + NewLocalExitRoot: 
common.HexToHash("0xbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"), + BridgeExits: exits, + ImportedBridgeExits: []*agglayertypes.ImportedBridgeExit{}, + } +} + +func exportTestBridgeExit(index byte) *agglayertypes.BridgeExit { + return &agglayertypes.BridgeExit{ + LeafType: bridgetypes.LeafTypeAsset, + TokenInfo: &agglayertypes.TokenInfo{ + OriginNetwork: 0, + OriginTokenAddress: common.Address{}, + }, + DestinationNetwork: 7, + DestinationAddress: common.BytesToAddress([]byte{index}), + Amount: big.NewInt(int64(index)), + Metadata: nil, + } +} diff --git a/tools/backward_forward_let/network_info.go b/tools/backward_forward_let/network_info.go new file mode 100644 index 000000000..6d537f997 --- /dev/null +++ b/tools/backward_forward_let/network_info.go @@ -0,0 +1,32 @@ +package backward_forward_let + +import ( + "context" + "errors" + "fmt" + + agglayertypes "github.com/agglayer/aggkit/agglayer/types" + aggkitgrpc "github.com/agglayer/aggkit/grpc" + "google.golang.org/grpc/codes" +) + +type networkInfoClient interface { + GetNetworkInfo(ctx context.Context, networkID uint32) (agglayertypes.NetworkInfo, error) +} + +func getNetworkInfoAllowNotFound( + ctx context.Context, + client networkInfoClient, + networkID uint32, +) (agglayertypes.NetworkInfo, bool, error) { + info, err := client.GetNetworkInfo(ctx, networkID) + if err == nil { + return info, false, nil + } + + var grpcErr aggkitgrpc.GRPCError + if errors.As(err, &grpcErr) && grpcErr.Code == codes.NotFound { + return agglayertypes.NetworkInfo{}, true, nil + } + return agglayertypes.NetworkInfo{}, false, fmt.Errorf("get network info from agglayer: %w", err) +} diff --git a/tools/backward_forward_let/network_info_test.go b/tools/backward_forward_let/network_info_test.go new file mode 100644 index 000000000..d3084c819 --- /dev/null +++ b/tools/backward_forward_let/network_info_test.go @@ -0,0 +1,47 @@ +package backward_forward_let + +import ( + "context" + "errors" + "testing" + + 
agglayertypes "github.com/agglayer/aggkit/agglayer/types" + aggkitgrpc "github.com/agglayer/aggkit/grpc" + "github.com/stretchr/testify/require" + "google.golang.org/grpc/codes" +) + +type stubNetworkInfoClient struct { + info agglayertypes.NetworkInfo + err error +} + +func (s stubNetworkInfoClient) GetNetworkInfo( + _ context.Context, + _ uint32, +) (agglayertypes.NetworkInfo, error) { + return s.info, s.err +} + +func TestGetNetworkInfoAllowNotFound(t *testing.T) { + t.Parallel() + + info, notFound, err := getNetworkInfoAllowNotFound(context.Background(), stubNetworkInfoClient{ + err: aggkitgrpc.GRPCError{Code: codes.NotFound, Message: "not found"}, + }, 1) + + require.NoError(t, err) + require.True(t, notFound) + require.Empty(t, info) +} + +func TestGetNetworkInfoAllowNotFound_OtherError(t *testing.T) { + t.Parallel() + + _, notFound, err := getNetworkInfoAllowNotFound(context.Background(), stubNetworkInfoClient{ + err: errors.New("boom"), + }, 1) + + require.Error(t, err) + require.False(t, notFound) +} diff --git a/tools/backward_forward_let/recovery.go b/tools/backward_forward_let/recovery.go index c47f39a43..8cf90e5a2 100644 --- a/tools/backward_forward_let/recovery.go +++ b/tools/backward_forward_let/recovery.go @@ -8,6 +8,7 @@ import ( "github.com/0xPolygon/cdk-contracts-tooling/contracts/aggchain-multisig/agglayerbridgel2" "github.com/ethereum/go-ethereum/accounts/abi/bind" "github.com/ethereum/go-ethereum/common" + gethTypes "github.com/ethereum/go-ethereum/core/types" ) // ExecuteRecovery performs the on-chain recovery steps for the given diagnosis. 
@@ -78,6 +79,10 @@ func ExecuteRecovery(ctx context.Context, env *Env, diagnosis *DiagnosisResult) } } + if err := printFinalVerification(callOpts, env, diagnosis); err != nil { + return fmt.Errorf("final verification: %w", err) + } + return nil } @@ -94,6 +99,7 @@ func stepActivateEmergency( if err != nil { return fmt.Errorf("send ActivateEmergencyState tx: %w", err) } + printTxSent("ActivateEmergencyState", tx) receipt, err := env.waitReceiptFn(ctx, tx) if err != nil { @@ -102,6 +108,7 @@ func stepActivateEmergency( if receipt.Status != 1 { return fmt.Errorf("ActivateEmergencyState tx failed (status=%d)", receipt.Status) } + printTxConfirmed("ActivateEmergencyState", receipt) active, err := env.L2Bridge.IsEmergencyState(callOpts) if err != nil { @@ -128,6 +135,7 @@ func stepDeactivateEmergency( if err != nil { return fmt.Errorf("send DeactivateEmergencyState tx: %w", err) } + printTxSent("DeactivateEmergencyState", tx) receipt, err := env.waitReceiptFn(ctx, tx) if err != nil { @@ -136,6 +144,7 @@ func stepDeactivateEmergency( if receipt.Status != 1 { return fmt.Errorf("DeactivateEmergencyState tx failed (status=%d)", receipt.Status) } + printTxConfirmed("DeactivateEmergencyState", receipt) active, err := env.L2Bridge.IsEmergencyState(callOpts) if err != nil { @@ -188,6 +197,7 @@ func stepBackwardLET( if err != nil { return fmt.Errorf("send BackwardLET tx: %w", err) } + printTxSent("BackwardLET", tx) receipt, err := env.waitReceiptFn(ctx, tx) if err != nil { @@ -196,6 +206,7 @@ func stepBackwardLET( if receipt.Status != 1 { return fmt.Errorf("BackwardLET tx failed (status=%d)", receipt.Status) } + printTxConfirmed("BackwardLET", receipt) dcBig, err := env.L2Bridge.DepositCount(callOpts) if err != nil { @@ -255,6 +266,7 @@ func stepForwardLETDivergentLeaves( if err != nil { return fmt.Errorf("send ForwardLET (divergent leaves) tx: %w", err) } + printTxSent("ForwardLET (divergent leaves)", tx) receipt, err := env.waitReceiptFn(ctx, tx) if err != nil { @@ -263,6 
+275,7 @@ func stepForwardLETDivergentLeaves( if receipt.Status != 1 { return fmt.Errorf("ForwardLET (divergent leaves) tx failed (status=%d)", receipt.Status) } + printTxConfirmed("ForwardLET (divergent leaves)", receipt) expectedDC := diagnosis.DivergencePoint + uint32(len(diagnosis.DivergentLeaves)) @@ -343,6 +356,7 @@ func stepForwardLETExtraL2Bridges( if err != nil { return fmt.Errorf("send ForwardLET (extra L2 bridges) tx: %w", err) } + printTxSent("ForwardLET (extra L2 bridges)", tx) receipt, err := env.waitReceiptFn(ctx, tx) if err != nil { @@ -351,6 +365,7 @@ func stepForwardLETExtraL2Bridges( if receipt.Status != 1 { return fmt.Errorf("ForwardLET (extra L2 bridges) tx failed (status=%d)", receipt.Status) } + printTxConfirmed("ForwardLET (extra L2 bridges)", receipt) expectedDC := afterDivergentCount + uint32(len(diagnosis.ExtraL2Bridges)) @@ -375,3 +390,54 @@ func stepForwardLETExtraL2Bridges( fmt.Printf("[step] ForwardLET (extra L2 bridges) complete. DC=%d, LER=%s\n", expectedDC, expectedLER.Hex()) return nil } + +func printTxSent(name string, tx *gethTypes.Transaction) { + hash, ok := txHashHex(tx) + if !ok { + fmt.Printf("[tx] %s sent.\n", name) + return + } + fmt.Printf("[tx] %s sent: %s\n", name, hash) +} + +func txHashHex(tx *gethTypes.Transaction) (hash string, ok bool) { + if tx == nil { + return "", false + } + defer func() { + if recover() != nil { + hash = "" + ok = false + } + }() + return tx.Hash().Hex(), true +} + +func printTxConfirmed(name string, receipt *gethTypes.Receipt) { + if receipt == nil || receipt.BlockNumber == nil { + fmt.Printf("[tx] %s confirmed.\n", name) + return + } + fmt.Printf("[tx] %s confirmed in block %s.\n", name, receipt.BlockNumber.String()) +} + +func printFinalVerification(callOpts *bind.CallOpts, env *Env, diagnosis *DiagnosisResult) error { + dcBig, err := env.L2Bridge.DepositCount(callOpts) + if err != nil { + return fmt.Errorf("get final deposit count: %w", err) + } + root32, err := 
env.L2Bridge.GetRoot(callOpts) + if err != nil { + return fmt.Errorf("get final LER: %w", err) + } + + finalDC := uint32(dcBig.Uint64()) + finalLER := common.Hash(root32) + fmt.Printf("[verify] Final L2 state: DC=%d, LER=%s\n", finalDC, finalLER.Hex()) + if finalLER == diagnosis.L1SettledLER && finalDC == diagnosis.L1SettledDepositCount { + fmt.Println("[verify] Final L2 state matches L1 settled state.") + } else { + fmt.Println("[verify] Final L2 state includes replayed L2 bridge data; rerun diagnosis after aggsender settles the follow-up certificate.") + } + return nil +} diff --git a/tools/backward_forward_let/run.go b/tools/backward_forward_let/run.go index e14e57b5d..36082d748 100644 --- a/tools/backward_forward_let/run.go +++ b/tools/backward_forward_let/run.go @@ -174,14 +174,19 @@ func Run(c *cli.Context) error { PrintDiagnosis(os.Stdout, diagnosis) - if diagnosis.Case == NoDivergence { + if diagnosis.AggsenderAPIFailed { + fmt.Printf("\nNo recovery transactions were sent.\n") + fmt.Printf("Provide the missing certificate exits with --cert-exits-file, then rerun diagnosis.\n") + return nil + } + + if diagnosis.IsCompleteNoDivergence() { fmt.Println("Nothing to do: L1 settled state and L2 on-chain state are in sync.") return nil } - if diagnosis.AggsenderAPIFailed { - fmt.Printf("\nAggsender RPC was unreachable. 
Cannot proceed with recovery.\n") - fmt.Printf("Contact your AggLayer admin with the failed certificate details above.\n") + if c.Bool("diagnose-only") { + fmt.Println("Diagnose-only mode: no recovery transactions were sent.") return nil } diff --git a/tools/backward_forward_let/send_cert.go b/tools/backward_forward_let/send_cert.go index b29d9ce0a..1c94c2c99 100644 --- a/tools/backward_forward_let/send_cert.go +++ b/tools/backward_forward_let/send_cert.go @@ -52,6 +52,14 @@ func RunSendCert(c *cli.Context) error { return fmt.Errorf("parse certificate JSON: %w", err) } + noDB := c.Bool("no-db") + if noDB && !c.Bool("staging-only") { + return fmt.Errorf("--no-db requires --staging-only") + } + if noDB && c.String("db-path") != "" { + return fmt.Errorf("--no-db and --db-path are mutually exclusive") + } + // Create agglayer client. logger := log.GetDefaultLogger() agglayerClient, err := agglayer.NewAgglayerClient(cfg.AgglayerClient, logger) @@ -60,6 +68,10 @@ func RunSendCert(c *cli.Context) error { } // Open aggsender DB. + if noDB { + return sendCertificate(c.Context, cert, certJSON, agglayerClient, nil) + } + dbPath := c.String("db-path") storage, err := openAggsenderStorage(logger, dbPath) if err != nil { @@ -83,7 +95,14 @@ func sendCertificate( if err != nil { return fmt.Errorf("send certificate to agglayer: %w", err) } - fmt.Printf("Certificate sent. Hash: %s\n", certHash.Hex()) + fmt.Printf("Certificate ID: %s\n", certHash.Hex()) + fmt.Printf("Certificate height: %d\n", cert.Height) + + if storage == nil { + fmt.Println("Aggsender DB storage skipped (--no-db).") + fmt.Printf("Next: backward-forward-let --cfg cert-status --wait-settled --height %d\n", cert.Height) + return nil + } // Derive FromBlock from the previous certificate so that aggsender's retry // verification (verifyRetryCertStartingBlock) passes when this cert goes InError. @@ -120,9 +139,11 @@ func sendCertificate( // Store in DB. 
if err := storage.SaveLastSentCertificate(ctx, record); err != nil { - return fmt.Errorf("store certificate in aggsender DB: %w", err) + return fmt.Errorf("certificate sent with hash %s at height %d, but storing in aggsender DB failed: %w", + certHash.Hex(), cert.Height, err) } fmt.Printf("Certificate stored in aggsender DB at height %d.\n", cert.Height) + fmt.Printf("Next: backward-forward-let --cfg cert-status --wait-settled --height %d\n", cert.Height) return nil } diff --git a/tools/backward_forward_let/send_cert_test.go b/tools/backward_forward_let/send_cert_test.go index 1f20d503f..a553d73fb 100644 --- a/tools/backward_forward_let/send_cert_test.go +++ b/tools/backward_forward_let/send_cert_test.go @@ -144,6 +144,20 @@ func TestSendCertificate_HappyPath(t *testing.T) { require.Equal(t, certJSON, *storage.saved.SignedCertificate) } +func TestSendCertificate_NoDB(t *testing.T) { + t.Parallel() + + expectedHash := common.HexToHash("0xbeef") + sender := &stubAgglayerSender{hash: expectedHash} + + certJSON := minimalCertJSON(9) + var cert agglayertypes.Certificate + require.NoError(t, cert.UnmarshalJSON([]byte(certJSON))) + + err := sendCertificate(context.Background(), cert, certJSON, sender, nil) + require.NoError(t, err) +} + func TestSendCertificate_AgglayerError(t *testing.T) { t.Parallel() @@ -287,6 +301,8 @@ func newSendCertCLIContext(flags map[string]string) *cli.Context { fs.String("cert-json", "", "") fs.String("cert-file", "", "") fs.String("db-path", "", "") + fs.Bool("no-db", false, "") + fs.Bool("staging-only", false, "") for name, val := range flags { _ = fs.Set(name, val) } @@ -347,3 +363,29 @@ func TestRunSendCert_InvalidCertJSON(t *testing.T) { require.Error(t, err) require.Contains(t, err.Error(), "parse certificate JSON") } + +func TestRunSendCert_NoDBRequiresStagingOnly(t *testing.T) { + t.Parallel() + + ctx := newSendCertCLIContext(map[string]string{ + "cert-json": minimalCertJSON(1), + "no-db": "true", + }) + err := RunSendCert(ctx) + 
require.Error(t, err) + require.Contains(t, err.Error(), "--no-db requires --staging-only") +} + +func TestRunSendCert_NoDBRejectsDBPath(t *testing.T) { + t.Parallel() + + ctx := newSendCertCLIContext(map[string]string{ + "cert-json": minimalCertJSON(1), + "db-path": "/tmp/test.sqlite", + "no-db": "true", + "staging-only": "true", + }) + err := RunSendCert(ctx) + require.Error(t, err) + require.Contains(t, err.Error(), "--no-db and --db-path are mutually exclusive") +} diff --git a/tools/backward_forward_let/types.go b/tools/backward_forward_let/types.go index 4266416aa..fe5f60b5c 100644 --- a/tools/backward_forward_let/types.go +++ b/tools/backward_forward_let/types.go @@ -94,3 +94,9 @@ type MissingCertInfo struct { // When false, the operator must contact the agglayer admin. CertIDResolved bool } + +// IsCompleteNoDivergence reports whether the diagnosis fully proved that the +// L1 settled state and L2 on-chain state are in sync. +func (r *DiagnosisResult) IsCompleteNoDivergence() bool { + return r != nil && r.Case == NoDivergence && !r.AggsenderAPIFailed +} From 1f120bf7ae607640599c38441d255356c01146f7 Mon Sep 17 00:00:00 2001 From: AIrnau Bennassar Date: Sat, 9 May 2026 09:07:56 +0000 Subject: [PATCH 3/4] Fix BFL lint and E2E sequencing --- test/e2e/backwardforwardlet_test.go | 78 ++++++++++++++++++- tools/backward_forward_let/cert_status.go | 15 ++-- tools/backward_forward_let/cmd/main.go | 28 +++++-- tools/backward_forward_let/craft_cert.go | 24 ++++-- tools/backward_forward_let/diagnosis.go | 6 +- .../backward_forward_let/export_cert_exits.go | 26 +++++-- tools/backward_forward_let/helpers.go | 10 ++- tools/backward_forward_let/recovery.go | 13 ++-- 8 files changed, 166 insertions(+), 34 deletions(-) diff --git a/test/e2e/backwardforwardlet_test.go b/test/e2e/backwardforwardlet_test.go index f80be6371..e3ad0ce4a 100644 --- a/test/e2e/backwardforwardlet_test.go +++ b/test/e2e/backwardforwardlet_test.go @@ -229,10 +229,11 @@ func 
TestBackwardForwardLET_Case2(t *testing.T) { defer recoveryCancel() err = bfl.ExecuteRecovery(recoveryCtx, toolEnv, diagnosis) require.NoError(t, err) + waitForAggsenderFollowUpCertificate(ctx, t, toolEnv, "case2-recovery") // Verify: DC should equal DivergencePoint + divergent leaves + extra real bridges. - // For Case2, L2 LER will NOT match L1 settled LER because extra real L2 bridges were - // appended after the fake leaf; the next aggsender cert will advance L1 to match. + // For Case2, recovery appends extra real L2 bridges after the fake leaf. The follow-up + // aggsender certificate above advances L1 to the recovered L2 state before the next test. callOpts := &bind.CallOpts{Context: ctx} expectedDC := diagnosis.DivergencePoint + uint32(len(diagnosis.DivergentLeaves)) + uint32(len(diagnosis.ExtraL2Bridges)) @@ -382,10 +383,11 @@ func TestBackwardForwardLET_Case4(t *testing.T) { defer recoveryCancel() err = bfl.ExecuteRecovery(recoveryCtx, toolEnv, diagnosis) require.NoError(t, err) + waitForAggsenderFollowUpCertificate(ctx, t, toolEnv, "case4-recovery") // Verify: DC should equal DivergencePoint + divergent leaves + extra real bridges. - // For Case4, L2 LER will NOT match L1 settled LER because extra real L2 bridges were - // appended after the fake leaves; the next aggsender cert will advance L1 to match. + // For Case4, recovery appends extra real L2 bridges after the fake leaves. The follow-up + // aggsender certificate above advances L1 to the recovered L2 state before later tests. 
callOpts := &bind.CallOpts{Context: ctx} expectedDC := diagnosis.DivergencePoint + uint32(len(diagnosis.DivergentLeaves)) + uint32(len(diagnosis.ExtraL2Bridges)) @@ -827,6 +829,74 @@ func waitForCertificateToSettle( require.NoError(t, err, "timeout waiting for certificate at height=%d to settle", expectedHeight) } +func waitForAggsenderFollowUpCertificate(ctx context.Context, t *testing.T, toolEnv *bfl.Env, label string) { + t.Helper() + waitForBridgeServiceSynced(ctx, t) + + callOpts := &bind.CallOpts{Context: ctx} + root, err := toolEnv.L2Bridge.GetRoot(callOpts) + require.NoError(t, err, "get L2 root before aggsender follow-up") + l2LER := common.Hash(root) + dcBig, err := toolEnv.L2Bridge.DepositCount(callOpts) + require.NoError(t, err, "get L2 deposit count before aggsender follow-up") + l2DC := uint32(dcBig.Uint64()) + + info, err := toolEnv.AgglayerClient.GetNetworkInfo(ctx, toolEnv.L2NetworkID) + require.NoError(t, err, "get network info before aggsender follow-up") + if agglayerMatchesL2(info, l2LER, l2DC) { + log.Infof("[waitForAggsenderFollowUpCertificate] %s already reconciled at dc=%d", label, l2DC) + return + } + + log.Infof("[waitForAggsenderFollowUpCertificate] %s triggering aggsender to reconcile dc=%d ler=%s", + label, l2DC, l2LER.Hex()) + triggerAggsenderCertificate(ctx, t) + + err = pollWithBackoff(ctx, bflNoPendingTimeout, backoffInitial, backoffMax, + "aggsender-follow-up-"+label, + func() (bool, error) { + pollInfo, pollErr := toolEnv.AgglayerClient.GetNetworkInfo(ctx, toolEnv.L2NetworkID) + if pollErr != nil { + log.Debugf("[waitForAggsenderFollowUpCertificate] GetNetworkInfo error (retrying): %v", pollErr) + return false, nil + } + + settledH := nilStr + if pollInfo.SettledHeight != nil { + settledH = fmt.Sprintf("%d", *pollInfo.SettledHeight) + } + settledLER := nilStr + if pollInfo.SettledLER != nil { + settledLER = pollInfo.SettledLER.Hex() + } + settledDC := nilStr + if pollInfo.SettledLETLeafCount != nil { + settledDC = 
fmt.Sprintf("%d", *pollInfo.SettledLETLeafCount) + } + log.Debugf("[waitForAggsenderFollowUpCertificate] settledH=%s settledLER=%s settledDC=%s", + settledH, settledLER, settledDC) + return agglayerMatchesL2(pollInfo, l2LER, l2DC), nil + }, + ) + require.NoError(t, err, "timeout waiting for aggsender follow-up certificate after %s", label) +} + +func agglayerMatchesL2(info agglayertypes.NetworkInfo, l2LER common.Hash, l2DC uint32) bool { + return info.SettledLER != nil && + *info.SettledLER == l2LER && + info.SettledLETLeafCount != nil && + uint32(*info.SettledLETLeafCount) == l2DC +} + +func triggerAggsenderCertificate(ctx context.Context, t *testing.T) { + t.Helper() + response, err := rpc.JSONRPCCallWithContext(ctx, testEnv.AggsenderRPCURL, "aggsender_triggerCertificate") + require.NoError(t, err, "trigger aggsender certificate") + if response.Error != nil { + require.Failf(t, "trigger aggsender certificate", "RPC error: %v", response.Error) + } +} + // loadCertSignerKey loads the sequencer keystore (the agglayer proof signer for PP networks). func loadCertSignerKey(t *testing.T) *ecdsa.PrivateKey { t.Helper() diff --git a/tools/backward_forward_let/cert_status.go b/tools/backward_forward_let/cert_status.go index 17117ebfc..77d54baf6 100644 --- a/tools/backward_forward_let/cert_status.go +++ b/tools/backward_forward_let/cert_status.go @@ -13,7 +13,10 @@ import ( "github.com/urfave/cli/v2" ) -const DefaultCertStatusTimeout = 30 * time.Minute +const ( + DefaultCertStatusTimeout = 30 * time.Minute + certStatusPollInterval = 15 * time.Second +) // RunCertStatus prints AggLayer certificate settlement and pending status. 
func RunCertStatus(c *cli.Context) error { @@ -56,9 +59,9 @@ func RunCertStatus(c *cli.Context) error { return fmt.Errorf("timed out after %s waiting for requested certificate status", timeout) } select { - case <-c.Context.Done(): - return c.Context.Err() - case <-time.After(15 * time.Second): + case <-c.Done(): + return c.Err() + case <-time.After(certStatusPollInterval): } } } @@ -111,7 +114,9 @@ func printCertStatusTo(w io.Writer, info agglayertypes.NetworkInfo, networkID ui switch { case info.SettledHeight != nil && *info.SettledHeight >= requestedHeight: fmt.Fprintf(w, "Requested height status: Settled\n") - case info.LatestPendingHeight != nil && *info.LatestPendingHeight == requestedHeight && info.LatestPendingStatus != nil: + case info.LatestPendingHeight != nil && + *info.LatestPendingHeight == requestedHeight && + info.LatestPendingStatus != nil: fmt.Fprintf(w, "Requested height status: %s\n", info.LatestPendingStatus.String()) default: fmt.Fprintln(w, "Requested height status: not settled") diff --git a/tools/backward_forward_let/cmd/main.go b/tools/backward_forward_let/cmd/main.go index d3db946f7..b30ad40b8 100644 --- a/tools/backward_forward_let/cmd/main.go +++ b/tools/backward_forward_let/cmd/main.go @@ -83,7 +83,10 @@ func main() { &cli.StringFlag{Name: "amount", Value: "0", Usage: "Fake bridge exit amount"}, &cli.UintFlag{Name: "starting-exit-index", Usage: "Starting index for deterministic fake exit uniqueness"}, &cli.StringFlag{Name: "nonce", Usage: "Optional nonce used to derive fake exit destination addresses"}, - &cli.Uint64Flag{Name: "l1-info-tree-leaf-count", Usage: "Override L1 info tree leaf count when aggsender header data is unavailable"}, + &cli.Uint64Flag{ + Name: "l1-info-tree-leaf-count", + Usage: "Override L1 info tree leaf count when aggsender header data is unavailable", + }, &cli.Uint64Flag{Name: "signer-index", Usage: "Multisig signer index to write into the crafted certificate"}, &cli.StringFlag{Name: "out", Usage: "Output path 
for the crafted certificate JSON", Required: true}, }, @@ -96,7 +99,11 @@ func main() { &cli.Uint64Flag{Name: "height", Usage: "Certificate height to check"}, &cli.BoolFlag{Name: "wait-no-pending", Usage: "Wait until AggLayer has no open pending certificate"}, &cli.BoolFlag{Name: "wait-settled", Usage: "Wait until --height is settled"}, - &cli.DurationFlag{Name: "timeout", Value: backward_forward_let.DefaultCertStatusTimeout, Usage: "Maximum wait duration"}, + &cli.DurationFlag{ + Name: "timeout", + Value: backward_forward_let.DefaultCertStatusTimeout, + Usage: "Maximum wait duration", + }, }, Action: backward_forward_let.RunCertStatus, }, @@ -107,9 +114,20 @@ func main() { &cli.StringFlag{Name: "agglayer-admin-url", Usage: "Read-only AggLayer admin JSON-RPC URL", Required: true}, &cli.StringFlag{Name: "cert-ids-file", Usage: "JSON file mapping certificate heights to cert IDs", Required: true}, &cli.StringFlag{Name: "out", Usage: "Output certificate exits override JSON path", Required: true}, - &cli.StringFlag{Name: "manifest-out", Usage: "Output source manifest JSON path (default: .manifest.json)"}, - &cli.Uint64Flag{Name: "max-certs", Value: backward_forward_let.DefaultExportCertExitsMaxCerts, Usage: "Maximum certificates to export in one batch"}, - &cli.DurationFlag{Name: "timeout", Value: backward_forward_let.DefaultExportCertExitsTimeout, Usage: "Maximum export duration"}, + &cli.StringFlag{ + Name: "manifest-out", + Usage: "Output source manifest JSON path (default: .manifest.json)", + }, + &cli.Uint64Flag{ + Name: "max-certs", + Value: backward_forward_let.DefaultExportCertExitsMaxCerts, + Usage: "Maximum certificates to export in one batch", + }, + &cli.DurationFlag{ + Name: "timeout", + Value: backward_forward_let.DefaultExportCertExitsTimeout, + Usage: "Maximum export duration", + }, }, Action: backward_forward_let.RunExportCertExits, }, diff --git a/tools/backward_forward_let/craft_cert.go b/tools/backward_forward_let/craft_cert.go index 
e04543539..a0fa030a9 100644 --- a/tools/backward_forward_let/craft_cert.go +++ b/tools/backward_forward_let/craft_cert.go @@ -20,6 +20,8 @@ import ( "github.com/urfave/cli/v2" ) +const craftCertFileMode os.FileMode = 0o600 + // RunCraftCert builds a staging-only testing certificate and writes it as JSON. func RunCraftCert(c *cli.Context) error { if !c.Bool("staging-only") { @@ -65,7 +67,7 @@ func RunCraftCert(c *cli.Context) error { return fmt.Errorf("marshal crafted certificate: %w", err) } out := filepath.Clean(c.String("out")) - if err := os.WriteFile(out, data, 0o600); err != nil { + if err := os.WriteFile(out, data, craftCertFileMode); err != nil { return fmt.Errorf("write crafted certificate %s: %w", out, err) } @@ -96,7 +98,7 @@ func craftStagingCertificate( env *Env, opts craftCertOptions, ) (*agglayertypes.Certificate, error) { - amount, ok := new(big.Int).SetString(opts.Amount, 10) + amount, ok := new(big.Int).SetString(opts.Amount, decimalBase) if !ok || amount.Sign() < 0 { return nil, fmt.Errorf("--amount must be a non-negative base-10 integer") } @@ -119,18 +121,30 @@ func craftStagingCertificate( prevLER = *info.SettledLER existingLeafCount = uint32(*info.SettledLETLeafCount) if opts.RequireNoOpenCerts && hasOpenPendingAtOrAbove(info, certHeight) { - return nil, fmt.Errorf("pending certificate race: latest pending height/status is %s; wait for it to settle or enter InError before crafting", pendingSummary(info)) + return nil, fmt.Errorf( + "pending certificate race: latest pending height/status is %s; "+ + "wait for it to settle or enter InError before crafting", + pendingSummary(info), + ) } if l1InfoTreeLeafCount == 0 { count, err := l1InfoTreeLeafCountFromAggsender(env, *info.SettledHeight) if err != nil { - return nil, fmt.Errorf("get L1 info tree leaf count from aggsender for height %d: %w; rerun with --l1-info-tree-leaf-count", *info.SettledHeight, err) + return nil, fmt.Errorf( + "get L1 info tree leaf count from aggsender for height %d: %w; "+ 
+ "rerun with --l1-info-tree-leaf-count", + *info.SettledHeight, err, + ) } l1InfoTreeLeafCount = count } } else { if opts.RequireNoOpenCerts && hasOpenPendingAtOrAbove(info, 0) { - return nil, fmt.Errorf("pending certificate race: latest pending height/status is %s; wait for it to settle or enter InError before crafting", pendingSummary(info)) + return nil, fmt.Errorf( + "pending certificate race: latest pending height/status is %s; "+ + "wait for it to settle or enter InError before crafting", + pendingSummary(info), + ) } callOpts := &bind.CallOpts{Context: ctx} root, err := env.L2Bridge.GetRoot(callOpts) diff --git a/tools/backward_forward_let/diagnosis.go b/tools/backward_forward_let/diagnosis.go index f145d3aa6..29b52ebe8 100644 --- a/tools/backward_forward_let/diagnosis.go +++ b/tools/backward_forward_let/diagnosis.go @@ -282,7 +282,11 @@ func collectExtraL2Bridges( br, err := env.BridgeService.GetBridgeByDepositCount(ctx, env.L2NetworkID, dc) if err != nil { if isNotFound(err) { - return nil, fmt.Errorf("bridge service data not ready for recovery: missing L2 bridge at DC=%d; wait for bridge-service indexing and rerun diagnosis", dc) + return nil, fmt.Errorf( + "bridge service data not ready for recovery: missing L2 bridge at DC=%d; "+ + "wait for bridge-service indexing and rerun diagnosis", + dc, + ) } return nil, fmt.Errorf("bridge service data not ready for recovery: get L2 bridge at DC=%d: %w", dc, err) } diff --git a/tools/backward_forward_let/export_cert_exits.go b/tools/backward_forward_let/export_cert_exits.go index 7296fb932..6446997f5 100644 --- a/tools/backward_forward_let/export_cert_exits.go +++ b/tools/backward_forward_let/export_cert_exits.go @@ -22,6 +22,7 @@ import ( const ( DefaultExportCertExitsMaxCerts uint64 = 2000 DefaultExportCertExitsTimeout = 30 * time.Minute + exportCertExitsFileMode = 0o600 ) var fetchAdminCertificate = fetchAgglayerAdminCertificate @@ -182,7 +183,11 @@ func sortedCertHeights(certIDs map[uint64]common.Hash) 
[]uint64 { return heights } -func fetchAgglayerAdminCertificate(ctx context.Context, adminURL string, certID common.Hash) (*agglayertypes.Certificate, error) { +func fetchAgglayerAdminCertificate( + ctx context.Context, + adminURL string, + certID common.Hash, +) (*agglayertypes.Certificate, error) { response, err := rpc.JSONRPCCallWithContext(ctx, adminURL, "admin_getCertificate", certID) if err != nil { return nil, err @@ -192,7 +197,10 @@ func fetchAgglayerAdminCertificate(ctx context.Context, adminURL string, certID } var pair [2]json.RawMessage if err := json.Unmarshal(response.Result, &pair); err != nil { - return nil, fmt.Errorf("unmarshal admin_getCertificate result as [Certificate, CertificateHeader|null]: %w", err) + return nil, fmt.Errorf( + "unmarshal admin_getCertificate result as [Certificate, CertificateHeader|null]: %w", + err, + ) } if len(pair[0]) == 0 || string(pair[0]) == "null" { return nil, fmt.Errorf("admin_getCertificate returned nil certificate") @@ -204,7 +212,12 @@ func fetchAgglayerAdminCertificate(ctx context.Context, adminURL string, certID return &cert, nil } -func validateAdminCertificate(cert *agglayertypes.Certificate, networkID uint32, height uint64, certID common.Hash) error { +func validateAdminCertificate( + cert *agglayertypes.Certificate, + networkID uint32, + height uint64, + certID common.Hash, +) error { if cert == nil { return fmt.Errorf("certificate is nil") } @@ -215,7 +228,10 @@ func validateAdminCertificate(cert *agglayertypes.Certificate, networkID uint32, return fmt.Errorf("height %d does not match expected %d", cert.Height, height) } if calculated := cert.CertificateID(); calculated != certID { - return fmt.Errorf("calculated certificate ID %s does not match expected %s", calculated.Hex(), certID.Hex()) + return fmt.Errorf( + "calculated certificate ID %s does not match expected %s", + calculated.Hex(), certID.Hex(), + ) } return nil } @@ -225,7 +241,7 @@ func writeJSONFile(filePath string, value interface{}) error 
{ if err != nil { return fmt.Errorf("marshal %s: %w", filePath, err) } - if err := os.WriteFile(filePath, append(data, '\n'), 0o600); err != nil { + if err := os.WriteFile(filePath, append(data, '\n'), exportCertExitsFileMode); err != nil { return fmt.Errorf("write %s: %w", filePath, err) } return nil diff --git a/tools/backward_forward_let/helpers.go b/tools/backward_forward_let/helpers.go index 620492fb9..624fbc5b2 100644 --- a/tools/backward_forward_let/helpers.go +++ b/tools/backward_forward_let/helpers.go @@ -132,7 +132,8 @@ func makeZeroHashes() []common.Hash { // set by any leaf insertion. This matches the contract's initial storage state and is required // by _checkValidSubtreeFrontier, which rejects non-zero values in unused positions. func computeFrontier(leafHashes []common.Hash, targetIndex uint32) ([32]common.Hash, error) { - if uint32(len(leafHashes)) < targetIndex { + target := int(targetIndex) + if len(leafHashes) < target { return [32]common.Hash{}, fmt.Errorf( "insufficient leaf hashes: need %d, got %d", targetIndex, len(leafHashes), ) @@ -143,10 +144,11 @@ func computeFrontier(leafHashes []common.Hash, targetIndex uint32) ([32]common.H // contract's initial _branch storage state before any leaves are inserted. var frontier [32]common.Hash - for i := uint32(0); i < targetIndex; i++ { - node := leafHashes[i] + for i := 0; i < target; i++ { + node := leafHashes[i] //nolint:gosec // i is bounded by len(leafHashes) through target. + leafIndex := uint32(i) for h := range 32 { - if (i>>h)&1 == 0 { + if (leafIndex>>h)&1 == 0 { // Left child: cache node at this height, propagate up with zero sibling. 
frontier[h] = node node = crypto.Keccak256Hash(node.Bytes(), zeros[h].Bytes()) diff --git a/tools/backward_forward_let/recovery.go b/tools/backward_forward_let/recovery.go index 8cf90e5a2..16cbecf51 100644 --- a/tools/backward_forward_let/recovery.go +++ b/tools/backward_forward_let/recovery.go @@ -179,12 +179,12 @@ func stepBackwardLET( } var frontierBytes [32][32]byte - for i, h := range frontier { - frontierBytes[i] = [32]byte(h) + for i := 0; i < len(frontier); i++ { + frontierBytes[i] = [32]byte(frontier[i]) } var proofBytes [32][32]byte - for i, h := range proof { - proofBytes[i] = [32]byte(h) + for i := 0; i < len(proof); i++ { + proofBytes[i] = [32]byte(proof[i]) } tx, err := env.L2Bridge.BackwardLET( @@ -437,7 +437,10 @@ func printFinalVerification(callOpts *bind.CallOpts, env *Env, diagnosis *Diagno if finalLER == diagnosis.L1SettledLER && finalDC == diagnosis.L1SettledDepositCount { fmt.Println("[verify] Final L2 state matches L1 settled state.") } else { - fmt.Println("[verify] Final L2 state includes replayed L2 bridge data; rerun diagnosis after aggsender settles the follow-up certificate.") + fmt.Println( + "[verify] Final L2 state includes replayed L2 bridge data; " + + "rerun diagnosis after aggsender settles the follow-up certificate.", + ) } return nil } From d2665cad5e8eb62df64adf928b4fe792520329d1 Mon Sep 17 00:00:00 2001 From: Arnau Bennassar Date: Sat, 9 May 2026 10:10:54 +0100 Subject: [PATCH 4/4] feat: load AggLayer certificate fallback files --- tools/backward_forward_let/cmd/main.go | 4 +- tools/backward_forward_let/config.go | 8 +- tools/backward_forward_let/diagnosis.go | 31 +++-- tools/backward_forward_let/override.go | 146 ++++++++++++++++++-- tools/backward_forward_let/override_test.go | 114 +++++++++++++++ 5 files changed, 270 insertions(+), 33 deletions(-) diff --git a/tools/backward_forward_let/cmd/main.go b/tools/backward_forward_let/cmd/main.go index b30ad40b8..05e1202aa 100644 --- a/tools/backward_forward_let/cmd/main.go +++ 
b/tools/backward_forward_let/cmd/main.go @@ -32,8 +32,8 @@ func main() { &cli.StringFlag{ Name: "cert-exits-file", Aliases: []string{"f"}, - Usage: "Path to a JSON override file containing pre-extracted bridge exits keyed by certificate height." + - " Use when the aggsender DB is empty and the tool reports missing cert IDs.", + Usage: "Path to a JSON fallback file containing either raw AggLayer certificates or pre-extracted bridge exits keyed by certificate height." + + " Use when the aggsender DB is empty and the tool reports missing certificate exits.", }, } app.Action = backward_forward_let.Run diff --git a/tools/backward_forward_let/config.go b/tools/backward_forward_let/config.go index 605d4e9f6..d56516671 100644 --- a/tools/backward_forward_let/config.go +++ b/tools/backward_forward_let/config.go @@ -54,11 +54,9 @@ type BackwardForwardLETConfig struct { // L2NetworkID is the network ID of the L2 chain. L2NetworkID uint32 `mapstructure:"L2NetworkID"` - // CertificateExitsFile is an optional path to a JSON override file containing - // pre-extracted bridge exits keyed by certificate height. When set, used as a - // fallback if the aggsender RPC cannot supply bridge exits for a height. - // Prefer generating the file with the export-cert-exits subcommand from an - // authoritative height-to-cert-ID map. + // CertificateExitsFile is an optional path to a JSON fallback file containing + // raw AggLayer certificates or pre-extracted bridge exits keyed by certificate + // height. When set, used if the aggsender RPC cannot supply bridge exits for a height. 
CertificateExitsFile string `mapstructure:"CertificateExitsFile"` } diff --git a/tools/backward_forward_let/diagnosis.go b/tools/backward_forward_let/diagnosis.go index 29b52ebe8..730ef1b16 100644 --- a/tools/backward_forward_let/diagnosis.go +++ b/tools/backward_forward_let/diagnosis.go @@ -136,7 +136,8 @@ type missingCertsError struct { // getBridgeExitsForHeight fetches bridge exits for a certificate height using a // two-source fallback chain: // 1. Aggsender RPC (primary) — works when the aggsender DB is intact. -// 2. JSON override file (secondary) — operator-supplied pre-extracted data. +// 2. JSON fallback file (secondary) — operator-supplied AggLayer certificate +// data or pre-extracted bridge exits. // // An error is returned only when both sources fail or the override has no entry // for the given height. @@ -434,8 +435,8 @@ func PrintDiagnosis(w io.Writer, result *DiagnosisResult) { // printMissingCertReport prints actionable, copy-pasteable instructions when one // or more certificate heights had no bridge exit data from any source. // It lists each missing height with its cert ID (or UNKNOWN), explains how to call -// admin_getCertificate on the agglayer, shows the override file template with the -// actual heights, and prints the re-run command. +// admin_getCertificate on the agglayer, shows the certificate export template +// with the actual heights, and prints the re-run command. func printMissingCertReport(w io.Writer, result *DiagnosisResult) { fmt.Fprintln(w, "Status: Missing certificate exits - recovery cannot continue yet.") fmt.Fprintln(w, "WARNING: Aggsender RPC returned no bridge exit data for the following certificate heights.") @@ -478,7 +479,8 @@ func printMissingCertReport(w io.Writer, result *DiagnosisResult) { } fmt.Fprintln(w, "Preferred batch export path:") - fmt.Fprintln(w, " 1. Build an authoritative cert ID map from agglayer admin data:") + fmt.Fprintln(w, " 1. 
Ask the agglayer admin owner to resolve an authoritative cert ID map") + fmt.Fprintln(w, " from agglayer state, then fetch raw admin_getCertificate responses:") fmt.Fprintln(w, " {") fmt.Fprintln(w, ` "network_id": ,`) fmt.Fprintln(w, ` "certificates": {`) @@ -495,16 +497,17 @@ func printMissingCertReport(w io.Writer, result *DiagnosisResult) { } fmt.Fprintln(w, " }") fmt.Fprintln(w, " }") - fmt.Fprintln(w, " 2. Export the override JSON:") - fmt.Fprintln(w, " backward-forward-let --cfg export-cert-exits \\") - fmt.Fprintln(w, " --agglayer-admin-url \\") - fmt.Fprintln(w, " --cert-ids-file \\") - fmt.Fprintln(w, " --out ") + fmt.Fprintln(w, " 2. Store the raw agglayer responses in an agglayer certificate file:") + fmt.Fprintln(w, " {") + fmt.Fprintln(w, ` "network_id": ,`) + fmt.Fprintln(w, ` "certificates": {`) + fmt.Fprintln(w, ` "": {"jsonrpc":"2.0","result":[, ]}`) + fmt.Fprintln(w, " }") + fmt.Fprintln(w, " }") fmt.Fprintln(w) - fmt.Fprintln(w, "The exporter calls admin_getCertificate for each cert ID, validates network/height,") - fmt.Fprintln(w, "preserves empty bridge-exit lists, writes a source manifest, and prints the") - fmt.Fprintln(w, "diagnosis/recovery follow-up commands.") + fmt.Fprintln(w, "The --cert-exits-file loader accepts either this raw agglayer certificate file") + fmt.Fprintln(w, "or the Aggkit-native heights-to-bridge_exits override format.") fmt.Fprintln(w) fmt.Fprintln(w, "Manual admin API shape for each KNOWN cert ID:") @@ -514,11 +517,11 @@ func printMissingCertReport(w io.Writer, result *DiagnosisResult) { fmt.Fprintln(w, ` {"jsonrpc":"2.0","method":"admin_getCertificate","params":[""],"id":1}`) fmt.Fprintln(w) fmt.Fprintln(w, " The response is [Certificate, CertificateHeader|null].") - fmt.Fprintln(w, ` Use export-cert-exits to extract and re-marshal the "bridge_exits" field.`) + fmt.Fprintln(w, " It can be stored directly under the matching height key in --cert-exits-file.") fmt.Fprintln(w) fmt.Fprintln(w, "Re-run the tool with:") - 
fmt.Fprintln(w, " backward-forward-let --cfg --cert-exits-file ") + fmt.Fprintln(w, " backward-forward-let --cfg --cert-exits-file ") } func recoveryDescription(c RecoveryCase) string { diff --git a/tools/backward_forward_let/override.go b/tools/backward_forward_let/override.go index 393c3d750..29276bafb 100644 --- a/tools/backward_forward_let/override.go +++ b/tools/backward_forward_let/override.go @@ -11,12 +11,6 @@ import ( // BridgeExitsOverride holds pre-extracted certificate bridge exits keyed by height. // Load via LoadBridgeExitsOverride. Use GetExits to retrieve exits for a specific height. -// -// NOTE: the JSON field names follow the Go agglayertypes.BridgeExit json tags -// (e.g., "dest_network", "dest_address"). The agglayer Rust serde may use different -// names (e.g., "destination_network"); if so, build the file by marshaling the -// Certificate.BridgeExits value obtained via json.Unmarshal from the admin API response, -// not from the raw Rust JSON text. type BridgeExitsOverride struct { NetworkID uint32 Description string @@ -38,10 +32,24 @@ type overrideFileJSON struct { Heights map[string][]*agglayertypes.BridgeExit `json:"heights"` } -// LoadBridgeExitsOverride reads and validates a JSON override file containing -// pre-extracted certificate bridge exits keyed by certificate height. +type bridgeExitsOverrideEnvelope struct { + NetworkID uint32 `json:"network_id"` + Description string `json:"description"` + Heights json.RawMessage `json:"heights"` + Certificates json.RawMessage `json:"certificates"` +} + +type agglayerCertificatesFileJSON struct { + NetworkID uint32 `json:"network_id"` + Description string `json:"description"` + Certificates map[string]json.RawMessage `json:"certificates"` +} + +// LoadBridgeExitsOverride reads and validates a JSON fallback file containing +// either raw AggLayer certificates or pre-extracted bridge exits keyed by +// certificate height. 
// -// Expected file format (heights are string-keyed; amount is a decimal string): +// Preferred Aggkit override file format (heights are string-keyed; amount is a decimal string): // // { // "network_id": 1, @@ -61,27 +69,57 @@ type overrideFileJSON struct { // } // } // +// AggLayer admin export format is also accepted. Each certificate value may be +// either a raw Certificate object, the raw admin_getCertificate JSON-RPC response, +// or the admin_getCertificate result pair [Certificate, CertificateHeader|null]: +// +// { +// "network_id": 1, +// "description": "optional description", +// "certificates": { +// "42": { +// "jsonrpc": "2.0", +// "id": 1, +// "result": [{ "network_id": 1, "height": 42, "bridge_exits": [] }, null] +// } +// } +// } +// // Returns an error when: // - the file cannot be read // - the JSON is malformed // - network_id is zero -// - the heights map is absent +// - neither heights nor certificates is present // - any height key is not a non-negative integer +// - an AggLayer certificate entry has a mismatched network_id or height func LoadBridgeExitsOverride(filePath string) (*BridgeExitsOverride, error) { data, err := os.ReadFile(filePath) if err != nil { return nil, fmt.Errorf("read override file %s: %w", filePath, err) } + var envelope bridgeExitsOverrideEnvelope + if err := json.Unmarshal(data, &envelope); err != nil { + return nil, fmt.Errorf("parse override file %s: %w", filePath, err) + } + + if len(envelope.Heights) > 0 { + return loadAggkitBridgeExitsOverride(filePath, data) + } + if len(envelope.Certificates) > 0 { + return loadAgglayerCertificatesOverride(filePath, data) + } + return nil, fmt.Errorf("override file %s: heights map is missing and certificates map is missing", filePath) +} + +func loadAggkitBridgeExitsOverride(filePath string, data []byte) (*BridgeExitsOverride, error) { var raw overrideFileJSON if err := json.Unmarshal(data, &raw); err != nil { return nil, fmt.Errorf("parse override file %s: %w", filePath, 
err) } - if raw.NetworkID == 0 { return nil, fmt.Errorf("override file %s: network_id must be non-zero", filePath) } - if raw.Heights == nil { return nil, fmt.Errorf("override file %s: heights map is missing", filePath) } @@ -101,3 +139,87 @@ func LoadBridgeExitsOverride(filePath string) (*BridgeExitsOverride, error) { parsed: parsed, }, nil } + +func loadAgglayerCertificatesOverride(filePath string, data []byte) (*BridgeExitsOverride, error) { + var raw agglayerCertificatesFileJSON + if err := json.Unmarshal(data, &raw); err != nil { + return nil, fmt.Errorf("parse agglayer certificates file %s: %w", filePath, err) + } + if raw.NetworkID == 0 { + return nil, fmt.Errorf("agglayer certificates file %s: network_id must be non-zero", filePath) + } + if raw.Certificates == nil { + return nil, fmt.Errorf("agglayer certificates file %s: certificates map is missing", filePath) + } + + parsed := make(map[uint64][]*agglayertypes.BridgeExit, len(raw.Certificates)) + for key, certRaw := range raw.Certificates { + height, parseErr := strconv.ParseUint(key, 10, 64) + if parseErr != nil { + return nil, fmt.Errorf("agglayer certificates file %s: non-numeric height key %q: %w", filePath, key, parseErr) + } + cert, err := extractAgglayerCertificate(certRaw) + if err != nil { + return nil, fmt.Errorf("agglayer certificates file %s: height %d: %w", filePath, height, err) + } + if cert.NetworkID != raw.NetworkID { + return nil, fmt.Errorf("agglayer certificates file %s: height %d certificate network_id %d does not match file network_id %d", + filePath, height, cert.NetworkID, raw.NetworkID) + } + if cert.Height != height { + return nil, fmt.Errorf("agglayer certificates file %s: height key %d does not match certificate height %d", + filePath, height, cert.Height) + } + exits := cert.BridgeExits + if exits == nil { + exits = []*agglayertypes.BridgeExit{} + } + parsed[height] = exits + } + + description := raw.Description + if description == "" { + description = "generated from agglayer 
admin_getCertificate responses" + } + return &BridgeExitsOverride{ + NetworkID: raw.NetworkID, + Description: description, + parsed: parsed, + }, nil +} + +func extractAgglayerCertificate(data json.RawMessage) (*agglayertypes.Certificate, error) { + var rpcResponse struct { + Result json.RawMessage `json:"result"` + Error json.RawMessage `json:"error"` + } + if err := json.Unmarshal(data, &rpcResponse); err == nil { + if len(rpcResponse.Error) > 0 && string(rpcResponse.Error) != "null" { + return nil, fmt.Errorf("admin_getCertificate response contains error: %s", string(rpcResponse.Error)) + } + if len(rpcResponse.Result) > 0 { + return extractAgglayerCertificate(rpcResponse.Result) + } + } + + var pair [2]json.RawMessage + if err := json.Unmarshal(data, &pair); err == nil && len(pair[0]) > 0 && string(pair[0]) != "null" { + return extractAgglayerCertificate(pair[0]) + } + + var wrapped struct { + Certificate json.RawMessage `json:"certificate"` + } + if err := json.Unmarshal(data, &wrapped); err == nil && len(wrapped.Certificate) > 0 { + return extractAgglayerCertificate(wrapped.Certificate) + } + + var cert agglayertypes.Certificate + if err := json.Unmarshal(data, &cert); err != nil { + return nil, fmt.Errorf("parse certificate: %w", err) + } + if cert.NetworkID == 0 { + return nil, fmt.Errorf("certificate network_id must be non-zero") + } + return &cert, nil +} diff --git a/tools/backward_forward_let/override_test.go b/tools/backward_forward_let/override_test.go index f95c2de77..3bfd51605 100644 --- a/tools/backward_forward_let/override_test.go +++ b/tools/backward_forward_let/override_test.go @@ -169,6 +169,120 @@ func TestLoadBridgeExitsOverride_RoundTrip(t *testing.T) { require.Nil(t, exits[0].Metadata) } +func TestLoadBridgeExitsOverride_AgglayerCertificateObject(t *testing.T) { + t.Parallel() + + originAddr := "0xAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + destAddr := "0xBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB" + path := writeOverrideFile(t, `{ + 
"network_id": 7, + "description": "raw agglayer certificates", + "certificates": { + "42": { + "network_id": 7, + "height": 42, + "prev_local_exit_root": "0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "new_local_exit_root": "0xbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", + "bridge_exits": [ + { + "leaf_type": 0, + "token_info": { + "origin_network": 1, + "origin_token_address": "`+originAddr+`" + }, + "dest_network": 2, + "dest_address": "`+destAddr+`", + "amount": "100", + "metadata": null + } + ], + "imported_bridge_exits": [], + "metadata": "0x0000000000000000000000000000000000000000000000000000000000000000" + }, + "43": { + "network_id": 7, + "height": 43, + "prev_local_exit_root": "0xbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", + "new_local_exit_root": "0xbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", + "bridge_exits": [], + "imported_bridge_exits": [], + "metadata": "0x0000000000000000000000000000000000000000000000000000000000000000" + } + } + }`) + + result, err := LoadBridgeExitsOverride(path) + require.NoError(t, err) + require.Equal(t, uint32(7), result.NetworkID) + require.Equal(t, "raw agglayer certificates", result.Description) + + exits42, ok := result.GetExits(42) + require.True(t, ok) + require.Len(t, exits42, 1) + require.Equal(t, uint32(2), exits42[0].DestinationNetwork) + require.Equal(t, common.HexToAddress(destAddr), exits42[0].DestinationAddress) + require.Equal(t, big.NewInt(100), exits42[0].Amount) + + exits43, ok := result.GetExits(43) + require.True(t, ok) + require.Empty(t, exits43) +} + +func TestLoadBridgeExitsOverride_AgglayerAdminGetCertificateResponse(t *testing.T) { + t.Parallel() + + path := writeOverrideFile(t, `{ + "network_id": 7, + "certificates": { + "42": { + "jsonrpc": "2.0", + "id": 1, + "result": [ + { + "network_id": 7, + "height": 42, + "prev_local_exit_root": "0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + 
"new_local_exit_root": "0xbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", + "bridge_exits": [], + "imported_bridge_exits": [], + "metadata": "0x0000000000000000000000000000000000000000000000000000000000000000" + }, + null + ] + } + } + }`) + + result, err := LoadBridgeExitsOverride(path) + require.NoError(t, err) + require.Equal(t, "generated from agglayer admin_getCertificate responses", result.Description) + exits, ok := result.GetExits(42) + require.True(t, ok) + require.Empty(t, exits) +} + +func TestLoadBridgeExitsOverride_AgglayerCertificateRejectsMismatchedHeight(t *testing.T) { + t.Parallel() + + path := writeOverrideFile(t, `{ + "network_id": 7, + "certificates": { + "42": { + "network_id": 7, + "height": 43, + "prev_local_exit_root": "0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "new_local_exit_root": "0xbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", + "bridge_exits": [], + "imported_bridge_exits": [], + "metadata": "0x0000000000000000000000000000000000000000000000000000000000000000" + } + } + }`) + + _, err := LoadBridgeExitsOverride(path) + require.ErrorContains(t, err, "height key 42 does not match certificate height 43") +} + // TestLoadBridgeExitsOverride_NonNumericKey verifies that a non-numeric height key // causes an error. func TestLoadBridgeExitsOverride_NonNumericKey(t *testing.T) {