From a5a2812c74fd2407e08b1303a7f194132fa8528c Mon Sep 17 00:00:00 2001 From: Calin Martinconi Date: Wed, 13 May 2026 09:40:29 +0300 Subject: [PATCH 1/2] fix: fail fast when postage block is ahead of chain tip --- pkg/node/node.go | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/pkg/node/node.go b/pkg/node/node.go index 35b78d4dc05..efe43f5ae9e 100644 --- a/pkg/node/node.go +++ b/pkg/node/node.go @@ -867,6 +867,31 @@ func NewBee( return nil, errors.New("postage contract is paused") } + // Refuse to start if the last-synced postage block sits ahead of the + // chain tip reported by the backend. The persisted state was advanced + // from earlier RPC responses, so the gap means the configured + // blockchain-rpc-endpoint is now returning data for a different chain + // than it was previously (a misrouted public RPC, a changed endpoint, + // or a load-balancer serving the wrong backend). Without this guard + // the postage listener loop spins until the 10-minute stalling + // timeout fires, surfacing as /stamps returning 503 "syncing in + // progress" the whole time (issue #4941). + if cs := batchStore.GetChainState(); cs.Block > 0 { + blockNumber, blockErr := chainBackend.BlockNumber(ctx) + switch { + case blockErr != nil: + logger.Warning("could not verify chain tip against stored chainstate", "error", blockErr) + case cs.Block > blockNumber: + return nil, fmt.Errorf( + "blockchain-rpc-endpoint reports block %d, but the local batch store has already synced past it to block %d. "+ + "This means the RPC endpoint is now serving a different chain than it was on a previous run. "+ + "Confirm that blockchain-rpc-endpoint points to the correct network (compare its eth_chainId and current block height against a second, trusted endpoint). "+ + "Once the RPC is correct, restart with --resync to rebuild the batch store from the right chain", + blockNumber, cs.Block, + ) + } + } + if o.FullNodeMode { err = batchSvc.Start(ctx, postageSyncStart) syncStatus.Store(true) From f33ea8f6b5a185f636252a6ecf0995e7944626fd Mon Sep 17 00:00:00 2001 From: Calin Martinconi Date: Wed, 13 May 2026 10:43:32 +0300 Subject: [PATCH 2/2] fix: probe block height at startup before failing on chainstate ahead --- pkg/node/node.go | 44 +++++++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/pkg/node/node.go b/pkg/node/node.go index efe43f5ae9e..53d96ec5419 100644 --- a/pkg/node/node.go +++ b/pkg/node/node.go @@ -202,6 +202,8 @@ const ( basePrice = 10_000 // minimal price for retrieval and pushsync requests of maximum proximity postageSyncingStallingTimeout = 10 * time.Minute // postageSyncingBackoffTimeout = 5 * time.Second // + startupBlockHeightChecks = 3 // number of probes at startup before declaring stored chainstate ahead of backend + startupBlockHeightBackoff = 5 * time.Second // wait between startup block-height probes minPaymentThreshold = 2 * refreshRate // minimal accepted payment threshold of full nodes maxPaymentThreshold = 24 * refreshRate // maximal accepted payment threshold of full nodes mainnetNetworkID = uint64(1) // @@ -868,26 +870,42 @@ func NewBee( } // Refuse to start if the last-synced postage block sits ahead of the - // chain tip reported by the backend. The persisted state was advanced - // from earlier RPC responses, so the gap means the configured - // blockchain-rpc-endpoint is now returning data for a different chain - // than it was previously (a misrouted public RPC, a changed endpoint, - // or a load-balancer serving the wrong backend). Without this guard - // the postage listener loop spins until the 10-minute stalling - // timeout fires, surfacing as /stamps returning 503 "syncing in - // progress" the whole time (issue #4941). + // block number reported by the backend. The persisted state was + // advanced from earlier RPC responses, so a persistent gap means the + // configured blockchain-rpc-endpoint is now returning data for a + // different chain than it was previously (a misrouted public RPC, a + // changed endpoint, or a load-balancer serving the wrong backend). + // Probe a few times with a short backoff so a single bad response + // (transient RPC blip, brief failover) does not lock the node out. + // Without this guard the postage listener loop would spin until the + // 10-minute stalling timeout fires, surfacing as /stamps returning + // 503 "syncing in progress" the whole time (issue #4941). if cs := batchStore.GetChainState(); cs.Block > 0 { - blockNumber, blockErr := chainBackend.BlockNumber(ctx) + var ( + blockHeight uint64 + blockErr error + ) + for i := 0; i < startupBlockHeightChecks; i++ { + blockHeight, blockErr = chainBackend.BlockNumber(ctx) + if blockErr != nil || blockHeight >= cs.Block { + break + } + select { + case <-time.After(startupBlockHeightBackoff): + case <-ctx.Done(): + return nil, ctx.Err() + } + } switch { case blockErr != nil: - logger.Warning("could not verify chain tip against stored chainstate", "error", blockErr) - case cs.Block > blockNumber: + logger.Warning("could not verify block height against stored chainstate", "error", blockErr) + case cs.Block > blockHeight: return nil, fmt.Errorf( - "blockchain-rpc-endpoint reports block %d, but the local batch store has already synced past it to block %d. "+ + "blockchain-rpc-endpoint reports block %d after %d checks, but the local batch store has already synced past it to block %d. "+ "This means the RPC endpoint is now serving a different chain than it was on a previous run. "+ "Confirm that blockchain-rpc-endpoint points to the correct network (compare its eth_chainId and current block height against a second, trusted endpoint). "+ "Once the RPC is correct, restart with --resync to rebuild the batch store from the right chain", - blockNumber, cs.Block, + blockHeight, startupBlockHeightChecks, cs.Block, ) } }