diff --git a/pkg/node/node.go b/pkg/node/node.go index 35b78d4dc05..53d96ec5419 100644 --- a/pkg/node/node.go +++ b/pkg/node/node.go @@ -202,6 +202,8 @@ const ( basePrice = 10_000 // minimal price for retrieval and pushsync requests of maximum proximity postageSyncingStallingTimeout = 10 * time.Minute // postageSyncingBackoffTimeout = 5 * time.Second // + startupBlockHeightChecks = 3 // number of probes at startup before declaring stored chainstate ahead of backend + startupBlockHeightBackoff = 5 * time.Second // wait between startup block-height probes minPaymentThreshold = 2 * refreshRate // minimal accepted payment threshold of full nodes maxPaymentThreshold = 24 * refreshRate // maximal accepted payment threshold of full nodes mainnetNetworkID = uint64(1) // @@ -867,6 +869,47 @@ func NewBee( return nil, errors.New("postage contract is paused") } + // Refuse to start if the last-synced postage block sits ahead of the + // block number reported by the backend. The persisted state was + // advanced from earlier RPC responses, so a persistent gap means the + // configured blockchain-rpc-endpoint is now returning data for a + // different chain than it was previously (a misrouted public RPC, a + // changed endpoint, or a load-balancer serving the wrong backend). + // Probe a few times with a short backoff so a single bad response + // (transient RPC blip, brief failover) does not lock the node out. + // Without this guard the postage listener loop would spin until the + // 10-minute stalling timeout fires, surfacing as /stamps returning + // 503 "syncing in progress" the whole time (issue #4941). + if cs := batchStore.GetChainState(); cs.Block > 0 { + var ( + blockHeight uint64 + blockErr error + ) + for i := 0; i < startupBlockHeightChecks; i++ { + blockHeight, blockErr = chainBackend.BlockNumber(ctx) + if blockErr != nil || blockHeight >= cs.Block { + break + } + select { + case <-time.After(startupBlockHeightBackoff): + case <-ctx.Done(): + return nil, ctx.Err() + } + } + switch { + case blockErr != nil: + logger.Warning("could not verify block height against stored chainstate", "error", blockErr) + case cs.Block > blockHeight: + return nil, fmt.Errorf( + "blockchain-rpc-endpoint reports block %d after %d checks, but the local batch store has already synced past it to block %d. "+ + "This means the RPC endpoint is now serving a different chain than it was on a previous run. "+ + "Confirm that blockchain-rpc-endpoint points to the correct network (compare its eth_chainId and current block height against a second, trusted endpoint). "+ + "Once the RPC is correct, restart with --resync to rebuild the batch store from the right chain", + blockHeight, startupBlockHeightChecks, cs.Block, + ) + } + } + if o.FullNodeMode { err = batchSvc.Start(ctx, postageSyncStart) syncStatus.Store(true)