perf(mempool): disable rechecking when node is catching up by hvanz · Pull Request #3343 · cometbft/cometbft · GitHub

perf(mempool): disable rechecking when node is catching up #3343


Closed
hvanz wants to merge 9 commits

Changes from all commits
@@ -0,0 +1,3 @@
- `[mempool]` Do not recheck transactions when the node is lagging behind with respect to its peers.
This should allow the node to catch up faster.
([\#3343](https://github.com/cometbft/cometbft/pull/3343))
32 changes: 31 additions & 1 deletion mempool/clist_mempool.go
@@ -20,6 +20,8 @@ import (
"github.com/cometbft/cometbft/types"
)

const maxHeightDiffForRechecking = 2
Contributor: Should this be a config parameter? Are we OK having this value hard-coded?

Member Author: I'm asking this myself. As a point of comparison, in the current code the mempool reactor doesn't send transactions to a peer if the height difference with that peer is more than one block.

Contributor: This is pretty arbitrary; making it a config flag probably won't help much, although it's not a bad idea.

In any case, 2 is a low value. We are talking about a node that is only slightly behind and can still catch up. The real problem is when it is 10 or more heights behind, I would say.
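A minimal sketch of what such a knob might look like, assuming a hypothetical RecheckMaxHeightDiff field on the mempool config (the field name is invented for illustration and is not part of this PR):

// Hypothetical config field; this PR hard-codes the value as
// maxHeightDiffForRechecking = 2 instead.
type MempoolConfig struct {
	Recheck              bool  // existing flag that enables rechecking
	RecheckMaxHeightDiff int64 // skip recheck when a peer is this many blocks ahead; 0 disables the check
}

The comparison in Update would then read peerState.GetHeight() >= height+mem.config.RecheckMaxHeightDiff.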


// CListMempool is an ordered in-memory pool for transactions before they are
// proposed in a consensus round. Transaction validity is checked using the
// CheckTx abci message before the transaction is added to the pool. The
@@ -56,6 +58,8 @@ type CListMempool struct {
// This reduces the pressure on the proxyApp.
cache TxCache

sw *p2p.Switch

logger log.Logger
metrics *Metrics
}
@@ -147,6 +151,14 @@ func (mem *CListMempool) SetLogger(l log.Logger) {
mem.logger = l
}

// SetSwitch sets the switch to access the peers.
func (mem *CListMempool) SetSwitch(sw *p2p.Switch) {
if sw == nil {
mem.logger.Error("switch passed as argument is empty")
}
mem.sw = sw
}

// WithPreCheck sets a filter for the mempool to reject a tx if f(tx) returns
// false. This is ran before CheckTx. Only applies to the first created block.
// After that, Update overwrites the existing value.
@@ -608,7 +620,25 @@ func (mem *CListMempool) Update(

// Recheck txs left in the mempool to remove them if they became invalid in the new state.
if mem.config.Recheck {
mem.recheckTxs()
// Don't perform recheck if one of the peers is known to be at least at our height +
Contributor: This is a case for creating a new method to do this verification, keeping this workaround out of Update. That way, if we find a better way to implement it, we change the new method, not this one.

// maxHeightDiffForRechecking.
doRecheck := true
if mem.sw != nil {
mem.sw.Peers().ForEach(func(peer p2p.Peer) {
Contributor: This could be a method on the Switch.

Contributor: Possible attack: I construct a node that reports height=10000000 but otherwise appears to be valid.

Contributor: Agree with the attack; we cannot trust individual reports of height.

Note that we use the reported height to decide what to send to a peer, not to define our own behavior as a node.

Member Author: The main problem with this attack is that transactions may get stuck in the mempool: with PrepareProposal, rechecking is needed to do garbage collection; otherwise the transactions are only validated when proposing.

if peerState, ok := peer.Get(types.PeerStateKey).(PeerState); ok {
if peerState.GetHeight() >= height+maxHeightDiffForRechecking {
doRecheck = false
return
}
}
})
}

if doRecheck {
mem.recheckTxs()
} else {
mem.logger.Info("recheck disabled at this height", "height", height)
}
}

// Notify if there are still txs left in the mempool.
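The reviewers above suggest both moving this check into its own method and not trusting a single peer's reported height. A hedged sketch combining the two ideas, reusing the Switch and PeerState accessors from the diff (the helper name and the majority rule are illustrative, not part of this PR):

// shouldSkipRecheck reports whether rechecking can be skipped because this
// node appears to be lagging. To blunt a single peer lying about its height,
// it only skips when a majority of peers with known state report being at
// least maxHeightDiffForRechecking blocks ahead of us.
func (mem *CListMempool) shouldSkipRecheck(height int64) bool {
	if mem.sw == nil {
		return false
	}
	total, ahead := 0, 0
	mem.sw.Peers().ForEach(func(peer p2p.Peer) {
		if peerState, ok := peer.Get(types.PeerStateKey).(PeerState); ok {
			total++
			if peerState.GetHeight() >= height+maxHeightDiffForRechecking {
				ahead++
			}
		}
	})
	return total > 0 && 2*ahead > total
}

With such a helper, Update would reduce to calling mem.shouldSkipRecheck(height) instead of carrying the inline loop.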
1 change: 1 addition & 0 deletions mempool/reactor.go
@@ -67,6 +67,7 @@ func (memR *Reactor) OnStart() error {
if !memR.config.Broadcast {
memR.Logger.Info("Tx broadcasting is disabled")
}
memR.mempool.SetSwitch(memR.Switch)
return nil
}

222 changes: 222 additions & 0 deletions test/e2e/networks_regressions/slow_recheck.toml
@@ -0,0 +1,222 @@
# In this testnet, the full nodes start late and then never catch up to the latest height because
# rechecking transactions is very slow (due to the `check_tx_delay` setting). Solved by disabling
# rechecking when the nodes are lagging behind their peers by more than N blocks (in the PR, N=2).
# (https://github.com/cometbft/cometbft/pull/3343)

ipv6 = false
initial_height = 0
disable_pex = false
key_type = ""
abci_protocol = "tcp"
prepare_proposal_delay = "200ms"
process_proposal_delay = "200ms"
check_tx_delay = "120ms"
vote_extension_delay = "100ms"
finalize_block_delay = "500ms"
upgrade_version = ""
load_tx_size_bytes = 0
load_tx_batch_size = 0
load_tx_connections = 0
load_max_txs = 0
prometheus = true
block_max_bytes = 0
vote_extension_size = 2048
vote_extensions_enable_height = 11
vote_extensions_update_height = 0
peer_gossip_intraloop_sleep_duration = "0s"
experimental_max_gossip_connections_to_persistent_peers = 0
experimental_max_gossip_connections_to_non_persistent_peers = 0
abci_tests_enabled = false
default_zone = ""
pbts_enable_height = 11
pbts_update_height = 0

[initial_state]

[validators]
validator01 = 83
validator02 = 46
validator03 = 50
validator04 = 36
validator05 = 70

[node]
[node.full01]
mode = "full"
version = ""
persistent_peers = ["validator04", "validator01"]
privval_protocol = "tcp"
start_at = 20
block_sync_version = "v0"
state_sync = true
persist_interval = 0
snapshot_interval = 3
retain_blocks = 0
enable_companion_pruning = false
send_no_load = false
zone = ""
experimental_db_key_layout = ""
compact = false
compaction_interval = 0
discard_abci_responses = false
indexer = ""
clock_skew = "0s"
[node.full02]
mode = "full"
version = ""
persistent_peers = ["validator02", "validator05"]
privval_protocol = "file"
start_at = 30
block_sync_version = "v0"
state_sync = true
persist_interval = 0
snapshot_interval = 3
retain_blocks = 0
enable_companion_pruning = true
send_no_load = false
zone = ""
experimental_db_key_layout = ""
compact = false
compaction_interval = 0
discard_abci_responses = false
indexer = ""
clock_skew = "0s"
[node.light01]
mode = "light"
version = ""
persistent_peers = ["validator04", "validator03", "validator01", "validator02"]
privval_protocol = ""
start_at = 10
block_sync_version = ""
state_sync = false
persist_interval = 0
snapshot_interval = 0
retain_blocks = 0
enable_companion_pruning = false
send_no_load = false
zone = ""
experimental_db_key_layout = ""
compact = false
compaction_interval = 0
discard_abci_responses = false
indexer = ""
clock_skew = "0s"
[node.light02]
mode = "light"
version = ""
persistent_peers = ["validator04", "validator03", "validator01", "validator02"]
privval_protocol = ""
start_at = 15
block_sync_version = ""
state_sync = false
persist_interval = 0
snapshot_interval = 0
retain_blocks = 0
enable_companion_pruning = false
send_no_load = false
zone = ""
experimental_db_key_layout = ""
compact = false
compaction_interval = 0
discard_abci_responses = false
indexer = ""
clock_skew = "0s"
[node.validator01]
mode = "validator"
version = ""
privval_protocol = "file"
start_at = 0
block_sync_version = "v0"
state_sync = false
persist_interval = 1
snapshot_interval = 3
retain_blocks = 0
enable_companion_pruning = false
send_no_load = false
zone = ""
experimental_db_key_layout = ""
compact = false
compaction_interval = 0
discard_abci_responses = false
indexer = ""
clock_skew = "0s"
[node.validator02]
mode = "validator"
version = ""
persistent_peers = ["validator01"]
privval_protocol = "file"
start_at = 0
block_sync_version = "v0"
state_sync = false
persist_interval = 5
snapshot_interval = 3
retain_blocks = 0
enable_companion_pruning = false
send_no_load = false
zone = ""
experimental_db_key_layout = ""
compact = false
compaction_interval = 0
discard_abci_responses = false
indexer = ""
clock_skew = "0s"
[node.validator03]
mode = "validator"
version = ""
persistent_peers = ["validator01"]
privval_protocol = "tcp"
start_at = 0
block_sync_version = "v0"
state_sync = false
persist_interval = 0
snapshot_interval = 0
retain_blocks = 0
enable_companion_pruning = false
send_no_load = false
zone = ""
experimental_db_key_layout = ""
compact = false
compaction_interval = 0
discard_abci_responses = false
indexer = ""
clock_skew = "0s"
[node.validator04]
mode = "validator 950E "
version = ""
persistent_peers = ["validator01", "validator02"]
privval_protocol = "tcp"
start_at = 0
block_sync_version = "v0"
state_sync = false
persist_interval = 0
snapshot_interval = 3
retain_blocks = 0
enable_companion_pruning = true
send_no_load = false
zone = ""
experimental_db_key_layout = ""
compact = false
compaction_interval = 0
discard_abci_responses = false
indexer = ""
clock_skew = "0s"
[node.validator05]
mode = "validator"
version = ""
persistent_peers = ["validator04"]
privval_protocol = "tcp"
start_at = 0
block_sync_version = "v0"
state_sync = false
persist_interval = 5
snapshot_interval = 3
retain_blocks = 56
enable_companion_pruning = false
send_no_load = false
zone = ""
experimental_db_key_layout = ""
compact = false
compaction_interval = 0
discard_abci_responses = false
indexer = ""
clock_skew = "0s"